├── README.assets
    └── image-20240301235034715.png
├── mmseg_custom
    ├── datasets
    │   ├── samplers
    │   │   ├── __init__.py
    │   │   └── distributed_sampler.py
    │   ├── pipelines
    │   │   ├── formating.py
    │   │   ├── add_noise.py
    │   │   ├── __init__.py
    │   │   └── compose.py
    │   ├── dark_zurich.py
    │   ├── night_driving.py
    │   ├── hrf.py
    │   ├── drive.py
    │   ├── isprs.py
    │   ├── stare.py
    │   ├── chase_db1.py
    │   ├── potsdam.py
    │   ├── voc.py
    │   ├── __init__.py
    │   ├── isaid.py
    │   └── loveda.py
    ├── configs
    │   ├── upernet
    │   │   ├── upernet_r101_512x512_80k_ade20k.py
    │   │   ├── upernet_r101_512x512_160k_ade20k.py
    │   │   ├── upernet_r101_512x512_20k_voc12aug.py
    │   │   ├── upernet_r101_512x512_40k_voc12aug.py
    │   │   ├── upernet_r101_512x1024_40k_cityscapes.py
    │   │   ├── upernet_r101_512x1024_80k_cityscapes.py
    │   │   ├── upernet_r101_769x769_40k_cityscapes.py
    │   │   ├── upernet_r101_769x769_80k_cityscapes.py
    │   │   ├── upernet_r50_512x1024_40k_cityscapes.py
    │   │   ├── upernet_r50_512x1024_80k_cityscapes.py
    │   │   ├── upernet_r50_512x512_160k_ade20k.py
    │   │   ├── upernet_r50_512x512_80k_ade20k.py
    │   │   ├── upernet_r50_512x512_20k_voc12aug.py
    │   │   ├── upernet_r50_512x512_40k_voc12aug.py
    │   │   ├── upernet_r50_769x769_40k_cityscapes.py
    │   │   ├── upernet_r50_769x769_80k_cityscapes.py
    │   │   ├── upernet_r101_512x512_40k_voc12aug_hard_pixel.py
    │   │   ├── upernet_r50_512x512_80k_ade20k_hard_pixel.py
    │   │   ├── upernet_r50_512x512_40k_voc12aug_hard_pixel.py
    │   │   └── upernet_r50_512x512_40k_voc12aug_FADC.py
    │   └── _base_
    │   │   ├── datasets
    │   │       ├── pascal_voc12_aug.py
    │   │       ├── cityscapes_768x768.py
    │   │       ├── cityscapes_769x769.py
    │   │       ├── cityscapes_832x832.py
    │   │       ├── cityscapes_1024x1024.py
    │   │       ├── loveda.py
    │   │       ├── potsdam.py
    │   │       ├── vaihingen.py
    │   │       ├── coco-stuff164k.py
    │   │       ├── cityscapes.py
    │   │       ├── ade20k_640x640.py
    │   │       ├── drive.py
    │   │       ├── hrf.py
    │   │       ├── stare.py
    │   │       ├── coco-stuff10k.py
    │   │       ├── chase_db1.py
    │   │       ├── pascal_voc12.py
    │   │       ├── isaid.py
    │   │       ├── ade20k.py
    │   │       ├── pascal_context.py
    │   │       └── pascal_context_59.py
    │   │   ├── default_runtime.py
    │   │   ├── schedules
    │   │       ├── schedule_320k.py
    │   │       ├── schedule_20k.py
    │   │       ├── schedule_40k.py
    │   │       ├── schedule_80k.py
    │   │       └── schedule_160k.py
    │   │   └── models
    │   │       ├── lraspp_m-v3-d8.py
    │   │       ├── dpt_vit-b16.py
    │   │       ├── erfnet_fcn.py
    │   │       ├── segformer_mit-b0.py
    │   │       ├── fpn_r50.py
    │   │       ├── cgnet.py
    │   │       ├── segmenter_vit-b16_mask.py
    │   │       ├── ccnet_r50-d8.py
    │   │       ├── danet_r50-d8.py
    │   │       ├── pspnet_r50-d8.py
    │   │       ├── deeplabv3_r50-d8.py
    │   │       ├── fcn_r50-d8.py
    │   │       ├── isanet_r50-d8.py
    │   │       ├── dmnet_r50-d8.py
    │   │       ├── upernet_r50.py
    │   │       ├── apcnet_r50-d8.py
    │   │       ├── dnl_r50-d8.py
    │   │       ├── nonlocal_r50-d8.py
    │   │       ├── gcnet_r50-d8.py
    │   │       ├── emanet_r50-d8.py
    │   │       ├── ann_r50-d8.py
    │   │       ├── deeplabv3plus_r50-d8.py
    │   │       ├── deeplabv3plus_r50-d8-AAFS.py
    │   │       ├── ocrnet_r50-d8.py
    │   │       ├── psanet_r50-d8.py
    │   │       ├── twins_pcpvt-s_fpn.py
    │   │       ├── encnet_r50-d8.py
    │   │       ├── upernet_mae.py
    │   │       ├── upernet_beit.py
    │   │       ├── upernet_convnext.py
    │   │       ├── pspnet_unet_s5-d16.py
    │   │       ├── deeplabv3_unet_s5-d16.py
    │   │       ├── fcn_unet_s5-d16.py
    │   │       ├── fastfcn_r50-d32_jpu_psp.py
    │   │       ├── fcn_hr18.py
    │   │       ├── upernet_swin.py
    │   │       ├── twins_pcpvt-s_upernet.py
    │   │       ├── pointrend_r50.py
    │   │       ├── upernet_vit-b16_ln_mln.py
    │   │       ├── fast_scnn.py
    │   │       ├── bisenetv1_r18-d32.py
    │   │       ├── icnet_r50-d8.py
    │   │       ├── ocrnet_hr18.py
    │   │       ├── setr_pup.py
    │   │       ├── setr_naive.py
    │   │       ├── upernet_swin_AS.py
    │   │       ├── bisenetv2.py
    │   │       ├── stdc.py
    │   │       └── setr_mla.py
    ├── models
    │   ├── backbones
    │   │   └── __init__.py
    │   ├── __init__.py
    │   └── utils
    │   │   ├── __init__.py
    │   │   ├── make_divisible.py
    │   │   ├── se_layer.py
    │   │   ├── res_layer.py
    │   │   ├── shape_convert.py
    │   │   └── up_conv_block.py
    ├── core
    │   ├── utils
    │   │   ├── __init__.py
    │   │   ├── misc.py
    │   │   └── dist_util.py
    │   ├── seg
    │   │   ├── sampler
    │   │   │   ├── __init__.py
    │   │   │   ├── base_pixel_sampler.py
    │   │   │   └── ohem_pixel_sampler.py
    │   │   ├── __init__.py
    │   │   └── builder.py
    │   ├── optimizers
    │   │   └── __init__.py
    │   ├── __init__.py
    │   ├── evaluation
    │   │   ├── __init__.py
    │   │   └── eval_hooks.py
    │   └── builder.py
    ├── apis
    │   ├── __init__.py
    │   └── inference.py
    ├── version.py
    └── __init__.py
├── dist_train.sh
├── dist_test.sh
├── slurm_train.sh
├── slurm_test.sh
└── get_flops.py


/README.assets/image-20240301235034715.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Linwei-Chen/Seg-Aliasing/HEAD/README.assets/image-20240301235034715.png


--------------------------------------------------------------------------------
/mmseg_custom/datasets/samplers/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from .distributed_sampler import DistributedSampler
3 | 
4 | __all__ = ['DistributedSampler']
5 | 


--------------------------------------------------------------------------------
/mmseg_custom/configs/upernet/upernet_r101_512x512_80k_ade20k.py:
--------------------------------------------------------------------------------
1 | _base_ = './upernet_r50_512x512_80k_ade20k.py'
2 | model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
3 | 


--------------------------------------------------------------------------------
/mmseg_custom/configs/upernet/upernet_r101_512x512_160k_ade20k.py:
--------------------------------------------------------------------------------
1 | _base_ = './upernet_r50_512x512_160k_ade20k.py'
2 | model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
3 | 


--------------------------------------------------------------------------------
/mmseg_custom/configs/upernet/upernet_r101_512x512_20k_voc12aug.py:
--------------------------------------------------------------------------------
1 | _base_ = './upernet_r50_512x512_20k_voc12aug.py'
2 | model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
3 | 


--------------------------------------------------------------------------------
/mmseg_custom/configs/upernet/upernet_r101_512x512_40k_voc12aug.py:
--------------------------------------------------------------------------------
1 | _base_ = './upernet_r50_512x512_40k_voc12aug.py'
2 | model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
3 | 


--------------------------------------------------------------------------------
/mmseg_custom/configs/upernet/upernet_r101_512x1024_40k_cityscapes.py:
--------------------------------------------------------------------------------
1 | _base_ = './upernet_r50_512x1024_40k_cityscapes.py'
2 | model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
3 | 


--------------------------------------------------------------------------------
/mmseg_custom/configs/upernet/upernet_r101_512x1024_80k_cityscapes.py:
--------------------------------------------------------------------------------
1 | _base_ = './upernet_r50_512x1024_80k_cityscapes.py'
2 | model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
3 | 


--------------------------------------------------------------------------------
/mmseg_custom/configs/upernet/upernet_r101_769x769_40k_cityscapes.py:
--------------------------------------------------------------------------------
1 | _base_ = './upernet_r50_769x769_40k_cityscapes.py'
2 | model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
3 | 


--------------------------------------------------------------------------------
/mmseg_custom/configs/upernet/upernet_r101_769x769_80k_cityscapes.py:
--------------------------------------------------------------------------------
1 | _base_ = './upernet_r50_769x769_80k_cityscapes.py'
2 | model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
3 | 


--------------------------------------------------------------------------------
/mmseg_custom/models/backbones/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from .resnet import ResNet, ResNetV1c, ResNetV1d
3 | __all__ = [
4 |     'ResNet', 'ResNetV1c', 'ResNetV1d', 
5 | ]
6 | 


--------------------------------------------------------------------------------
/mmseg_custom/models/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from .backbones import *  # noqa: F401,F403
3 | from .decode_heads import *  # noqa: F401,F403
4 | from mmseg.models import *
5 | __all__ = [
6 | ]
7 | 


--------------------------------------------------------------------------------
/mmseg_custom/configs/upernet/upernet_r50_512x1024_40k_cityscapes.py:
--------------------------------------------------------------------------------
1 | _base_ = [
2 |     '../_base_/models/upernet_r50.py', '../_base_/datasets/cityscapes.py',
3 |     '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py'
4 | ]
5 | 


--------------------------------------------------------------------------------
/mmseg_custom/configs/upernet/upernet_r50_512x1024_80k_cityscapes.py:
--------------------------------------------------------------------------------
1 | _base_ = [
2 |     '../_base_/models/upernet_r50.py', '../_base_/datasets/cityscapes.py',
3 |     '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py'
4 | ]
5 | 


--------------------------------------------------------------------------------
/mmseg_custom/core/utils/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from .dist_util import check_dist_init, sync_random_seed
3 | from .misc import add_prefix
4 | 
5 | __all__ = ['add_prefix', 'check_dist_init', 'sync_random_seed']
6 | 


--------------------------------------------------------------------------------
/mmseg_custom/core/seg/sampler/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from .base_pixel_sampler import BasePixelSampler
3 | from .ohem_pixel_sampler import OHEMPixelSampler
4 | 
5 | __all__ = ['BasePixelSampler', 'OHEMPixelSampler']
6 | 


--------------------------------------------------------------------------------
/mmseg_custom/core/seg/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from .builder import build_pixel_sampler
3 | from .sampler import BasePixelSampler, OHEMPixelSampler
4 | 
5 | __all__ = ['build_pixel_sampler', 'BasePixelSampler', 'OHEMPixelSampler']
6 | 


--------------------------------------------------------------------------------
/mmseg_custom/core/optimizers/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from .layer_decay_optimizer_constructor import (
3 |     LayerDecayOptimizerConstructor, LearningRateDecayOptimizerConstructor)
4 | 
5 | __all__ = [
6 |     'LearningRateDecayOptimizerConstructor', 'LayerDecayOptimizerConstructor'
7 | ]
8 | 


--------------------------------------------------------------------------------
/mmseg_custom/configs/upernet/upernet_r50_512x512_160k_ade20k.py:
--------------------------------------------------------------------------------
1 | _base_ = [
2 |     '../_base_/models/upernet_r50.py', '../_base_/datasets/ade20k.py',
3 |     '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py'
4 | ]
5 | model = dict(
6 |     decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150))
7 | 


--------------------------------------------------------------------------------
/mmseg_custom/configs/upernet/upernet_r50_512x512_80k_ade20k.py:
--------------------------------------------------------------------------------
1 | _base_ = [
2 |     '../_base_/models/upernet_r50.py', '../_base_/datasets/ade20k.py',
3 |     '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py'
4 | ]
5 | model = dict(
6 |     decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150))
7 | 


--------------------------------------------------------------------------------
/mmseg_custom/configs/upernet/upernet_r50_512x512_20k_voc12aug.py:
--------------------------------------------------------------------------------
1 | _base_ = [
2 |     '../_base_/models/upernet_r50.py',
3 |     '../_base_/datasets/pascal_voc12_aug.py', '../_base_/default_runtime.py',
4 |     '../_base_/schedules/schedule_20k.py'
5 | ]
6 | model = dict(
7 |     decode_head=dict(num_classes=21), auxiliary_head=dict(num_classes=21))
8 | 


--------------------------------------------------------------------------------
/mmseg_custom/configs/upernet/upernet_r50_512x512_40k_voc12aug.py:
--------------------------------------------------------------------------------
1 | _base_ = [
2 |     '../_base_/models/upernet_r50.py',
3 |     '../_base_/datasets/pascal_voc12_aug.py', '../_base_/default_runtime.py',
4 |     '../_base_/schedules/schedule_40k.py'
5 | ]
6 | model = dict(
7 |     decode_head=dict(num_classes=21), auxiliary_head=dict(num_classes=21))
8 | 


--------------------------------------------------------------------------------
/mmseg_custom/configs/_base_/datasets/pascal_voc12_aug.py:
--------------------------------------------------------------------------------
 1 | _base_ = './pascal_voc12.py'
 2 | # dataset settings
 3 | data = dict(
 4 |     train=dict(
 5 |         ann_dir=['SegmentationClass', 'SegmentationClassAug'],
 6 |         split=[
 7 |             'ImageSets/Segmentation/train.txt',
 8 |             'ImageSets/Segmentation/aug.txt'
 9 |         ]))
10 | 


--------------------------------------------------------------------------------
/mmseg_custom/core/seg/builder.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) OpenMMLab. All rights reserved.
 2 | from mmcv.utils import Registry, build_from_cfg
 3 | 
 4 | PIXEL_SAMPLERS = Registry('pixel sampler')
 5 | 
 6 | 
 7 | def build_pixel_sampler(cfg, **default_args):
 8 |     """Build pixel sampler for segmentation map."""
 9 |     return build_from_cfg(cfg, PIXEL_SAMPLERS, default_args)
10 | 


--------------------------------------------------------------------------------
/mmseg_custom/datasets/pipelines/formating.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) OpenMMLab. All rights reserved.
 2 | # flake8: noqa
 3 | import warnings
 4 | 
 5 | from .formatting import *
 6 | 
 7 | warnings.warn('DeprecationWarning: mmseg.datasets.pipelines.formating will be '
 8 |               'deprecated in 2021, please replace it with '
 9 |               'mmseg.datasets.pipelines.formatting.')
10 | 


--------------------------------------------------------------------------------
/mmseg_custom/configs/_base_/default_runtime.py:
--------------------------------------------------------------------------------
 1 | # yapf:disable
 2 | log_config = dict(
 3 |     interval=50,
 4 |     hooks=[
 5 |         dict(type='TextLoggerHook', by_epoch=False),
 6 |         # dict(type='TensorboardLoggerHook')
 7 |     ])
 8 | # yapf:enable
 9 | dist_params = dict(backend='nccl')
10 | log_level = 'INFO'
11 | load_from = None
12 | resume_from = None
13 | workflow = [('train', 1)]
14 | cudnn_benchmark = True
15 | 


--------------------------------------------------------------------------------
/mmseg_custom/core/seg/sampler/base_pixel_sampler.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) OpenMMLab. All rights reserved.
 2 | from abc import ABCMeta, abstractmethod
 3 | 
 4 | 
 5 | class BasePixelSampler(metaclass=ABCMeta):
 6 |     """Base class of pixel sampler."""
 7 | 
 8 |     def __init__(self, **kwargs):
 9 |         pass
10 | 
11 |     @abstractmethod
12 |     def sample(self, seg_logit, seg_label):
13 |         """Placeholder for sample function."""
14 | 


--------------------------------------------------------------------------------
/mmseg_custom/configs/upernet/upernet_r50_769x769_40k_cityscapes.py:
--------------------------------------------------------------------------------
 1 | _base_ = [
 2 |     '../_base_/models/upernet_r50.py',
 3 |     '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py',
 4 |     '../_base_/schedules/schedule_40k.py'
 5 | ]
 6 | model = dict(
 7 |     decode_head=dict(align_corners=True),
 8 |     auxiliary_head=dict(align_corners=True),
 9 |     test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513)))
10 | 


--------------------------------------------------------------------------------
/mmseg_custom/configs/upernet/upernet_r50_769x769_80k_cityscapes.py:
--------------------------------------------------------------------------------
 1 | _base_ = [
 2 |     '../_base_/models/upernet_r50.py',
 3 |     '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py',
 4 |     '../_base_/schedules/schedule_80k.py'
 5 | ]
 6 | model = dict(
 7 |     decode_head=dict(align_corners=True),
 8 |     auxiliary_head=dict(align_corners=True),
 9 |     test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513)))
10 | 


--------------------------------------------------------------------------------
/mmseg_custom/configs/_base_/schedules/schedule_320k.py:
--------------------------------------------------------------------------------
 1 | # optimizer
 2 | optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005)
 3 | optimizer_config = dict()
 4 | # learning policy
 5 | lr_config = dict(policy='poly', power=0.9, min_lr=1e-4, by_epoch=False)
 6 | # runtime settings
 7 | runner = dict(type='IterBasedRunner', max_iters=320000)
 8 | checkpoint_config = dict(by_epoch=False, interval=32000)
 9 | evaluation = dict(interval=32000, metric='mIoU')
10 | 


--------------------------------------------------------------------------------
/mmseg_custom/configs/_base_/schedules/schedule_20k.py:
--------------------------------------------------------------------------------
 1 | # optimizer
 2 | optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005)
 3 | optimizer_config = dict()
 4 | # learning policy
 5 | lr_config = dict(policy='poly', power=0.9, min_lr=1e-4, by_epoch=False)
 6 | # runtime settings
 7 | runner = dict(type='IterBasedRunner', max_iters=20000)
 8 | checkpoint_config = dict(by_epoch=False, interval=2000)
 9 | evaluation = dict(interval=2000, metric='mIoU', pre_eval=True)
10 | 


--------------------------------------------------------------------------------
/mmseg_custom/configs/_base_/schedules/schedule_40k.py:
--------------------------------------------------------------------------------
 1 | # optimizer
 2 | optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005)
 3 | optimizer_config = dict()
 4 | # learning policy
 5 | lr_config = dict(policy='poly', power=0.9, min_lr=1e-4, by_epoch=False)
 6 | # runtime settings
 7 | runner = dict(type='IterBasedRunner', max_iters=40000)
 8 | checkpoint_config = dict(by_epoch=False, interval=4000)
 9 | evaluation = dict(interval=4000, metric='mIoU', pre_eval=True)
10 | 


--------------------------------------------------------------------------------
/mmseg_custom/configs/_base_/schedules/schedule_80k.py:
--------------------------------------------------------------------------------
 1 | # optimizer
 2 | optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005)
 3 | optimizer_config = dict()
 4 | # learning policy
 5 | lr_config = dict(policy='poly', power=0.9, min_lr=1e-4, by_epoch=False)
 6 | # runtime settings
 7 | runner = dict(type='IterBasedRunner', max_iters=80000)
 8 | checkpoint_config = dict(by_epoch=False, interval=8000)
 9 | evaluation = dict(interval=8000, metric='mIoU', pre_eval=True)
10 | 


--------------------------------------------------------------------------------
/mmseg_custom/configs/_base_/schedules/schedule_160k.py:
--------------------------------------------------------------------------------
 1 | # optimizer
 2 | optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005)
 3 | optimizer_config = dict()
 4 | # learning policy
 5 | lr_config = dict(policy='poly', power=0.9, min_lr=1e-4, by_epoch=False)
 6 | # runtime settings
 7 | runner = dict(type='IterBasedRunner', max_iters=160000)
 8 | checkpoint_config = dict(by_epoch=False, interval=16000)
 9 | evaluation = dict(interval=16000, metric='mIoU', pre_eval=True)
10 | 


--------------------------------------------------------------------------------
/mmseg_custom/core/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) OpenMMLab. All rights reserved.
 2 | from .builder import (OPTIMIZER_BUILDERS, build_optimizer,
 3 |                       build_optimizer_constructor)
 4 | from .evaluation import *  # noqa: F401, F403
 5 | from .optimizers import *  # noqa: F401, F403
 6 | from .seg import *  # noqa: F401, F403
 7 | from .utils import *  # noqa: F401, F403
 8 | 
 9 | __all__ = [
10 |     'OPTIMIZER_BUILDERS', 'build_optimizer', 'build_optimizer_constructor'
11 | ]
12 | 


--------------------------------------------------------------------------------
/mmseg_custom/datasets/dark_zurich.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) OpenMMLab. All rights reserved.
 2 | from .builder import DATASETS
 3 | from .cityscapes import CityscapesDataset
 4 | 
 5 | 
 6 | @DATASETS.register_module()
 7 | class DarkZurichDataset(CityscapesDataset):
 8 |     """DarkZurichDataset dataset."""
 9 | 
10 |     def __init__(self, **kwargs):
11 |         super().__init__(
12 |             img_suffix='_rgb_anon.png',
13 |             seg_map_suffix='_gt_labelTrainIds.png',
14 |             **kwargs)
15 | 


--------------------------------------------------------------------------------
/mmseg_custom/datasets/night_driving.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) OpenMMLab. All rights reserved.
 2 | from .builder import DATASETS
 3 | from .cityscapes import CityscapesDataset
 4 | 
 5 | 
 6 | @DATASETS.register_module()
 7 | class NightDrivingDataset(CityscapesDataset):
 8 |     """NightDrivingDataset dataset."""
 9 | 
10 |     def __init__(self, **kwargs):
11 |         super().__init__(
12 |             img_suffix='_leftImg8bit.png',
13 |             seg_map_suffix='_gtCoarse_labelTrainIds.png',
14 |             **kwargs)
15 | 


--------------------------------------------------------------------------------
/mmseg_custom/core/utils/misc.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) OpenMMLab. All rights reserved.
 2 | def add_prefix(inputs, prefix):
 3 |     """Add prefix for dict.
 4 | 
 5 |     Args:
 6 |         inputs (dict): The input dict with str keys.
 7 |         prefix (str): The prefix to add.
 8 | 
 9 |     Returns:
10 | 
11 |         dict: The dict with keys updated with ``prefix``.
12 |     """
13 | 
14 |     outputs = dict()
15 |     for name, value in inputs.items():
16 |         outputs[f'{prefix}.{name}'] = value
17 | 
18 |     return outputs
19 | 


--------------------------------------------------------------------------------
/mmseg_custom/core/evaluation/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) OpenMMLab. All rights reserved.
 2 | from .class_names import get_classes, get_palette
 3 | from .eval_hooks import DistEvalHook, EvalHook
 4 | from .metrics import (eval_metrics, intersect_and_union, mean_dice,
 5 |                       mean_fscore, mean_iou, pre_eval_to_metrics)
 6 | 
 7 | __all__ = [
 8 |     'EvalHook', 'DistEvalHook', 'mean_dice', 'mean_iou', 'mean_fscore',
 9 |     'eval_metrics', 'get_classes', 'get_palette', 'pre_eval_to_metrics',
10 |     'intersect_and_union'
11 | ]
12 | 


--------------------------------------------------------------------------------
/dist_train.sh:
--------------------------------------------------------------------------------
 1 | CONFIG=$1
 2 | GPUS=$2
 3 | NNODES=${NNODES:-1}
 4 | NODE_RANK=${NODE_RANK:-0}
 5 | # PORT=${PORT:-29500}
 6 | PORT=${PORT:-$((1 + RANDOM % 10000))}
 7 | MASTER_ADDR=${MASTER_ADDR:-"127.0.0.1"}
 8 | 
 9 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \
10 | python -m torch.distributed.launch \
11 |     --nnodes=$NNODES \
12 |     --node_rank=$NODE_RANK \
13 |     --master_addr=$MASTER_ADDR \
14 |     --nproc_per_node=$GPUS \
15 |     --master_port=$PORT \
16 |     $(dirname "$0")/train.py \
17 |     $CONFIG \
18 |     --launcher pytorch ${@:3}
19 | 


--------------------------------------------------------------------------------
/dist_test.sh:
--------------------------------------------------------------------------------
 1 | CONFIG=$1
 2 | CHECKPOINT=$2
 3 | GPUS=$3
 4 | NNODES=${NNODES:-1}
 5 | NODE_RANK=${NODE_RANK:-0}
 6 | PORT=${PORT:-29500}
 7 | MASTER_ADDR=${MASTER_ADDR:-"127.0.0.1"}
 8 | 
 9 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \
10 | python -m torch.distributed.launch \
11 |     --nnodes=$NNODES \
12 |     --node_rank=$NODE_RANK \
13 |     --master_addr=$MASTER_ADDR \
14 |     --nproc_per_node=$GPUS \
15 |     --master_port=$PORT \
16 |     $(dirname "$0")/test.py \
17 |     $CONFIG \
18 |     $CHECKPOINT \
19 |     --launcher pytorch \
20 |     ${@:4}
21 | 


--------------------------------------------------------------------------------
/mmseg_custom/apis/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) OpenMMLab. All rights reserved.
 2 | from .inference import inference_segmentor, init_segmentor, show_result_pyplot
 3 | from .test import multi_gpu_test, single_gpu_test
 4 | from .train import (get_root_logger, init_random_seed, set_random_seed,
 5 |                     train_segmentor)
 6 | 
 7 | __all__ = [
 8 |     'get_root_logger', 'set_random_seed', 'train_segmentor', 'init_segmentor',
 9 |     'inference_segmentor', 'multi_gpu_test', 'single_gpu_test',
10 |     'show_result_pyplot', 'init_random_seed'
11 | ]
12 | 


--------------------------------------------------------------------------------
/mmseg_custom/version.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Open-MMLab. All rights reserved.
 2 | 
 3 | __version__ = '0.25.0'
 4 | 
 5 | 
 6 | def parse_version_info(version_str):
 7 |     version_info = []
 8 |     for x in version_str.split('.'):
 9 |         if x.isdigit():
10 |             version_info.append(int(x))
11 |         elif x.find('rc') != -1:
12 |             patch_version = x.split('rc')
13 |             version_info.append(int(patch_version[0]))
14 |             version_info.append(f'rc{patch_version[1]}')
15 |     return tuple(version_info)
16 | 
17 | 
18 | version_info = parse_version_info(__version__)
19 | 


--------------------------------------------------------------------------------
/slurm_train.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | set -x
 4 | 
 5 | PARTITION=$1
 6 | JOB_NAME=$2
 7 | CONFIG=$3
 8 | GPUS=${GPUS:-4}
 9 | GPUS_PER_NODE=${GPUS_PER_NODE:-4}
10 | CPUS_PER_TASK=${CPUS_PER_TASK:-5}
11 | SRUN_ARGS=${SRUN_ARGS:-""}
12 | PY_ARGS=${@:4}
13 | 
14 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \
15 | srun -p ${PARTITION} \
16 |     --job-name=${JOB_NAME} \
17 |     --gres=gpu:${GPUS_PER_NODE} \
18 |     --ntasks=${GPUS} \
19 |     --ntasks-per-node=${GPUS_PER_NODE} \
20 |     --cpus-per-task=${CPUS_PER_TASK} \
21 |     --kill-on-bad-exit=1 \
22 |     ${SRUN_ARGS} \
23 |     python -u tools/train.py ${CONFIG} --launcher="slurm" ${PY_ARGS}
24 | 


--------------------------------------------------------------------------------
/slurm_test.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | set -x
 4 | 
 5 | PARTITION=$1
 6 | JOB_NAME=$2
 7 | CONFIG=$3
 8 | CHECKPOINT=$4
 9 | GPUS=${GPUS:-4}
10 | GPUS_PER_NODE=${GPUS_PER_NODE:-4}
11 | CPUS_PER_TASK=${CPUS_PER_TASK:-5}
12 | PY_ARGS=${@:5}
13 | SRUN_ARGS=${SRUN_ARGS:-""}
14 | 
15 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \
16 | srun -p ${PARTITION} \
17 |     --job-name=${JOB_NAME} \
18 |     --gres=gpu:${GPUS_PER_NODE} \
19 |     --ntasks=${GPUS} \
20 |     --ntasks-per-node=${GPUS_PER_NODE} \
21 |     --cpus-per-task=${CPUS_PER_TASK} \
22 |     --kill-on-bad-exit=1 \
23 |     ${SRUN_ARGS} \
24 |     python -u tools/test.py ${CONFIG} ${CHECKPOINT} --launcher="slurm" ${PY_ARGS}
25 | 


--------------------------------------------------------------------------------
/mmseg_custom/models/utils/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) OpenMMLab. All rights reserved.
 2 | from .embed import PatchEmbed
 3 | from .inverted_residual import InvertedResidual, InvertedResidualV3
 4 | from .make_divisible import make_divisible
 5 | from .res_layer import ResLayer
 6 | from .se_layer import SELayer
 7 | from .self_attention_block import SelfAttentionBlock
 8 | from .shape_convert import (nchw2nlc2nchw, nchw_to_nlc, nlc2nchw2nlc,
 9 |                             nlc_to_nchw)
10 | from .up_conv_block import UpConvBlock
11 | 
12 | __all__ = [
13 |     'ResLayer', 'SelfAttentionBlock', 'make_divisible', 'InvertedResidual',
14 |     'UpConvBlock', 'InvertedResidualV3', 'SELayer', 'PatchEmbed',
15 |     'nchw_to_nlc', 'nlc_to_nchw', 'nchw2nlc2nchw', 'nlc2nchw2nlc'
16 | ]
17 | 


--------------------------------------------------------------------------------
/mmseg_custom/configs/upernet/upernet_r101_512x512_40k_voc12aug_hard_pixel.py:
--------------------------------------------------------------------------------
 1 | _base_ = './upernet_r50_512x512_40k_voc12aug.py'
 2 | model = dict(
 3 |     pretrained='open-mmlab://resnet101_v1c', 
 4 |     backbone=dict(
 5 |         depth=101,
 6 |         # type='ResNetV1c',
 7 |         # type='ResNetV1cWithBlur',
 8 |         type='NyResNet',
 9 |         # blur_type='adafreq',
10 |         # blur_type='blur',
11 |         blur_type='flc',
12 |         freq_thres=0.25 * 1.4,
13 |         # blur_k=7,
14 |         with_cp=True,
15 |         # use_checkpoing=True,
16 |         ),
17 |     decode_head=dict(
18 |         type='UPerHead',
19 |         channels=128,)
20 | )
21 | data = dict(
22 |     samples_per_gpu=16,
23 |     workers_per_gpu=16,
24 | )
25 | checkpoint_config = dict(max_keep_ckpts=2)
26 | evaluation = dict(save_best='mIoU', pre_eval='True')


--------------------------------------------------------------------------------
/mmseg_custom/datasets/pipelines/add_noise.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) OpenMMLab. All rights reserved.
 2 | import copy
 3 | 
 4 | import mmcv
 5 | import numpy as np
 6 | from mmcv.utils import deprecated_api_warning, is_tuple_of
 7 | from numpy import random
 8 | 
 9 | from ..builder import PIPELINES
10 | 
11 | import numpy as np
12 | 
@PIPELINES.register_module()
class AddNoisyImg(object):
    """Add zero-mean Gaussian noise to every image field in ``results``.

    Each field listed in ``results['img_fields']`` (``['img']`` when that key
    is absent) gets its own noise sample, matching that field's shape. The
    noisy image is clipped to ``[0, 255]`` and cast to ``uint8``.

    Args:
        sigma (float): Standard deviation of the Gaussian noise.
            Default: 10.
    """

    def __init__(self, sigma=10.):
        self.sigma = sigma

    def __call__(self, results):
        """Apply the noise in place and return the updated ``results`` dict.

        Args:
            results (dict): Pipeline result dict holding the image array(s).

        Returns:
            dict: The same dict with each image field replaced by its noisy,
                clipped ``uint8`` version.
        """
        print(f'Add noise: sigma = {self.sigma}')
        for key in results.get('img_fields', ['img']):
            # Read and write the field being iterated; previously only
            # results['img'] was touched regardless of ``key``.
            img = results[key]
            noise = np.random.normal(scale=self.sigma, size=img.shape)
            # Widen to int32 before adding so the sum cannot wrap around.
            noisy = img.astype(np.int32) + noise
            results[key] = np.clip(noisy, 0, 255).astype(np.uint8)

        return results

    def __repr__(self):
        return f'{self.__class__.__name__}(sigma={self.sigma})'
25 | 


--------------------------------------------------------------------------------
/mmseg_custom/configs/_base_/models/lraspp_m-v3-d8.py:
--------------------------------------------------------------------------------
 1 | # model settings
 2 | norm_cfg = dict(type='SyncBN', eps=0.001, requires_grad=True)
 3 | model = dict(
 4 |     type='EncoderDecoder',
 5 |     backbone=dict(
 6 |         type='MobileNetV3',
 7 |         arch='large',
 8 |         out_indices=(1, 3, 16),
 9 |         norm_cfg=norm_cfg),
10 |     decode_head=dict(
11 |         type='LRASPPHead',
12 |         in_channels=(16, 24, 960),
13 |         in_index=(0, 1, 2),
14 |         channels=128,
15 |         input_transform='multiple_select',
16 |         dropout_ratio=0.1,
17 |         num_classes=19,
18 |         norm_cfg=norm_cfg,
19 |         act_cfg=dict(type='ReLU'),
20 |         align_corners=False,
21 |         loss_decode=dict(
22 |             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
23 |     # model training and testing settings
24 |     train_cfg=dict(),
25 |     test_cfg=dict(mode='whole'))
26 | 


--------------------------------------------------------------------------------
/mmseg_custom/datasets/hrf.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) OpenMMLab. All rights reserved.
 2 | 
 3 | from .builder import DATASETS
 4 | from .custom import CustomDataset
 5 | 
 6 | 
 7 | @DATASETS.register_module()
 8 | class HRFDataset(CustomDataset):
 9 |     """HRF dataset.
10 | 
11 |     In segmentation map annotation for HRF, 0 stands for background, which is
12 |     included in 2 categories. ``reduce_zero_label`` is fixed to False. The
13 |     ``img_suffix`` is fixed to '.png' and ``seg_map_suffix`` is fixed to
14 |     '.png'.
15 |     """
16 | 
17 |     CLASSES = ('background', 'vessel')
18 | 
19 |     PALETTE = [[120, 120, 120], [6, 230, 230]]
20 | 
21 |     def __init__(self, **kwargs):
22 |         super(HRFDataset, self).__init__(
23 |             img_suffix='.png',
24 |             seg_map_suffix='.png',
25 |             reduce_zero_label=False,
26 |             **kwargs)
27 |         assert self.file_client.exists(self.img_dir)
28 | 


--------------------------------------------------------------------------------
/mmseg_custom/datasets/drive.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) OpenMMLab. All rights reserved.
 2 | 
 3 | from .builder import DATASETS
 4 | from .custom import CustomDataset
 5 | 
 6 | 
 7 | @DATASETS.register_module()
 8 | class DRIVEDataset(CustomDataset):
 9 |     """DRIVE dataset.
10 | 
11 |     In segmentation map annotation for DRIVE, 0 stands for background, which is
12 |     included in 2 categories. ``reduce_zero_label`` is fixed to False. The
13 |     ``img_suffix`` is fixed to '.png' and ``seg_map_suffix`` is fixed to
14 |     '_manual1.png'.
15 |     """
16 | 
17 |     CLASSES = ('background', 'vessel')
18 | 
19 |     PALETTE = [[120, 120, 120], [6, 230, 230]]
20 | 
21 |     def __init__(self, **kwargs):
22 |         super(DRIVEDataset, self).__init__(
23 |             img_suffix='.png',
24 |             seg_map_suffix='_manual1.png',
25 |             reduce_zero_label=False,
26 |             **kwargs)
27 |         assert self.file_client.exists(self.img_dir)
28 | 


--------------------------------------------------------------------------------
/mmseg_custom/datasets/isprs.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) OpenMMLab. All rights reserved.
 2 | from .builder import DATASETS
 3 | from .custom import CustomDataset
 4 | 
 5 | 
 6 | @DATASETS.register_module()
 7 | class ISPRSDataset(CustomDataset):
 8 |     """ISPRS dataset.
 9 | 
10 |     In segmentation map annotation for LoveDA, 0 is the ignore index.
11 |     ``reduce_zero_label`` should be set to True. The ``img_suffix`` and
12 |     ``seg_map_suffix`` are both fixed to '.png'.
13 |     """
14 |     CLASSES = ('impervious_surface', 'building', 'low_vegetation', 'tree',
15 |                'car', 'clutter')
16 | 
17 |     PALETTE = [[255, 255, 255], [0, 0, 255], [0, 255, 255], [0, 255, 0],
18 |                [255, 255, 0], [255, 0, 0]]
19 | 
20 |     def __init__(self, **kwargs):
21 |         super(ISPRSDataset, self).__init__(
22 |             img_suffix='.png',
23 |             seg_map_suffix='.png',
24 |             reduce_zero_label=True,
25 |             **kwargs)
26 | 


--------------------------------------------------------------------------------
/mmseg_custom/datasets/stare.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) OpenMMLab. All rights reserved.
 2 | import os.path as osp
 3 | 
 4 | from .builder import DATASETS
 5 | from .custom import CustomDataset
 6 | 
 7 | 
 8 | @DATASETS.register_module()
 9 | class STAREDataset(CustomDataset):
10 |     """STARE dataset.
11 | 
12 |     In segmentation map annotation for STARE, 0 stands for background, which is
13 |     included in 2 categories. ``reduce_zero_label`` is fixed to False. The
14 |     ``img_suffix`` is fixed to '.png' and ``seg_map_suffix`` is fixed to
15 |     '.ah.png'.
16 |     """
17 | 
18 |     CLASSES = ('background', 'vessel')
19 | 
20 |     PALETTE = [[120, 120, 120], [6, 230, 230]]
21 | 
22 |     def __init__(self, **kwargs):
23 |         super(STAREDataset, self).__init__(
24 |             img_suffix='.png',
25 |             seg_map_suffix='.ah.png',
26 |             reduce_zero_label=False,
27 |             **kwargs)
28 |         assert osp.exists(self.img_dir)
29 | 


--------------------------------------------------------------------------------
/mmseg_custom/datasets/chase_db1.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) OpenMMLab. All rights reserved.
 2 | 
 3 | from .builder import DATASETS
 4 | from .custom import CustomDataset
 5 | 
 6 | 
 7 | @DATASETS.register_module()
 8 | class ChaseDB1Dataset(CustomDataset):
 9 |     """Chase_db1 dataset.
10 | 
11 |     In segmentation map annotation for Chase_db1, 0 stands for background,
12 |     which is included in 2 categories. ``reduce_zero_label`` is fixed to False.
13 |     The ``img_suffix`` is fixed to '.png' and ``seg_map_suffix`` is fixed to
14 |     '_1stHO.png'.
15 |     """
16 | 
17 |     CLASSES = ('background', 'vessel')
18 | 
19 |     PALETTE = [[120, 120, 120], [6, 230, 230]]
20 | 
21 |     def __init__(self, **kwargs):
22 |         super(ChaseDB1Dataset, self).__init__(
23 |             img_suffix='.png',
24 |             seg_map_suffix='_1stHO.png',
25 |             reduce_zero_label=False,
26 |             **kwargs)
27 |         assert self.file_client.exists(self.img_dir)
28 | 


--------------------------------------------------------------------------------
/mmseg_custom/datasets/potsdam.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) OpenMMLab. All rights reserved.
 2 | from .builder import DATASETS
 3 | from .custom import CustomDataset
 4 | 
 5 | 
 6 | @DATASETS.register_module()
 7 | class PotsdamDataset(CustomDataset):
 8 |     """ISPRS Potsdam dataset.
 9 | 
10 |     In segmentation map annotation for Potsdam dataset, 0 is the ignore index.
11 |     ``reduce_zero_label`` should be set to True. The ``img_suffix`` and
12 |     ``seg_map_suffix`` are both fixed to '.png'.
13 |     """
14 |     CLASSES = ('impervious_surface', 'building', 'low_vegetation', 'tree',
15 |                'car', 'clutter')
16 | 
17 |     PALETTE = [[255, 255, 255], [0, 0, 255], [0, 255, 255], [0, 255, 0],
18 |                [255, 255, 0], [255, 0, 0]]
19 | 
20 |     def __init__(self, **kwargs):
21 |         super(PotsdamDataset, self).__init__(
22 |             img_suffix='.png',
23 |             seg_map_suffix='.png',
24 |             reduce_zero_label=True,
25 |             **kwargs)
26 | 


--------------------------------------------------------------------------------
/mmseg_custom/datasets/pipelines/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) OpenMMLab. All rights reserved.
 2 | from .compose import Compose
 3 | from .formatting import (Collect, ImageToTensor, ToDataContainer, ToTensor,
 4 |                          Transpose, to_tensor)
 5 | from .loading import LoadAnnotations, LoadImageFromFile
 6 | from .test_time_aug import MultiScaleFlipAug
 7 | from .transforms import (CLAHE, AdjustGamma, Normalize, Pad,
 8 |                          PhotoMetricDistortion, RandomCrop, RandomCutOut,
 9 |                          RandomFlip, RandomMosaic, RandomRotate, Rerange,
10 |                          Resize, RGB2Gray, SegRescale)
11 | from .add_noise import AddNoisyImg
12 | __all__ = [
13 |     'Compose', 'to_tensor', 'ToTensor', 'ImageToTensor', 'ToDataContainer',
14 |     'Transpose', 'Collect', 'LoadAnnotations', 'LoadImageFromFile',
15 |     'MultiScaleFlipAug', 'Resize', 'RandomFlip', 'Pad', 'RandomCrop',
16 |     'Normalize', 'SegRescale', 'PhotoMetricDistortion', 'RandomRotate',
17 |     'AdjustGamma', 'CLAHE', 'Rerange', 'RGB2Gray', 'RandomCutOut',
18 |     'RandomMosaic',
19 |     'AddNoisyImg'
20 | ]
21 | 


--------------------------------------------------------------------------------
/mmseg_custom/configs/_base_/models/dpt_vit-b16.py:
--------------------------------------------------------------------------------
 1 | norm_cfg = dict(type='SyncBN', requires_grad=True)
 2 | model = dict(
 3 |     type='EncoderDecoder',
 4 |     pretrained='pretrain/vit-b16_p16_224-80ecf9dd.pth', # noqa
 5 |     backbone=dict(
 6 |         type='VisionTransformer',
 7 |         img_size=224,
 8 |         embed_dims=768,
 9 |         num_layers=12,
10 |         num_heads=12,
11 |         out_indices=(2, 5, 8, 11),
12 |         final_norm=False,
13 |         with_cls_token=True,
14 |         output_cls_token=True),
15 |     decode_head=dict(
16 |         type='DPTHead',
17 |         in_channels=(768, 768, 768, 768),
18 |         channels=256,
19 |         embed_dims=768,
20 |         post_process_channels=[96, 192, 384, 768],
21 |         num_classes=150,
22 |         readout_type='project',
23 |         input_transform='multiple_select',
24 |         in_index=(0, 1, 2, 3),
25 |         norm_cfg=norm_cfg,
26 |         loss_decode=dict(
27 |             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
28 |     auxiliary_head=None,
29 |     # model training and testing settings
30 |     train_cfg=dict(),
31 |     test_cfg=dict(mode='whole'))  # yapf: disable
32 | 


--------------------------------------------------------------------------------
/mmseg_custom/configs/_base_/models/erfnet_fcn.py:
--------------------------------------------------------------------------------
 1 | # model settings
 2 | norm_cfg = dict(type='SyncBN', requires_grad=True)
 3 | model = dict(
 4 |     type='EncoderDecoder',
 5 |     pretrained=None,
 6 |     backbone=dict(
 7 |         type='ERFNet',
 8 |         in_channels=3,
 9 |         enc_downsample_channels=(16, 64, 128),
10 |         enc_stage_non_bottlenecks=(5, 8),
11 |         enc_non_bottleneck_dilations=(2, 4, 8, 16),
12 |         enc_non_bottleneck_channels=(64, 128),
13 |         dec_upsample_channels=(64, 16),
14 |         dec_stages_non_bottleneck=(2, 2),
15 |         dec_non_bottleneck_channels=(64, 16),
16 |         dropout_ratio=0.1,
17 |         init_cfg=None),
18 |     decode_head=dict(
19 |         type='FCNHead',
20 |         in_channels=16,
21 |         channels=128,
22 |         num_convs=1,
23 |         concat_input=False,
24 |         dropout_ratio=0.1,
25 |         num_classes=19,
26 |         norm_cfg=norm_cfg,
27 |         align_corners=False,
28 |         loss_decode=dict(
29 |             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
30 |     # model training and testing settings
31 |     train_cfg=dict(),
32 |     test_cfg=dict(mode='whole'))
33 | 


--------------------------------------------------------------------------------
/mmseg_custom/configs/_base_/models/segformer_mit-b0.py:
--------------------------------------------------------------------------------
 1 | # model settings
 2 | norm_cfg = dict(type='SyncBN', requires_grad=True)
 3 | model = dict(
 4 |     type='EncoderDecoder',
 5 |     pretrained=None,
 6 |     backbone=dict(
 7 |         type='MixVisionTransformer',
 8 |         in_channels=3,
 9 |         embed_dims=32,
10 |         num_stages=4,
11 |         num_layers=[2, 2, 2, 2],
12 |         num_heads=[1, 2, 5, 8],
13 |         patch_sizes=[7, 3, 3, 3],
14 |         sr_ratios=[8, 4, 2, 1],
15 |         out_indices=(0, 1, 2, 3),
16 |         mlp_ratio=4,
17 |         qkv_bias=True,
18 |         drop_rate=0.0,
19 |         attn_drop_rate=0.0,
20 |         drop_path_rate=0.1),
21 |     decode_head=dict(
22 |         type='SegformerHead',
23 |         in_channels=[32, 64, 160, 256],
24 |         in_index=[0, 1, 2, 3],
25 |         channels=256,
26 |         dropout_ratio=0.1,
27 |         num_classes=19,
28 |         norm_cfg=norm_cfg,
29 |         align_corners=False,
30 |         loss_decode=dict(
31 |             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
32 |     # model training and testing settings
33 |     train_cfg=dict(),
34 |     test_cfg=dict(mode='whole'))
35 | 


--------------------------------------------------------------------------------
/mmseg_custom/configs/_base_/models/fpn_r50.py:
--------------------------------------------------------------------------------
 1 | # model settings
 2 | norm_cfg = dict(type='SyncBN', requires_grad=True)
 3 | model = dict(
 4 |     type='EncoderDecoder',
 5 |     pretrained='open-mmlab://resnet50_v1c',
 6 |     backbone=dict(
 7 |         type='ResNetV1c',
 8 |         depth=50,
 9 |         num_stages=4,
10 |         out_indices=(0, 1, 2, 3),
11 |         dilations=(1, 1, 1, 1),
12 |         strides=(1, 2, 2, 2),
13 |         norm_cfg=norm_cfg,
14 |         norm_eval=False,
15 |         style='pytorch',
16 |         contract_dilation=True),
17 |     neck=dict(
18 |         type='FPN',
19 |         in_channels=[256, 512, 1024, 2048],
20 |         out_channels=256,
21 |         num_outs=4),
22 |     decode_head=dict(
23 |         type='FPNHead',
24 |         in_channels=[256, 256, 256, 256],
25 |         in_index=[0, 1, 2, 3],
26 |         feature_strides=[4, 8, 16, 32],
27 |         channels=128,
28 |         dropout_ratio=0.1,
29 |         num_classes=19,
30 |         norm_cfg=norm_cfg,
31 |         align_corners=False,
32 |         loss_decode=dict(
33 |             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
34 |     # model training and testing settings
35 |     train_cfg=dict(),
36 |     test_cfg=dict(mode='whole'))
37 | 


--------------------------------------------------------------------------------
/mmseg_custom/configs/_base_/models/cgnet.py:
--------------------------------------------------------------------------------
 1 | # model settings
 2 | norm_cfg = dict(type='SyncBN', eps=1e-03, requires_grad=True)
 3 | model = dict(
 4 |     type='EncoderDecoder',
 5 |     backbone=dict(
 6 |         type='CGNet',
 7 |         norm_cfg=norm_cfg,
 8 |         in_channels=3,
 9 |         num_channels=(32, 64, 128),
10 |         num_blocks=(3, 21),
11 |         dilations=(2, 4),
12 |         reductions=(8, 16)),
13 |     decode_head=dict(
14 |         type='FCNHead',
15 |         in_channels=256,
16 |         in_index=2,
17 |         channels=256,
18 |         num_convs=0,
19 |         concat_input=False,
20 |         dropout_ratio=0,
21 |         num_classes=19,
22 |         norm_cfg=norm_cfg,
23 |         loss_decode=dict(
24 |             type='CrossEntropyLoss',
25 |             use_sigmoid=False,
26 |             loss_weight=1.0,
27 |             class_weight=[
28 |                 2.5959933, 6.7415504, 3.5354059, 9.8663225, 9.690899, 9.369352,
29 |                 10.289121, 9.953208, 4.3097677, 9.490387, 7.674431, 9.396905,
30 |                 10.347791, 6.3927646, 10.226669, 10.241062, 10.280587,
31 |                 10.396974, 10.055647
32 |             ])),
33 |     # model training and testing settings
34 |     train_cfg=dict(sampler=None),
35 |     test_cfg=dict(mode='whole'))
36 | 


--------------------------------------------------------------------------------
/mmseg_custom/configs/_base_/models/segmenter_vit-b16_mask.py:
--------------------------------------------------------------------------------
 1 | checkpoint = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/segmenter/vit_base_p16_384_20220308-96dfe169.pth'  # noqa
 2 | # model settings
 3 | backbone_norm_cfg = dict(type='LN', eps=1e-6, requires_grad=True)
 4 | model = dict(
 5 |     type='EncoderDecoder',
 6 |     pretrained=checkpoint,
 7 |     backbone=dict(
 8 |         type='VisionTransformer',
 9 |         img_size=(512, 512),
10 |         patch_size=16,
11 |         in_channels=3,
12 |         embed_dims=768,
13 |         num_layers=12,
14 |         num_heads=12,
15 |         drop_path_rate=0.1,
16 |         attn_drop_rate=0.0,
17 |         drop_rate=0.0,
18 |         final_norm=True,
19 |         norm_cfg=backbone_norm_cfg,
20 |         with_cls_token=True,
21 |         interpolate_mode='bicubic',
22 |     ),
23 |     decode_head=dict(
24 |         type='SegmenterMaskTransformerHead',
25 |         in_channels=768,
26 |         channels=768,
27 |         num_classes=150,
28 |         num_layers=2,
29 |         num_heads=12,
30 |         embed_dims=768,
31 |         dropout_ratio=0.0,
32 |         loss_decode=dict(
33 |             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
34 |     ),
35 |     test_cfg=dict(mode='slide', crop_size=(512, 512), stride=(480, 480)),
36 | )
37 | 


--------------------------------------------------------------------------------
/mmseg_custom/datasets/voc.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) OpenMMLab. All rights reserved.
 2 | import os.path as osp
 3 | 
 4 | from .builder import DATASETS
 5 | from .custom import CustomDataset
 6 | 
 7 | 
 8 | @DATASETS.register_module()
 9 | class PascalVOCDataset(CustomDataset):
10 |     """Pascal VOC dataset.
11 | 
12 |     Args:
13 |         split (str): Split txt file for Pascal VOC.
14 |     """
15 | 
16 |     CLASSES = ('background', 'aeroplane', 'bicycle', 'bird', 'boat', 'bottle',
17 |                'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog',
18 |                'horse', 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa',
19 |                'train', 'tvmonitor')
20 | 
21 |     PALETTE = [[0, 0, 0], [128, 0, 0], [0, 128, 0], [128, 128, 0], [0, 0, 128],
22 |                [128, 0, 128], [0, 128, 128], [128, 128, 128], [64, 0, 0],
23 |                [192, 0, 0], [64, 128, 0], [192, 128, 0], [64, 0, 128],
24 |                [192, 0, 128], [64, 128, 128], [192, 128, 128], [0, 64, 0],
25 |                [128, 64, 0], [0, 192, 0], [128, 192, 0], [0, 64, 128]]
26 | 
27 |     def __init__(self, split, **kwargs):
28 |         super(PascalVOCDataset, self).__init__(
29 |             img_suffix='.jpg', seg_map_suffix='.png', split=split, **kwargs)
30 |         assert osp.exists(self.img_dir) and self.split is not None
31 | 


--------------------------------------------------------------------------------
/mmseg_custom/core/builder.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) OpenMMLab. All rights reserved.
 2 | import copy
 3 | 
 4 | from mmcv.runner.optimizer import OPTIMIZER_BUILDERS as MMCV_OPTIMIZER_BUILDERS
 5 | from mmcv.utils import Registry, build_from_cfg
 6 | 
 7 | OPTIMIZER_BUILDERS = Registry(
 8 |     'optimizer builder', parent=MMCV_OPTIMIZER_BUILDERS)
 9 | 
10 | 
def build_optimizer_constructor(cfg):
    """Build an optimizer constructor from ``cfg``.

    The constructor named by ``cfg['type']`` is looked up in the local
    ``OPTIMIZER_BUILDERS`` registry first and then in mmcv's registry.

    Args:
        cfg (dict): Constructor config; its ``type`` entry selects the class.

    Returns:
        The object produced by ``build_from_cfg`` for the matching registry.

    Raises:
        KeyError: If the type is found in neither registry.
    """
    name = cfg.get('type')
    # Local registry takes precedence over the mmcv one.
    for registry in (OPTIMIZER_BUILDERS, MMCV_OPTIMIZER_BUILDERS):
        if name in registry:
            return build_from_cfg(cfg, registry)
    raise KeyError(f'{name} is not registered '
                   'in the optimizer builder registry.')
20 | 
21 | 
def build_optimizer(model, cfg):
    """Build an optimizer for ``model`` from an optimizer config.

    ``cfg`` is deep-copied, so the caller's config is left unmodified.

    Args:
        model: The model handed to the optimizer constructor.
        cfg (dict): Optimizer config. The optional ``constructor`` entry
            (default ``'DefaultOptimizerConstructor'``) and ``paramwise_cfg``
            entry (default ``None``) are split off; the remaining entries are
            passed on as ``optimizer_cfg``.

    Returns:
        The result of calling the built constructor on ``model``.
    """
    remaining_cfg = copy.deepcopy(cfg)
    ctor_name = remaining_cfg.pop('constructor', 'DefaultOptimizerConstructor')
    per_param_cfg = remaining_cfg.pop('paramwise_cfg', None)
    ctor_cfg = {
        'type': ctor_name,
        'optimizer_cfg': remaining_cfg,
        'paramwise_cfg': per_param_cfg,
    }
    return build_optimizer_constructor(ctor_cfg)(model)
34 | 


--------------------------------------------------------------------------------
/mmseg_custom/models/utils/make_divisible.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) OpenMMLab. All rights reserved.
 2 | def make_divisible(value, divisor, min_value=None, min_ratio=0.9):
 3 |     """Make divisible function.
 4 | 
 5 |     This function rounds the channel number to the nearest value that can be
 6 |     divisible by the divisor. It is taken from the original tf repo. It ensures
 7 |     that all layers have a channel number that is divisible by divisor. It can
 8 |     be seen here: https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py  # noqa
 9 | 
10 |     Args:
11 |         value (int): The original channel number.
12 |         divisor (int): The divisor to fully divide the channel number.
13 |         min_value (int): The minimum value of the output channel.
14 |             Default: None, means that the minimum value equal to the divisor.
15 |         min_ratio (float): The minimum ratio of the rounded channel number to
16 |             the original channel number. Default: 0.9.
17 | 
18 |     Returns:
19 |         int: The modified output channel number.
20 |     """
21 | 
22 |     if min_value is None:
23 |         min_value = divisor
24 |     new_value = max(min_value, int(value + divisor / 2) // divisor * divisor)
25 |     # Make sure that round down does not go down by more than (1-min_ratio).
26 |     if new_value < min_ratio * value:
27 |         new_value += divisor
28 |     return new_value
29 | 


--------------------------------------------------------------------------------
/mmseg_custom/datasets/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) OpenMMLab. All rights reserved.
 2 | from .ade import ADE20KDataset
 3 | from .builder import DATASETS, PIPELINES, build_dataloader, build_dataset
 4 | from .chase_db1 import ChaseDB1Dataset
 5 | from .cityscapes import CityscapesDataset
 6 | from .coco_stuff import COCOStuffDataset
 7 | from .custom import CustomDataset
 8 | from .dark_zurich import DarkZurichDataset
 9 | from .dataset_wrappers import (ConcatDataset, MultiImageMixDataset,
10 |                                RepeatDataset)
11 | from .drive import DRIVEDataset
12 | from .hrf import HRFDataset
13 | from .isaid import iSAIDDataset
14 | from .isprs import ISPRSDataset
15 | from .loveda import LoveDADataset
16 | from .night_driving import NightDrivingDataset
17 | from .pascal_context import PascalContextDataset, PascalContextDataset59
18 | from .potsdam import PotsdamDataset
19 | from .stare import STAREDataset
20 | from .voc import PascalVOCDataset
21 | 
22 | __all__ = [
23 |     'CustomDataset', 'build_dataloader', 'ConcatDataset', 'RepeatDataset',
24 |     'DATASETS', 'build_dataset', 'PIPELINES', 'CityscapesDataset',
25 |     'PascalVOCDataset', 'ADE20KDataset', 'PascalContextDataset',
26 |     'PascalContextDataset59', 'ChaseDB1Dataset', 'DRIVEDataset', 'HRFDataset',
27 |     'STAREDataset', 'DarkZurichDataset', 'NightDrivingDataset',
28 |     'COCOStuffDataset', 'LoveDADataset', 'MultiImageMixDataset',
29 |     'iSAIDDataset', 'ISPRSDataset', 'PotsdamDataset'
30 | ]
31 | 


--------------------------------------------------------------------------------
/mmseg_custom/configs/_base_/datasets/cityscapes_768x768.py:
--------------------------------------------------------------------------------
 1 | _base_ = './cityscapes.py'
 2 | img_norm_cfg = dict(
 3 |     mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
 4 | crop_size = (768, 768)
 5 | train_pipeline = [
 6 |     dict(type='LoadImageFromFile'),
 7 |     dict(type='LoadAnnotations'),
 8 |     dict(type='Resize', img_scale=(2049, 1025), ratio_range=(0.5, 2.0)),
 9 |     dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
10 |     dict(type='RandomFlip', prob=0.5),
11 |     dict(type='PhotoMetricDistortion'),
12 |     dict(type='Normalize', **img_norm_cfg),
13 |     dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
14 |     dict(type='DefaultFormatBundle'),
15 |     dict(type='Collect', keys=['img', 'gt_semantic_seg']),
16 | ]
17 | test_pipeline = [
18 |     dict(type='LoadImageFromFile'),
19 |     dict(
20 |         type='MultiScaleFlipAug',
21 |         img_scale=(2049, 1025),
22 |         # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75],
23 |         flip=False,
24 |         transforms=[
25 |             dict(type='Resize', keep_ratio=True),
26 |             dict(type='RandomFlip'),
27 |             dict(type='Normalize', **img_norm_cfg),
28 |             dict(type='ImageToTensor', keys=['img']),
29 |             dict(type='Collect', keys=['img']),
30 |         ])
31 | ]
32 | data = dict(
33 |     train=dict(pipeline=train_pipeline),
34 |     val=dict(pipeline=test_pipeline),
35 |     test=dict(pipeline=test_pipeline))
36 | 


--------------------------------------------------------------------------------
/mmseg_custom/configs/_base_/datasets/cityscapes_769x769.py:
--------------------------------------------------------------------------------
 1 | _base_ = './cityscapes.py'
 2 | img_norm_cfg = dict(
 3 |     mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
 4 | crop_size = (769, 769)
 5 | train_pipeline = [
 6 |     dict(type='LoadImageFromFile'),
 7 |     dict(type='LoadAnnotations'),
 8 |     dict(type='Resize', img_scale=(2049, 1025), ratio_range=(0.5, 2.0)),
 9 |     dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
10 |     dict(type='RandomFlip', prob=0.5),
11 |     dict(type='PhotoMetricDistortion'),
12 |     dict(type='Normalize', **img_norm_cfg),
13 |     dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
14 |     dict(type='DefaultFormatBundle'),
15 |     dict(type='Collect', keys=['img', 'gt_semantic_seg']),
16 | ]
17 | test_pipeline = [
18 |     dict(type='LoadImageFromFile'),
19 |     dict(
20 |         type='MultiScaleFlipAug',
21 |         img_scale=(2049, 1025),
22 |         # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75],
23 |         flip=False,
24 |         transforms=[
25 |             dict(type='Resize', keep_ratio=True),
26 |             dict(type='RandomFlip'),
27 |             dict(type='Normalize', **img_norm_cfg),
28 |             dict(type='ImageToTensor', keys=['img']),
29 |             dict(type='Collect', keys=['img']),
30 |         ])
31 | ]
32 | data = dict(
33 |     train=dict(pipeline=train_pipeline),
34 |     val=dict(pipeline=test_pipeline),
35 |     test=dict(pipeline=test_pipeline))
36 | 


--------------------------------------------------------------------------------
/mmseg_custom/configs/_base_/datasets/cityscapes_832x832.py:
--------------------------------------------------------------------------------
 1 | _base_ = './cityscapes.py'
 2 | img_norm_cfg = dict(
 3 |     mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
 4 | crop_size = (832, 832)
 5 | train_pipeline = [
 6 |     dict(type='LoadImageFromFile'),
 7 |     dict(type='LoadAnnotations'),
 8 |     dict(type='Resize', img_scale=(2048, 1024), ratio_range=(0.5, 2.0)),
 9 |     dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
10 |     dict(type='RandomFlip', prob=0.5),
11 |     dict(type='PhotoMetricDistortion'),
12 |     dict(type='Normalize', **img_norm_cfg),
13 |     dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
14 |     dict(type='DefaultFormatBundle'),
15 |     dict(type='Collect', keys=['img', 'gt_semantic_seg']),
16 | ]
17 | test_pipeline = [
18 |     dict(type='LoadImageFromFile'),
19 |     dict(
20 |         type='MultiScaleFlipAug',
21 |         img_scale=(2048, 1024),
22 |         # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75],
23 |         flip=False,
24 |         transforms=[
25 |             dict(type='Resize', keep_ratio=True),
26 |             dict(type='RandomFlip'),
27 |             dict(type='Normalize', **img_norm_cfg),
28 |             dict(type='ImageToTensor', keys=['img']),
29 |             dict(type='Collect', keys=['img']),
30 |         ])
31 | ]
32 | data = dict(
33 |     train=dict(pipeline=train_pipeline),
34 |     val=dict(pipeline=test_pipeline),
35 |     test=dict(pipeline=test_pipeline))
36 | 


--------------------------------------------------------------------------------
/mmseg_custom/configs/_base_/datasets/cityscapes_1024x1024.py:
--------------------------------------------------------------------------------
 1 | _base_ = './cityscapes.py'  # base config this file extends; only the pipelines below are redefined here
 2 | img_norm_cfg = dict(
 3 |     mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
 4 | crop_size = (1024, 1024)  # used by both RandomCrop and Pad in the training pipeline
 5 | train_pipeline = [
 6 |     dict(type='LoadImageFromFile'),
 7 |     dict(type='LoadAnnotations'),
 8 |     dict(type='Resize', img_scale=(2048, 1024), ratio_range=(0.5, 2.0)),
 9 |     dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
10 |     dict(type='RandomFlip', prob=0.5),
11 |     dict(type='PhotoMetricDistortion'),
12 |     dict(type='Normalize', **img_norm_cfg),
13 |     dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),  # NOTE(review): 255 is presumably the ignore label -- confirm
14 |     dict(type='DefaultFormatBundle'),
15 |     dict(type='Collect', keys=['img', 'gt_semantic_seg']),
16 | ]
17 | test_pipeline = [
18 |     dict(type='LoadImageFromFile'),
19 |     dict(
20 |         type='MultiScaleFlipAug',
21 |         img_scale=(2048, 1024),
22 |         # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75],  (multi-scale ratios left disabled: single scale only)
23 |         flip=False,
24 |         transforms=[
25 |             dict(type='Resize', keep_ratio=True),
26 |             dict(type='RandomFlip'),  # NOTE(review): with flip=False above this is presumably a no-op -- confirm
27 |             dict(type='Normalize', **img_norm_cfg),
28 |             dict(type='ImageToTensor', keys=['img']),
29 |             dict(type='Collect', keys=['img']),  # test-time samples carry the image only, no ground truth
30 |         ])
31 | ]
32 | data = dict(
33 |     train=dict(pipeline=train_pipeline),
34 |     val=dict(pipeline=test_pipeline),  # validation and test share the same pipeline
35 |     test=dict(pipeline=test_pipeline))
36 | 


--------------------------------------------------------------------------------
/mmseg_custom/configs/_base_/models/ccnet_r50-d8.py:
--------------------------------------------------------------------------------
 1 | # model settings: CCHead decode head plus an FCNHead auxiliary head on a ResNetV1c-50 backbone
 2 | norm_cfg = dict(type='SyncBN', requires_grad=True)  # one norm config reused by the backbone and both heads
 3 | model = dict(
 4 |     type='EncoderDecoder',
 5 |     pretrained='open-mmlab://resnet50_v1c',
 6 |     backbone=dict(
 7 |         type='ResNetV1c',
 8 |         depth=50,
 9 |         num_stages=4,
10 |         out_indices=(0, 1, 2, 3),
11 |         dilations=(1, 1, 2, 4),  # the last two stages are dilated (2 and 4) ...
12 |         strides=(1, 2, 1, 1),  # ... and keep stride 1 instead of downsampling
13 |         norm_cfg=norm_cfg,
14 |         norm_eval=False,
15 |         style='pytorch',
16 |         contract_dilation=True),
17 |     decode_head=dict(  # main head: feature index 3 (2048 channels), loss weight 1.0
18 |         type='CCHead',
19 |         in_channels=2048,
20 |         in_index=3,
21 |         channels=512,
22 |         recurrence=2,
23 |         dropout_ratio=0.1,
24 |         num_classes=19,  # NOTE(review): presumably overridden by dataset-specific configs -- confirm
25 |         norm_cfg=norm_cfg,
26 |         align_corners=False,
27 |         loss_decode=dict(
28 |             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
29 |     auxiliary_head=dict(  # auxiliary head: feature index 2 (1024 channels), loss weight 0.4
30 |         type='FCNHead',
31 |         in_channels=1024,
32 |         in_index=2,
33 |         channels=256,
34 |         num_convs=1,
35 |         concat_input=False,
36 |         dropout_ratio=0.1,
37 |         num_classes=19,
38 |         norm_cfg=norm_cfg,
39 |         align_corners=False,
40 |         loss_decode=dict(
41 |             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
42 |     # model training and testing settings (no training options set; test mode is 'whole')
43 |     train_cfg=dict(),
44 |     test_cfg=dict(mode='whole'))
45 | 


--------------------------------------------------------------------------------
/mmseg_custom/configs/_base_/models/danet_r50-d8.py:
--------------------------------------------------------------------------------
 1 | # model settings: DAHead decode head plus an FCNHead auxiliary head on a ResNetV1c-50 backbone
 2 | norm_cfg = dict(type='SyncBN', requires_grad=True)  # one norm config reused by the backbone and both heads
 3 | model = dict(
 4 |     type='EncoderDecoder',
 5 |     pretrained='open-mmlab://resnet50_v1c',
 6 |     backbone=dict(
 7 |         type='ResNetV1c',
 8 |         depth=50,
 9 |         num_stages=4,
10 |         out_indices=(0, 1, 2, 3),
11 |         dilations=(1, 1, 2, 4),  # the last two stages are dilated (2 and 4) ...
12 |         strides=(1, 2, 1, 1),  # ... and keep stride 1 instead of downsampling
13 |         norm_cfg=norm_cfg,
14 |         norm_eval=False,
15 |         style='pytorch',
16 |         contract_dilation=True),
17 |     decode_head=dict(  # main head: feature index 3 (2048 channels), loss weight 1.0
18 |         type='DAHead',
19 |         in_channels=2048,
20 |         in_index=3,
21 |         channels=512,
22 |         pam_channels=64,
23 |         dropout_ratio=0.1,
24 |         num_classes=19,  # NOTE(review): presumably overridden by dataset-specific configs -- confirm
25 |         norm_cfg=norm_cfg,
26 |         align_corners=False,
27 |         loss_decode=dict(
28 |             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
29 |     auxiliary_head=dict(  # auxiliary head: feature index 2 (1024 channels), loss weight 0.4
30 |         type='FCNHead',
31 |         in_channels=1024,
32 |         in_index=2,
33 |         channels=256,
34 |         num_convs=1,
35 |         concat_input=False,
36 |         dropout_ratio=0.1,
37 |         num_classes=19,
38 |         norm_cfg=norm_cfg,
39 |         align_corners=False,
40 |         loss_decode=dict(
41 |             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
42 |     # model training and testing settings (no training options set; test mode is 'whole')
43 |     train_cfg=dict(),
44 |     test_cfg=dict(mode='whole'))
45 | 


--------------------------------------------------------------------------------
/mmseg_custom/configs/_base_/models/pspnet_r50-d8.py:
--------------------------------------------------------------------------------
 1 | # model settings: PSPHead decode head plus an FCNHead auxiliary head on a ResNetV1c-50 backbone
 2 | norm_cfg = dict(type='SyncBN', requires_grad=True)  # one norm config reused by the backbone and both heads
 3 | model = dict(
 4 |     type='EncoderDecoder',
 5 |     pretrained='open-mmlab://resnet50_v1c',
 6 |     backbone=dict(
 7 |         type='ResNetV1c',
 8 |         depth=50,
 9 |         num_stages=4,
10 |         out_indices=(0, 1, 2, 3),
11 |         dilations=(1, 1, 2, 4),  # the last two stages are dilated (2 and 4) ...
12 |         strides=(1, 2, 1, 1),  # ... and keep stride 1 instead of downsampling
13 |         norm_cfg=norm_cfg,
14 |         norm_eval=False,
15 |         style='pytorch',
16 |         contract_dilation=True),
17 |     decode_head=dict(  # main head: feature index 3 (2048 channels), loss weight 1.0
18 |         type='PSPHead',
19 |         in_channels=2048,
20 |         in_index=3,
21 |         channels=512,
22 |         pool_scales=(1, 2, 3, 6),
23 |         dropout_ratio=0.1,
24 |         num_classes=19,  # NOTE(review): presumably overridden by dataset-specific configs -- confirm
25 |         norm_cfg=norm_cfg,
26 |         align_corners=False,
27 |         loss_decode=dict(
28 |             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
29 |     auxiliary_head=dict(  # auxiliary head: feature index 2 (1024 channels), loss weight 0.4
30 |         type='FCNHead',
31 |         in_channels=1024,
32 |         in_index=2,
33 |         channels=256,
34 |         num_convs=1,
35 |         concat_input=False,
36 |         dropout_ratio=0.1,
37 |         num_classes=19,
38 |         norm_cfg=norm_cfg,
39 |         align_corners=False,
40 |         loss_decode=dict(
41 |             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
42 |     # model training and testing settings (no training options set; test mode is 'whole')
43 |     train_cfg=dict(),
44 |     test_cfg=dict(mode='whole'))
45 | 


--------------------------------------------------------------------------------
/mmseg_custom/configs/_base_/models/deeplabv3_r50-d8.py:
--------------------------------------------------------------------------------
 1 | # model settings: ASPPHead decode head plus an FCNHead auxiliary head on a ResNetV1c-50 backbone
 2 | norm_cfg = dict(type='SyncBN', requires_grad=True)  # one norm config reused by the backbone and both heads
 3 | model = dict(
 4 |     type='EncoderDecoder',
 5 |     pretrained='open-mmlab://resnet50_v1c',
 6 |     backbone=dict(
 7 |         type='ResNetV1c',
 8 |         depth=50,
 9 |         num_stages=4,
10 |         out_indices=(0, 1, 2, 3),
11 |         dilations=(1, 1, 2, 4),  # the last two stages are dilated (2 and 4) ...
12 |         strides=(1, 2, 1, 1),  # ... and keep stride 1 instead of downsampling
13 |         norm_cfg=norm_cfg,
14 |         norm_eval=False,
15 |         style='pytorch',
16 |         contract_dilation=True),
17 |     decode_head=dict(  # main head: feature index 3 (2048 channels), loss weight 1.0
18 |         type='ASPPHead',
19 |         in_channels=2048,
20 |         in_index=3,
21 |         channels=512,
22 |         dilations=(1, 12, 24, 36),  # head-level dilations, separate from the backbone dilations above
23 |         dropout_ratio=0.1,
24 |         num_classes=19,  # NOTE(review): presumably overridden by dataset-specific configs -- confirm
25 |         norm_cfg=norm_cfg,
26 |         align_corners=False,
27 |         loss_decode=dict(
28 |             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
29 |     auxiliary_head=dict(  # auxiliary head: feature index 2 (1024 channels), loss weight 0.4
30 |         type='FCNHead',
31 |         in_channels=1024,
32 |         in_index=2,
33 |         channels=256,
34 |         num_convs=1,
35 |         concat_input=False,
36 |         dropout_ratio=0.1,
37 |         num_classes=19,
38 |         norm_cfg=norm_cfg,
39 |         align_corners=False,
40 |         loss_decode=dict(
41 |             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
42 |     # model training and testing settings (no training options set; test mode is 'whole')
43 |     train_cfg=dict(),
44 |     test_cfg=dict(mode='whole'))
45 | 


--------------------------------------------------------------------------------
/mmseg_custom/configs/_base_/models/fcn_r50-d8.py:
--------------------------------------------------------------------------------
 1 | # model settings: FCNHead used for both the decode head and the auxiliary head on a ResNetV1c-50 backbone
 2 | norm_cfg = dict(type='SyncBN', requires_grad=True)  # one norm config reused by the backbone and both heads
 3 | model = dict(
 4 |     type='EncoderDecoder',
 5 |     pretrained='open-mmlab://resnet50_v1c',
 6 |     backbone=dict(
 7 |         type='ResNetV1c',
 8 |         depth=50,
 9 |         num_stages=4,
10 |         out_indices=(0, 1, 2, 3),
11 |         dilations=(1, 1, 2, 4),  # the last two stages are dilated (2 and 4) ...
12 |         strides=(1, 2, 1, 1),  # ... and keep stride 1 instead of downsampling
13 |         norm_cfg=norm_cfg,
14 |         norm_eval=False,
15 |         style='pytorch',
16 |         contract_dilation=True),
17 |     decode_head=dict(  # main head: feature index 3 (2048 channels), loss weight 1.0
18 |         type='FCNHead',
19 |         in_channels=2048,
20 |         in_index=3,
21 |         channels=512,
22 |         num_convs=2,  # main head differs from the auxiliary FCNHead: 2 convs and concat_input=True
23 |         concat_input=True,
24 |         dropout_ratio=0.1,
25 |         num_classes=19,  # NOTE(review): presumably overridden by dataset-specific configs -- confirm
26 |         norm_cfg=norm_cfg,
27 |         align_corners=False,
28 |         loss_decode=dict(
29 |             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
30 |     auxiliary_head=dict(  # auxiliary head: feature index 2 (1024 channels), loss weight 0.4
31 |         type='FCNHead',
32 |         in_channels=1024,
33 |         in_index=2,
34 |         channels=256,
35 |         num_convs=1,
36 |         concat_input=False,
37 |         dropout_ratio=0.1,
38 |         num_classes=19,
39 |         norm_cfg=norm_cfg,
40 |         align_corners=False,
41 |         loss_decode=dict(
42 |             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
43 |     # model training and testing settings (no training options set; test mode is 'whole')
44 |     train_cfg=dict(),
45 |     test_cfg=dict(mode='whole'))
46 | 


--------------------------------------------------------------------------------
/mmseg_custom/configs/_base_/models/isanet_r50-d8.py:
--------------------------------------------------------------------------------
 1 | # model settings: ISAHead decode head plus an FCNHead auxiliary head on a ResNetV1c-50 backbone
 2 | norm_cfg = dict(type='SyncBN', requires_grad=True)  # one norm config reused by the backbone and both heads
 3 | model = dict(
 4 |     type='EncoderDecoder',
 5 |     pretrained='open-mmlab://resnet50_v1c',
 6 |     backbone=dict(
 7 |         type='ResNetV1c',
 8 |         depth=50,
 9 |         num_stages=4,
10 |         out_indices=(0, 1, 2, 3),
11 |         dilations=(1, 1, 2, 4),  # the last two stages are dilated (2 and 4) ...
12 |         strides=(1, 2, 1, 1),  # ... and keep stride 1 instead of downsampling
13 |         norm_cfg=norm_cfg,
14 |         norm_eval=False,
15 |         style='pytorch',
16 |         contract_dilation=True),
17 |     decode_head=dict(  # main head: feature index 3 (2048 channels), loss weight 1.0
18 |         type='ISAHead',
19 |         in_channels=2048,
20 |         in_index=3,
21 |         channels=512,
22 |         isa_channels=256,
23 |         down_factor=(8, 8),
24 |         dropout_ratio=0.1,
25 |         num_classes=19,  # NOTE(review): presumably overridden by dataset-specific configs -- confirm
26 |         norm_cfg=norm_cfg,
27 |         align_corners=False,
28 |         loss_decode=dict(
29 |             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
30 |     auxiliary_head=dict(  # auxiliary head: feature index 2 (1024 channels), loss weight 0.4
31 |         type='FCNHead',
32 |         in_channels=1024,
33 |         in_index=2,
34 |         channels=256,
35 |         num_convs=1,
36 |         concat_input=False,
37 |         dropout_ratio=0.1,
38 |         num_classes=19,
39 |         norm_cfg=norm_cfg,
40 |         align_corners=False,
41 |         loss_decode=dict(
42 |             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
43 |     # model training and testing settings (no training options set; test mode is 'whole')
44 |     train_cfg=dict(),
45 |     test_cfg=dict(mode='whole'))
46 | 


--------------------------------------------------------------------------------
/mmseg_custom/configs/_base_/models/dmnet_r50-d8.py:
--------------------------------------------------------------------------------
 1 | # model settings
 2 | norm_cfg = dict(type='SyncBN', requires_grad=True)
 3 | model = dict(
 4 |     type='EncoderDecoder',
 5 |     pretrained='open-mmlab://resnet50_v1c',
 6 |     backbone=dict(
 7 |         type='ResNetV1c',
 8 |         depth=50,
 9 |         num_stages=4,
10 |         out_indices=(0, 1, 2, 3),
11 |         dilations=(1, 1, 2, 4),
12 |         strides=(1, 2, 1, 1),
13 |         norm_cfg=norm_cfg,
14 |         norm_eval=False,
15 |         style='pytorch',
16 |         contract_dilation=True),
17 |     decode_head=dict(
18 |         type='DMHead',
19 |         in_channels=2048,
20 |         in_index=3,
21 |         channels=512,
22 |         filter_sizes=(1, 3, 5, 7),
23 |         dropout_ratio=0.1,
24 |         num_classes=19,
25 |         norm_cfg=dict(type='SyncBN', requires_grad=True),
26 |         align_corners=False,
27 |         loss_decode=dict(
28 |             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
29 |     auxiliary_head=dict(
30 |         type='FCNHead',
31 |         in_channels=1024,
32 |         in_index=2,
33 |         channels=256,
34 |         num_convs=1,
35 |         concat_input=False,
36 |         dropout_ratio=0.1,
37 |         num_classes=19,
38 |         norm_cfg=norm_cfg,
39 |         align_corners=False,
40 |         loss_decode=dict(
41 |             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
42 |     # model training and testing settings
43 |     train_cfg=dict(),
44 |     test_cfg=dict(mode='whole'))
45 | 


--------------------------------------------------------------------------------
/mmseg_custom/configs/_base_/models/upernet_r50.py:
--------------------------------------------------------------------------------
 1 | # model settings: UPerHead decode head plus an FCNHead auxiliary head on a ResNetV1c-50 backbone
 2 | norm_cfg = dict(type='SyncBN', requires_grad=True)  # one norm config reused by the backbone and both heads
 3 | model = dict(
 4 |     type='EncoderDecoder',
 5 |     pretrained='open-mmlab://resnet50_v1c',
 6 |     backbone=dict(
 7 |         type='ResNetV1c',
 8 |         depth=50,
 9 |         num_stages=4,
10 |         out_indices=(0, 1, 2, 3),
11 |         dilations=(1, 1, 1, 1),  # no dilation in any stage ...
12 |         strides=(1, 2, 2, 2),  # ... stages 2-4 each use stride 2
13 |         norm_cfg=norm_cfg,
14 |         norm_eval=False,
15 |         style='pytorch',
16 |         contract_dilation=True),
17 |     decode_head=dict(  # main head: takes all four backbone outputs, loss weight 1.0
18 |         type='UPerHead',
19 |         in_channels=[256, 512, 1024, 2048],  # one entry per index listed in in_index
20 |         in_index=[0, 1, 2, 3],
21 |         pool_scales=(1, 2, 3, 6),
22 |         channels=512,
23 |         dropout_ratio=0.1,
24 |         num_classes=19,  # NOTE(review): presumably overridden by dataset-specific configs -- confirm
25 |         norm_cfg=norm_cfg,
26 |         align_corners=False,
27 |         loss_decode=dict(
28 |             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
29 |     auxiliary_head=dict(  # auxiliary head: feature index 2 (1024 channels), loss weight 0.4
30 |         type='FCNHead',
31 |         in_channels=1024,
32 |         in_index=2,
33 |         channels=256,
34 |         num_convs=1,
35 |         concat_input=False,
36 |         dropout_ratio=0.1,
37 |         num_classes=19,
38 |         norm_cfg=norm_cfg,
39 |         align_corners=False,
40 |         loss_decode=dict(
41 |             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
42 |     # model training and testing settings (no training options set; test mode is 'whole')
43 |     train_cfg=dict(),
44 |     test_cfg=dict(mode='whole'))
45 | 


--------------------------------------------------------------------------------
/mmseg_custom/configs/_base_/models/apcnet_r50-d8.py:
--------------------------------------------------------------------------------
 1 | # model settings
 2 | norm_cfg = dict(type='SyncBN', requires_grad=True)
 3 | model = dict(
 4 |     type='EncoderDecoder',
 5 |     pretrained='open-mmlab://resnet50_v1c',
 6 |     backbone=dict(
 7 |         type='ResNetV1c',
 8 |         depth=50,
 9 |         num_stages=4,
10 |         out_indices=(0, 1, 2, 3),
11 |         dilations=(1, 1, 2, 4),
12 |         strides=(1, 2, 1, 1),
13 |         norm_cfg=norm_cfg,
14 |         norm_eval=False,
15 |         style='pytorch',
16 |         contract_dilation=True),
17 |     decode_head=dict(
18 |         type='APCHead',
19 |         in_channels=2048,
20 |         in_index=3,
21 |         channels=512,
22 |         pool_scales=(1, 2, 3, 6),
23 |         dropout_ratio=0.1,
24 |         num_classes=19,
25 |         norm_cfg=dict(type='SyncBN', requires_grad=True),
26 |         align_corners=False,
27 |         loss_decode=dict(
28 |             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
29 |     auxiliary_head=dict(
30 |         type='FCNHead',
31 |         in_channels=1024,
32 |         in_index=2,
33 |         channels=256,
34 |         num_convs=1,
35 |         concat_input=False,
36 |         dropout_ratio=0.1,
37 |         num_classes=19,
38 |         norm_cfg=norm_cfg,
39 |         align_corners=False,
40 |         loss_decode=dict(
41 |             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
42 |     # model training and testing settings
43 |     train_cfg=dict(),
44 |     test_cfg=dict(mode='whole'))
45 | 


--------------------------------------------------------------------------------
/mmseg_custom/configs/upernet/upernet_r50_512x512_80k_ade20k_hard_pixel.py:
--------------------------------------------------------------------------------
 1 | _base_ = [
 2 |     './upernet_r50_512x512_80k_ade20k.py'  # base config; the dicts below only list the overridden keys
 3 | ]
 4 | model = dict(
 5 |     pretrained=None,  # top-level `pretrained` is cleared; the checkpoint is given via backbone init_cfg below
 6 |     # pretrained='open-mmlab://resnet50_v1c', 
 7 |     backbone=dict(
 8 |         depth=50,
 9 |         # type='ResNetV1c',
10 |         # type='ResNetV1cWithBlur',
11 |         # type='NyResNetFreezePretrain',
12 |         # frozen_stages=4,
13 |         type='NyResNet',  # active backbone type; the commented-out lines around it are alternatives
14 |         # blur_type='adafreq',
15 |         # blur_type='blur',
16 |         blur_type='flc',
17 |         freq_thres=0.25 * 1.4,  # evaluates to 0.35
18 |         # blur_k=7,
19 |         with_cp=True,  # NOTE(review): presumably enables activation checkpointing in the backbone -- confirm
20 |         # use_checkpoing=True,
21 |         init_cfg=dict(
22 |             type='Pretrained',
23 |             checkpoint='open-mmlab://resnet50_v1c',
24 |             # prefix='backbone.'
25 |             )
26 |         ),
27 |     decode_head=dict(
28 |         type='UPerHead',
29 |         channels=128,  # _base_/models/upernet_r50.py sets 512 for this head
30 |         )
31 | )
32 | data = dict(
33 |     samples_per_gpu=16,
34 |     workers_per_gpu=8,
35 | )
36 | optimizer = dict(
37 |     paramwise_cfg = dict(
38 |         custom_keys={  # NOTE(review): keys are presumably matched against parameter names -- confirm
39 |             # 'FPNDyHPAlign': dict(lr_mult=2.), 
40 |             # 'FPNFADyHPAlign': dict(lr_mult=2.), 
41 |             # 'FaPNDyHPAlign': dict(lr_mult=2.), 
42 |             'head': dict(lr_mult=2.),  # 2x learning-rate multiplier
43 |             'att': dict(lr_mult=2.),  # 2x learning-rate multiplier
44 |             # 'comp_conv': dict(lr_mult=2.),
45 |             }))
46 | checkpoint_config = dict(max_keep_ckpts=2)
47 | evaluation = dict(save_best='mIoU', pre_eval='True')
48 | 


--------------------------------------------------------------------------------
/mmseg_custom/configs/upernet/upernet_r50_512x512_40k_voc12aug_hard_pixel.py:
--------------------------------------------------------------------------------
 1 | _base_ = './upernet_r50_512x512_40k_voc12aug.py'  # base config; the dicts below only list the overridden keys
 2 | model = dict(
 3 |     pretrained=None,  # top-level `pretrained` is cleared; the checkpoint is given via backbone init_cfg below
 4 |     # pretrained='open-mmlab://resnet50_v1c', 
 5 |     backbone=dict(
 6 |         depth=50,
 7 |         # type='ResNetV1c',
 8 |         # type='ResNetV1cWithBlur',
 9 |         # type='NyResNetFreezePretrain',
10 |         # frozen_stages=4,
11 |         type='NyResNet',  # active backbone type; the commented-out lines around it are alternatives
12 |         # type='ResNetFreqMix',
13 |         # blur_type='adafreq',
14 |         # blur_type='blur',
15 |         blur_type='flc',
16 |         freq_thres=0.25 * 1.4,  # evaluates to 0.35
17 |         # blur_k=7,
18 |         # with_cp=True,
19 |         # use_checkpoing=True,
20 |         init_cfg=dict(
21 |             type='Pretrained',
22 |             checkpoint='open-mmlab://resnet50_v1c',
23 |             # prefix='backbone.'
24 |             )
25 |         ),
26 |     decode_head=dict(
27 |         type='UPerHead',
28 |         channels=128,)  # _base_/models/upernet_r50.py sets 512 for this head
29 | )
30 | data = dict(
31 |     samples_per_gpu=16,
32 |     workers_per_gpu=16,
33 | )
34 | checkpoint_config = dict(max_keep_ckpts=2)
35 | optimizer = dict(
36 |     paramwise_cfg = dict(
37 |         custom_keys={  # NOTE(review): keys are presumably matched against parameter names -- confirm
38 |             # 'FPNDyHPAlign': dict(lr_mult=2.), 
39 |             # 'FPNFADyHPAlign': dict(lr_mult=2.), 
40 |             # 'FaPNDyHPAlign': dict(lr_mult=2.), 
41 |             'head': dict(lr_mult=2.),  # 2x learning-rate multiplier
42 |             'att': dict(lr_mult=2.),  # 2x learning-rate multiplier
43 |             # 'comp_conv': dict(lr_mult=2.),
44 |             }))
45 | evaluation = dict(save_best='mIoU', pre_eval='True')


--------------------------------------------------------------------------------
/mmseg_custom/configs/_base_/models/dnl_r50-d8.py:
--------------------------------------------------------------------------------
 1 | # model settings: DNLHead decode head plus an FCNHead auxiliary head on a ResNetV1c-50 backbone
 2 | norm_cfg = dict(type='SyncBN', requires_grad=True)  # one norm config reused by the backbone and both heads
 3 | model = dict(
 4 |     type='EncoderDecoder',
 5 |     pretrained='open-mmlab://resnet50_v1c',
 6 |     backbone=dict(
 7 |         type='ResNetV1c',
 8 |         depth=50,
 9 |         num_stages=4,
10 |         out_indices=(0, 1, 2, 3),
11 |         dilations=(1, 1, 2, 4),  # the last two stages are dilated (2 and 4) ...
12 |         strides=(1, 2, 1, 1),  # ... and keep stride 1 instead of downsampling
13 |         norm_cfg=norm_cfg,
14 |         norm_eval=False,
15 |         style='pytorch',
16 |         contract_dilation=True),
17 |     decode_head=dict(  # main head: feature index 3 (2048 channels), loss weight 1.0
18 |         type='DNLHead',
19 |         in_channels=2048,
20 |         in_index=3,
21 |         channels=512,
22 |         dropout_ratio=0.1,
23 |         reduction=2,
24 |         use_scale=True,
25 |         mode='embedded_gaussian',
26 |         num_classes=19,  # NOTE(review): presumably overridden by dataset-specific configs -- confirm
27 |         norm_cfg=norm_cfg,
28 |         align_corners=False,
29 |         loss_decode=dict(
30 |             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
31 |     auxiliary_head=dict(  # auxiliary head: feature index 2 (1024 channels), loss weight 0.4
32 |         type='FCNHead',
33 |         in_channels=1024,
34 |         in_index=2,
35 |         channels=256,
36 |         num_convs=1,
37 |         concat_input=False,
38 |         dropout_ratio=0.1,
39 |         num_classes=19,
40 |         norm_cfg=norm_cfg,
41 |         align_corners=False,
42 |         loss_decode=dict(
43 |             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
44 |     # model training and testing settings (no training options set; test mode is 'whole')
45 |     train_cfg=dict(),
46 |     test_cfg=dict(mode='whole'))
47 | 


--------------------------------------------------------------------------------
/mmseg_custom/configs/_base_/models/nonlocal_r50-d8.py:
--------------------------------------------------------------------------------
 1 | # model settings: NLHead decode head plus an FCNHead auxiliary head on a ResNetV1c-50 backbone
 2 | norm_cfg = dict(type='SyncBN', requires_grad=True)  # one norm config reused by the backbone and both heads
 3 | model = dict(
 4 |     type='EncoderDecoder',
 5 |     pretrained='open-mmlab://resnet50_v1c',
 6 |     backbone=dict(
 7 |         type='ResNetV1c',
 8 |         depth=50,
 9 |         num_stages=4,
10 |         out_indices=(0, 1, 2, 3),
11 |         dilations=(1, 1, 2, 4),  # the last two stages are dilated (2 and 4) ...
12 |         strides=(1, 2, 1, 1),  # ... and keep stride 1 instead of downsampling
13 |         norm_cfg=norm_cfg,
14 |         norm_eval=False,
15 |         style='pytorch',
16 |         contract_dilation=True),
17 |     decode_head=dict(  # main head: feature index 3 (2048 channels), loss weight 1.0
18 |         type='NLHead',
19 |         in_channels=2048,
20 |         in_index=3,
21 |         channels=512,
22 |         dropout_ratio=0.1,
23 |         reduction=2,
24 |         use_scale=True,
25 |         mode='embedded_gaussian',
26 |         num_classes=19,  # NOTE(review): presumably overridden by dataset-specific configs -- confirm
27 |         norm_cfg=norm_cfg,
28 |         align_corners=False,
29 |         loss_decode=dict(
30 |             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
31 |     auxiliary_head=dict(  # auxiliary head: feature index 2 (1024 channels), loss weight 0.4
32 |         type='FCNHead',
33 |         in_channels=1024,
34 |         in_index=2,
35 |         channels=256,
36 |         num_convs=1,
37 |         concat_input=False,
38 |         dropout_ratio=0.1,
39 |         num_classes=19,
40 |         norm_cfg=norm_cfg,
41 |         align_corners=False,
42 |         loss_decode=dict(
43 |             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
44 |     # model training and testing settings (no training options set; test mode is 'whole')
45 |     train_cfg=dict(),
46 |     test_cfg=dict(mode='whole'))
47 | 


--------------------------------------------------------------------------------
/mmseg_custom/configs/_base_/models/gcnet_r50-d8.py:
--------------------------------------------------------------------------------
 1 | # model settings: GCHead decode head plus an FCNHead auxiliary head on a ResNetV1c-50 backbone
 2 | norm_cfg = dict(type='SyncBN', requires_grad=True)  # one norm config reused by the backbone and both heads
 3 | model = dict(
 4 |     type='EncoderDecoder',
 5 |     pretrained='open-mmlab://resnet50_v1c',
 6 |     backbone=dict(
 7 |         type='ResNetV1c',
 8 |         depth=50,
 9 |         num_stages=4,
10 |         out_indices=(0, 1, 2, 3),
11 |         dilations=(1, 1, 2, 4),  # the last two stages are dilated (2 and 4) ...
12 |         strides=(1, 2, 1, 1),  # ... and keep stride 1 instead of downsampling
13 |         norm_cfg=norm_cfg,
14 |         norm_eval=False,
15 |         style='pytorch',
16 |         contract_dilation=True),
17 |     decode_head=dict(  # main head: feature index 3 (2048 channels), loss weight 1.0
18 |         type='GCHead',
19 |         in_channels=2048,
20 |         in_index=3,
21 |         channels=512,
22 |         ratio=1 / 4.,  # evaluates to 0.25
23 |         pooling_type='att',
24 |         fusion_types=('channel_add', ),  # one-element tuple
25 |         dropout_ratio=0.1,
26 |         num_classes=19,  # NOTE(review): presumably overridden by dataset-specific configs -- confirm
27 |         norm_cfg=norm_cfg,
28 |         align_corners=False,
29 |         loss_decode=dict(
30 |             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
31 |     auxiliary_head=dict(  # auxiliary head: feature index 2 (1024 channels), loss weight 0.4
32 |         type='FCNHead',
33 |         in_channels=1024,
34 |         in_index=2,
35 |         channels=256,
36 |         num_convs=1,
37 |         concat_input=False,
38 |         dropout_ratio=0.1,
39 |         num_classes=19,
40 |         norm_cfg=norm_cfg,
41 |         align_corners=False,
42 |         loss_decode=dict(
43 |             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
44 |     # model training and testing settings (no training options set; test mode is 'whole')
45 |     train_cfg=dict(),
46 |     test_cfg=dict(mode='whole'))
47 | 


--------------------------------------------------------------------------------
/mmseg_custom/configs/_base_/models/emanet_r50-d8.py:
--------------------------------------------------------------------------------
 1 | # model settings: EMAHead decode head plus an FCNHead auxiliary head on a ResNetV1c-50 backbone
 2 | norm_cfg = dict(type='SyncBN', requires_grad=True)  # one norm config reused by the backbone and both heads
 3 | model = dict(
 4 |     type='EncoderDecoder',
 5 |     pretrained='open-mmlab://resnet50_v1c',
 6 |     backbone=dict(
 7 |         type='ResNetV1c',
 8 |         depth=50,
 9 |         num_stages=4,
10 |         out_indices=(0, 1, 2, 3),
11 |         dilations=(1, 1, 2, 4),  # the last two stages are dilated (2 and 4) ...
12 |         strides=(1, 2, 1, 1),  # ... and keep stride 1 instead of downsampling
13 |         norm_cfg=norm_cfg,
14 |         norm_eval=False,
15 |         style='pytorch',
16 |         contract_dilation=True),
17 |     decode_head=dict(  # main head: feature index 3 (2048 channels), loss weight 1.0
18 |         type='EMAHead',
19 |         in_channels=2048,
20 |         in_index=3,
21 |         channels=256,
22 |         ema_channels=512,
23 |         num_bases=64,
24 |         num_stages=3,  # head-level setting, distinct from the backbone's num_stages=4
25 |         momentum=0.1,
26 |         dropout_ratio=0.1,
27 |         num_classes=19,  # NOTE(review): presumably overridden by dataset-specific configs -- confirm
28 |         norm_cfg=norm_cfg,
29 |         align_corners=False,
30 |         loss_decode=dict(
31 |             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
32 |     auxiliary_head=dict(  # auxiliary head: feature index 2 (1024 channels), loss weight 0.4
33 |         type='FCNHead',
34 |         in_channels=1024,
35 |         in_index=2,
36 |         channels=256,
37 |         num_convs=1,
38 |         concat_input=False,
39 |         dropout_ratio=0.1,
40 |         num_classes=19,
41 |         norm_cfg=norm_cfg,
42 |         align_corners=False,
43 |         loss_decode=dict(
44 |             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
45 |     # model training and testing settings (no training options set; test mode is 'whole')
46 |     train_cfg=dict(),
47 |     test_cfg=dict(mode='whole'))
48 | 


--------------------------------------------------------------------------------
/mmseg_custom/configs/_base_/models/ann_r50-d8.py:
--------------------------------------------------------------------------------
 1 | # model settings: ANNHead decode head plus an FCNHead auxiliary head on a ResNetV1c-50 backbone
 2 | norm_cfg = dict(type='SyncBN', requires_grad=True)  # one norm config reused by the backbone and both heads
 3 | model = dict(
 4 |     type='EncoderDecoder',
 5 |     pretrained='open-mmlab://resnet50_v1c',
 6 |     backbone=dict(
 7 |         type='ResNetV1c',
 8 |         depth=50,
 9 |         num_stages=4,
10 |         out_indices=(0, 1, 2, 3),
11 |         dilations=(1, 1, 2, 4),  # the last two stages are dilated (2 and 4) ...
12 |         strides=(1, 2, 1, 1),  # ... and keep stride 1 instead of downsampling
13 |         norm_cfg=norm_cfg,
14 |         norm_eval=False,
15 |         style='pytorch',
16 |         contract_dilation=True),
17 |     decode_head=dict(  # main head: takes feature indices 2 and 3, loss weight 1.0
18 |         type='ANNHead',
19 |         in_channels=[1024, 2048],  # one entry per index listed in in_index
20 |         in_index=[2, 3],
21 |         channels=512,
22 |         project_channels=256,
23 |         query_scales=(1, ),  # one-element tuple
24 |         key_pool_scales=(1, 3, 6, 8),
25 |         dropout_ratio=0.1,
26 |         num_classes=19,  # NOTE(review): presumably overridden by dataset-specific configs -- confirm
27 |         norm_cfg=norm_cfg,
28 |         align_corners=False,
29 |         loss_decode=dict(
30 |             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
31 |     auxiliary_head=dict(  # auxiliary head: feature index 2 (1024 channels), loss weight 0.4
32 |         type='FCNHead',
33 |         in_channels=1024,
34 |         in_index=2,
35 |         channels=256,
36 |         num_convs=1,
37 |         concat_input=False,
38 |         dropout_ratio=0.1,
39 |         num_classes=19,
40 |         norm_cfg=norm_cfg,
41 |         align_corners=False,
42 |         loss_decode=dict(
43 |             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
44 |     # model training and testing settings (no training options set; test mode is 'whole')
45 |     train_cfg=dict(),
46 |     test_cfg=dict(mode='whole'))
47 | 


--------------------------------------------------------------------------------
/mmseg_custom/configs/_base_/models/deeplabv3plus_r50-d8.py:
--------------------------------------------------------------------------------
 1 | # model settings: DepthwiseSeparableASPPHead decode head plus an FCNHead auxiliary head on a ResNetV1c-50 backbone
 2 | norm_cfg = dict(type='SyncBN', requires_grad=True)  # one norm config reused by the backbone and both heads
 3 | model = dict(
 4 |     type='EncoderDecoder',
 5 |     pretrained='open-mmlab://resnet50_v1c',
 6 |     backbone=dict(
 7 |         type='ResNetV1c',
 8 |         depth=50,
 9 |         num_stages=4,
10 |         out_indices=(0, 1, 2, 3),
11 |         dilations=(1, 1, 2, 4),  # the last two stages are dilated (2 and 4) ...
12 |         strides=(1, 2, 1, 1),  # ... and keep stride 1 instead of downsampling
13 |         norm_cfg=norm_cfg,
14 |         norm_eval=False,
15 |         style='pytorch',
16 |         contract_dilation=True),
17 |     decode_head=dict(  # main head: feature index 3 (2048 channels), loss weight 1.0
18 |         type='DepthwiseSeparableASPPHead',
19 |         in_channels=2048,
20 |         in_index=3,
21 |         channels=512,
22 |         dilations=(1, 12, 24, 36),  # head-level dilations, separate from the backbone dilations above
23 |         c1_in_channels=256,  # NOTE(review): presumably the channel count of a low-level backbone feature -- confirm
24 |         c1_channels=48,
25 |         dropout_ratio=0.1,
26 |         num_classes=19,  # NOTE(review): presumably overridden by dataset-specific configs -- confirm
27 |         norm_cfg=norm_cfg,
28 |         align_corners=False,
29 |         loss_decode=dict(
30 |             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
31 |     auxiliary_head=dict(  # auxiliary head: feature index 2 (1024 channels), loss weight 0.4
32 |         type='FCNHead',
33 |         in_channels=1024,
34 |         in_index=2,
35 |         channels=256,
36 |         num_convs=1,
37 |         concat_input=False,
38 |         dropout_ratio=0.1,
39 |         num_classes=19,
40 |         norm_cfg=norm_cfg,
41 |         align_corners=False,
42 |         loss_decode=dict(
43 |             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
44 |     # model training and testing settings (no training options set; test mode is 'whole')
45 |     train_cfg=dict(),
46 |     test_cfg=dict(mode='whole'))
47 | 


--------------------------------------------------------------------------------
/mmseg_custom/configs/_base_/models/deeplabv3plus_r50-d8-AAFS.py:
--------------------------------------------------------------------------------
 1 | # model settings: AADepthwiseSeparableASPPHead decode head plus an FCNHead auxiliary head on a ResNetV1c-50 backbone
 2 | norm_cfg = dict(type='SyncBN', requires_grad=True)  # one norm config reused by the backbone and both heads
 3 | model = dict(
 4 |     type='EncoderDecoder',
 5 |     pretrained='open-mmlab://resnet50_v1c',
 6 |     backbone=dict(
 7 |         type='ResNetV1c',
 8 |         depth=50,
 9 |         with_cp=True,  # NOTE(review): presumably enables activation checkpointing in the backbone -- confirm
10 |         num_stages=4,
11 |         out_indices=(0, 1, 2, 3),
12 |         dilations=(1, 1, 2, 4),  # the last two stages are dilated (2 and 4) ...
13 |         strides=(1, 2, 1, 1),  # ... and keep stride 1 instead of downsampling
14 |         norm_cfg=norm_cfg,
15 |         norm_eval=False,
16 |         style='pytorch',
17 |         contract_dilation=True),
18 |     decode_head=dict(  # main head: feature index 3 (2048 channels), loss weight 1.0
19 |         type='AADepthwiseSeparableASPPHead',
20 |         in_channels=2048,
21 |         in_index=3,
22 |         channels=512,
23 |         dilations=(1, 12, 24, 36),  # head-level dilations, separate from the backbone dilations above
24 |         c1_in_channels=256,  # NOTE(review): presumably the channel count of a low-level backbone feature -- confirm
25 |         c1_channels=48,
26 |         dropout_ratio=0.1,
27 |         num_classes=19,  # NOTE(review): presumably overridden by dataset-specific configs -- confirm
28 |         norm_cfg=norm_cfg,
29 |         align_corners=False,
30 |         loss_decode=dict(
31 |             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
32 |     auxiliary_head=dict(  # auxiliary head: feature index 2 (1024 channels), loss weight 0.4
33 |         type='FCNHead',
34 |         in_channels=1024,
35 |         in_index=2,
36 |         channels=256,
37 |         num_convs=1,
38 |         concat_input=False,
39 |         dropout_ratio=0.1,
40 |         num_classes=19,
41 |         norm_cfg=norm_cfg,
42 |         align_corners=False,
43 |         loss_decode=dict(
44 |             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
45 |     # model training and testing settings (no training options set; test mode is 'whole')
46 |     train_cfg=dict(),
47 |     test_cfg=dict(mode='whole'))
48 | 


--------------------------------------------------------------------------------
/mmseg_custom/configs/_base_/models/ocrnet_r50-d8.py:
--------------------------------------------------------------------------------
 1 | # model settings: CascadeEncoderDecoder on ResNetV1c-50 with two decode heads (FCNHead, then OCRHead)
 2 | norm_cfg = dict(type='SyncBN', requires_grad=True)  # reused by the backbone and both decode heads below
 3 | model = dict(
 4 |     type='CascadeEncoderDecoder',
 5 |     num_stages=2,  # equals the number of entries in the decode_head list below
 6 |     pretrained='open-mmlab://resnet50_v1c',
 7 |     backbone=dict(
 8 |         type='ResNetV1c',
 9 |         depth=50,
10 |         num_stages=4,
11 |         out_indices=(0, 1, 2, 3),
12 |         dilations=(1, 1, 2, 4),
13 |         strides=(1, 2, 1, 1),
14 |         norm_cfg=norm_cfg,
15 |         norm_eval=False,
16 |         style='pytorch',
17 |         contract_dilation=True),
18 |     decode_head=[
19 |         dict(
20 |             type='FCNHead',
21 |             in_channels=1024,
22 |             in_index=2,
23 |             channels=256,
24 |             num_convs=1,
25 |             concat_input=False,
26 |             dropout_ratio=0.1,
27 |             num_classes=19,
28 |             norm_cfg=norm_cfg,
29 |             align_corners=False,
30 |             loss_decode=dict(
31 |                 type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),  # first head carries the smaller loss weight
32 |         dict(
33 |             type='OCRHead',
34 |             in_channels=2048,
35 |             in_index=3,
36 |             channels=512,
37 |             ocr_channels=256,
38 |             dropout_ratio=0.1,
39 |             num_classes=19,
40 |             norm_cfg=norm_cfg,
41 |             align_corners=False,
42 |             loss_decode=dict(
43 |                 type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0))
44 |     ],
45 |     # model training and testing settings
46 |     train_cfg=dict(),
47 |     test_cfg=dict(mode='whole'))
48 | 


--------------------------------------------------------------------------------
/mmseg_custom/configs/_base_/models/psanet_r50-d8.py:
--------------------------------------------------------------------------------
 1 | # model settings: ResNetV1c-50 backbone, PSAHead decode head, FCNHead auxiliary head
 2 | norm_cfg = dict(type='SyncBN', requires_grad=True)  # reused by the backbone and both heads below
 3 | model = dict(
 4 |     type='EncoderDecoder',
 5 |     pretrained='open-mmlab://resnet50_v1c',
 6 |     backbone=dict(
 7 |         type='ResNetV1c',
 8 |         depth=50,
 9 |         num_stages=4,
10 |         out_indices=(0, 1, 2, 3),
11 |         dilations=(1, 1, 2, 4),
12 |         strides=(1, 2, 1, 1),
13 |         norm_cfg=norm_cfg,
14 |         norm_eval=False,
15 |         style='pytorch',
16 |         contract_dilation=True),
17 |     decode_head=dict(
18 |         type='PSAHead',
19 |         in_channels=2048,
20 |         in_index=3,
21 |         channels=512,
22 |         mask_size=(97, 97),  # NOTE(review): presumably tied to the training crop size - confirm before changing crops
23 |         psa_type='bi-direction',
24 |         compact=False,
25 |         shrink_factor=2,
26 |         normalization_factor=1.0,
27 |         psa_softmax=True,
28 |         dropout_ratio=0.1,
29 |         num_classes=19,
30 |         norm_cfg=norm_cfg,
31 |         align_corners=False,
32 |         loss_decode=dict(
33 |             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
34 |     auxiliary_head=dict(
35 |         type='FCNHead',
36 |         in_channels=1024,
37 |         in_index=2,
38 |         channels=256,
39 |         num_convs=1,
40 |         concat_input=False,
41 |         dropout_ratio=0.1,
42 |         num_classes=19,
43 |         norm_cfg=norm_cfg,
44 |         align_corners=False,
45 |         loss_decode=dict(
46 |             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),  # auxiliary loss weighted 0.4 vs 1.0 for decode_head
47 |     # model training and testing settings
48 |     train_cfg=dict(),
49 |     test_cfg=dict(mode='whole'))
50 | 


--------------------------------------------------------------------------------
/mmseg_custom/configs/_base_/models/twins_pcpvt-s_fpn.py:
--------------------------------------------------------------------------------
 1 | checkpoint = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/twins/pcpvt_small_20220308-e638c41c.pth'  # noqa
 2 | 
 3 | # model settings: PCPVT backbone -> FPN neck -> FPNHead (no auxiliary head)
 4 | backbone_norm_cfg = dict(type='LN')  # used only by the backbone
 5 | norm_cfg = dict(type='SyncBN', requires_grad=True)  # used by the decode head
 6 | model = dict(
 7 |     type='EncoderDecoder',
 8 |     backbone=dict(
 9 |         type='PCPVT',
10 |         init_cfg=dict(type='Pretrained', checkpoint=checkpoint),  # weights come from the URL defined at the top
11 |         in_channels=3,
12 |         embed_dims=[64, 128, 320, 512],
13 |         num_heads=[1, 2, 5, 8],
14 |         patch_sizes=[4, 2, 2, 2],
15 |         strides=[4, 2, 2, 2],
16 |         mlp_ratios=[8, 8, 4, 4],
17 |         out_indices=(0, 1, 2, 3),
18 |         qkv_bias=True,
19 |         norm_cfg=backbone_norm_cfg,
20 |         depths=[3, 4, 6, 3],
21 |         sr_ratios=[8, 4, 2, 1],
22 |         norm_after_stage=False,
23 |         drop_rate=0.0,
24 |         attn_drop_rate=0.,
25 |         drop_path_rate=0.2),
26 |     neck=dict(
27 |         type='FPN',
28 |         in_channels=[64, 128, 320, 512],  # same values as the backbone's embed_dims
29 |         out_channels=256,
30 |         num_outs=4),
31 |     decode_head=dict(
32 |         type='FPNHead',
33 |         in_channels=[256, 256, 256, 256],  # four inputs, each sized to the neck's out_channels
34 |         in_index=[0, 1, 2, 3],
35 |         feature_strides=[4, 8, 16, 32],
36 |         channels=128,
37 |         dropout_ratio=0.1,
38 |         num_classes=150,
39 |         norm_cfg=norm_cfg,
40 |         align_corners=False,
41 |         loss_decode=dict(
42 |             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
43 |     # model training and testing settings
44 |     train_cfg=dict(),
45 |     test_cfg=dict(mode='whole'))
46 | 


--------------------------------------------------------------------------------
/mmseg_custom/configs/_base_/models/encnet_r50-d8.py:
--------------------------------------------------------------------------------
 1 | # model settings: ResNetV1c-50 backbone, EncHead decode head (with an extra SE loss), FCNHead auxiliary head
 2 | norm_cfg = dict(type='SyncBN', requires_grad=True)  # reused by the backbone and both heads below
 3 | model = dict(
 4 |     type='EncoderDecoder',
 5 |     pretrained='open-mmlab://resnet50_v1c',
 6 |     backbone=dict(
 7 |         type='ResNetV1c',
 8 |         depth=50,
 9 |         num_stages=4,
10 |         out_indices=(0, 1, 2, 3),
11 |         dilations=(1, 1, 2, 4),
12 |         strides=(1, 2, 1, 1),
13 |         norm_cfg=norm_cfg,
14 |         norm_eval=False,
15 |         style='pytorch',
16 |         contract_dilation=True),
17 |     decode_head=dict(
18 |         type='EncHead',
19 |         in_channels=[512, 1024, 2048],  # one entry per index listed in in_index
20 |         in_index=(1, 2, 3),
21 |         channels=512,
22 |         num_codes=32,
23 |         use_se_loss=True,  # paired with loss_se_decode below
24 |         add_lateral=False,
25 |         dropout_ratio=0.1,
26 |         num_classes=19,
27 |         norm_cfg=norm_cfg,
28 |         align_corners=False,
29 |         loss_decode=dict(
30 |             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
31 |         loss_se_decode=dict(
32 |             type='CrossEntropyLoss', use_sigmoid=True, loss_weight=0.2)),  # the only loss in this file with use_sigmoid=True
33 |     auxiliary_head=dict(
34 |         type='FCNHead',
35 |         in_channels=1024,
36 |         in_index=2,
37 |         channels=256,
38 |         num_convs=1,
39 |         concat_input=False,
40 |         dropout_ratio=0.1,
41 |         num_classes=19,
42 |         norm_cfg=norm_cfg,
43 |         align_corners=False,
44 |         loss_decode=dict(
45 |             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),  # auxiliary loss weighted 0.4 vs 1.0 for decode_head
46 |     # model training and testing settings
47 |     train_cfg=dict(),
48 |     test_cfg=dict(mode='whole'))
49 | 


--------------------------------------------------------------------------------
/mmseg_custom/configs/_base_/models/upernet_mae.py:
--------------------------------------------------------------------------------
 1 | norm_cfg = dict(type='SyncBN', requires_grad=True)  # used by both heads; the backbone sets its own LN config inline
 2 | model = dict(
 3 |     type='EncoderDecoder',
 4 |     pretrained=None,
 5 |     backbone=dict(
 6 |         type='MAE',
 7 |         img_size=(640, 640),
 8 |         patch_size=16,
 9 |         in_channels=3,
10 |         embed_dims=768,
11 |         num_layers=12,
12 |         num_heads=12,
13 |         mlp_ratio=4,
14 |         out_indices=(3, 5, 7, 11),
15 |         attn_drop_rate=0.0,
16 |         drop_path_rate=0.1,
17 |         norm_cfg=dict(type='LN', eps=1e-6),
18 |         act_cfg=dict(type='GELU'),
19 |         norm_eval=False,
20 |         init_values=0.1),
21 |     neck=dict(type='Feature2Pyramid', embed_dim=768, rescales=[4, 2, 1, 0.5]),  # embed_dim equals the backbone's embed_dims
22 |     decode_head=dict(
23 |         type='UPerHead',
24 |         in_channels=[384, 384, 384, 384],  # NOTE(review): backbone/neck above use 768 - presumably overridden by derived configs; confirm
25 |         in_index=[0, 1, 2, 3],
26 |         pool_scales=(1, 2, 3, 6),
27 |         channels=512,
28 |         dropout_ratio=0.1,
29 |         num_classes=19,
30 |         norm_cfg=norm_cfg,
31 |         align_corners=False,
32 |         loss_decode=dict(
33 |             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
34 |     auxiliary_head=dict(
35 |         type='FCNHead',
36 |         in_channels=384,  # NOTE(review): same 384-vs-768 question as decode_head.in_channels; confirm
37 |         in_index=2,
38 |         channels=256,
39 |         num_convs=1,
40 |         concat_input=False,
41 |         dropout_ratio=0.1,
42 |         num_classes=19,
43 |         norm_cfg=norm_cfg,
44 |         align_corners=False,
45 |         loss_decode=dict(
46 |             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),  # auxiliary loss weighted 0.4 vs 1.0 for decode_head
47 |     # model training and testing settings
48 |     train_cfg=dict(),
49 |     test_cfg=dict(mode='whole'))
50 | 


--------------------------------------------------------------------------------
/mmseg_custom/configs/_base_/models/upernet_beit.py:
--------------------------------------------------------------------------------
 1 | norm_cfg = dict(type='SyncBN', requires_grad=True)  # used by both heads; the backbone sets its own LN config inline
 2 | model = dict(
 3 |     type='EncoderDecoder',
 4 |     pretrained=None,
 5 |     backbone=dict(
 6 |         type='BEiT',
 7 |         img_size=(640, 640),
 8 |         patch_size=16,
 9 |         in_channels=3,
10 |         embed_dims=768,
11 |         num_layers=12,
12 |         num_heads=12,
13 |         mlp_ratio=4,
14 |         out_indices=(3, 5, 7, 11),
15 |         qv_bias=True,
16 |         attn_drop_rate=0.0,
17 |         drop_path_rate=0.1,
18 |         norm_cfg=dict(type='LN', eps=1e-6),
19 |         act_cfg=dict(type='GELU'),
20 |         norm_eval=False,
21 |         init_values=0.1),
22 |     neck=dict(type='Feature2Pyramid', embed_dim=768, rescales=[4, 2, 1, 0.5]),  # embed_dim equals the backbone's embed_dims
23 |     decode_head=dict(
24 |         type='UPerHead',
25 |         in_channels=[768, 768, 768, 768],  # same width as embed_dims / the neck's embed_dim
26 |         in_index=[0, 1, 2, 3],
27 |         pool_scales=(1, 2, 3, 6),
28 |         channels=768,
29 |         dropout_ratio=0.1,
30 |         num_classes=150,
31 |         norm_cfg=norm_cfg,
32 |         align_corners=False,
33 |         loss_decode=dict(
34 |             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
35 |     auxiliary_head=dict(
36 |         type='FCNHead',
37 |         in_channels=768,
38 |         in_index=2,
39 |         channels=256,
40 |         num_convs=1,
41 |         concat_input=False,
42 |         dropout_ratio=0.1,
43 |         num_classes=150,
44 |         norm_cfg=norm_cfg,
45 |         align_corners=False,
46 |         loss_decode=dict(
47 |             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),  # auxiliary loss weighted 0.4 vs 1.0 for decode_head
48 |     # model training and testing settings
49 |     train_cfg=dict(),
50 |     test_cfg=dict(mode='whole'))
51 | 


--------------------------------------------------------------------------------
/mmseg_custom/configs/_base_/models/upernet_convnext.py:
--------------------------------------------------------------------------------
 1 | norm_cfg = dict(type='SyncBN', requires_grad=True)  # used by both heads below
 2 | custom_imports = dict(imports='mmcls.models', allow_failed_imports=False)  # presumably makes the 'mmcls.ConvNeXt' type below resolvable; confirm
 3 | checkpoint_file = 'https://download.openmmlab.com/mmclassification/v0/convnext/downstream/convnext-base_3rdparty_32xb128-noema_in1k_20220301-2a0ee547.pth'  # noqa
 4 | model = dict(
 5 |     type='EncoderDecoder',
 6 |     pretrained=None,
 7 |     backbone=dict(
 8 |         type='mmcls.ConvNeXt',
 9 |         arch='base',
10 |         out_indices=[0, 1, 2, 3],
11 |         drop_path_rate=0.4,
12 |         layer_scale_init_value=1.0,
13 |         gap_before_final_norm=False,
14 |         init_cfg=dict(
15 |             type='Pretrained', checkpoint=checkpoint_file,
16 |             prefix='backbone.')),  # presumably selects only the 'backbone.'-prefixed weights from the checkpoint; confirm
17 |     decode_head=dict(
18 |         type='UPerHead',
19 |         in_channels=[128, 256, 512, 1024],
20 |         in_index=[0, 1, 2, 3],
21 |         pool_scales=(1, 2, 3, 6),
22 |         channels=512,
23 |         dropout_ratio=0.1,
24 |         num_classes=19,
25 |         norm_cfg=norm_cfg,
26 |         align_corners=False,
27 |         loss_decode=dict(
28 |             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
29 |     auxiliary_head=dict(
30 |         type='FCNHead',
31 |         in_channels=384,  # NOTE(review): decode_head.in_channels[2] is 512 but this head (in_index=2) declares 384 - confirm derived configs override it
32 |         in_index=2,
33 |         channels=256,
34 |         num_convs=1,
35 |         concat_input=False,
36 |         dropout_ratio=0.1,
37 |         num_classes=19,
38 |         norm_cfg=norm_cfg,
39 |         align_corners=False,
40 |         loss_decode=dict(
41 |             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),  # auxiliary loss weighted 0.4 vs 1.0 for decode_head
42 |     # model training and testing settings
43 |     train_cfg=dict(),
44 |     test_cfg=dict(mode='whole'))
45 | 


--------------------------------------------------------------------------------
/mmseg_custom/core/utils/dist_util.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) OpenMMLab. All rights reserved.
 2 | import numpy as np
 3 | import torch
 4 | import torch.distributed as dist
 5 | from mmcv.runner import get_dist_info
 6 | 
 7 | 
 8 | def check_dist_init():  # reports whether torch.distributed can be used right now
 9 |     return dist.is_available() and dist.is_initialized()  # both must hold: package available AND already initialized
10 | 
11 | 
12 | def sync_random_seed(seed=None, device='cuda'):
13 |     """Make sure different ranks share the same seed. All workers must call
14 |     this function, otherwise it will deadlock. This method is generally used in
15 |     `DistributedSampler`, because the seed should be identical across all
16 |     processes in the distributed group.
17 | 
18 |     In distributed sampling, different ranks should sample non-overlapped
19 |     data in the dataset. Therefore, this function is used to make sure that
20 |     each rank shuffles the data indices in the same order based
21 |     on the same seed. Then different ranks could use different indices
22 |     to select non-overlapped data from the same data list.
23 | 
24 |     Args:
25 |         seed (int, Optional): The seed. Default to None.
26 |         device (str): The device where the seed will be put on.
27 |             Default to 'cuda'.
28 |     Returns:
29 |         int: Seed to be used.
30 |     """
31 | 
32 |     if seed is None:
33 |         seed = np.random.randint(2**31)  # drawn locally on every rank; only rank 0's value survives the broadcast below
34 |     assert isinstance(seed, int)  # NOTE(review): assert is skipped under -O, and the int32 tensors below cap usable seeds below 2**31
35 | 
36 |     rank, world_size = get_dist_info()
37 | 
38 |     if world_size == 1:  # single process: nothing to synchronize
39 |         return seed
40 | 
41 |     if rank == 0:  # rank 0 supplies the seed that every rank ends up with
42 |         random_num = torch.tensor(seed, dtype=torch.int32, device=device)
43 |     else:  # other ranks allocate a placeholder for the broadcast to overwrite
44 |         random_num = torch.tensor(0, dtype=torch.int32, device=device)
45 |     dist.broadcast(random_num, src=0)  # collective call sourced from rank 0
46 |     return random_num.item()  # convert the 0-dim tensor back to a Python number
47 | 


--------------------------------------------------------------------------------
/mmseg_custom/configs/_base_/models/pspnet_unet_s5-d16.py:
--------------------------------------------------------------------------------
 1 | # model settings: UNet backbone, PSPHead decode head, FCNHead auxiliary head (2 classes, sliding-window test)
 2 | norm_cfg = dict(type='SyncBN', requires_grad=True)  # reused by the backbone and both heads below
 3 | model = dict(
 4 |     type='EncoderDecoder',
 5 |     pretrained=None,
 6 |     backbone=dict(
 7 |         type='UNet',
 8 |         in_channels=3,
 9 |         base_channels=64,
10 |         num_stages=5,
11 |         strides=(1, 1, 1, 1, 1),
12 |         enc_num_convs=(2, 2, 2, 2, 2),
13 |         dec_num_convs=(2, 2, 2, 2),
14 |         downsamples=(True, True, True, True),
15 |         enc_dilations=(1, 1, 1, 1, 1),
16 |         dec_dilations=(1, 1, 1, 1),
17 |         with_cp=False,
18 |         conv_cfg=None,
19 |         norm_cfg=norm_cfg,
20 |         act_cfg=dict(type='ReLU'),
21 |         upsample_cfg=dict(type='InterpConv'),
22 |         norm_eval=False),
23 |     decode_head=dict(
24 |         type='PSPHead',
25 |         in_channels=64,  # same value as the backbone's base_channels
26 |         in_index=4,
27 |         channels=16,
28 |         pool_scales=(1, 2, 3, 6),
29 |         dropout_ratio=0.1,
30 |         num_classes=2,
31 |         norm_cfg=norm_cfg,
32 |         align_corners=False,
33 |         loss_decode=dict(
34 |             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
35 |     auxiliary_head=dict(
36 |         type='FCNHead',
37 |         in_channels=128,  # assumes backbone output 3 has 2 * base_channels channels - TODO confirm
38 |         in_index=3,
39 |         channels=64,
40 |         num_convs=1,
41 |         concat_input=False,
42 |         dropout_ratio=0.1,
43 |         num_classes=2,
44 |         norm_cfg=norm_cfg,
45 |         align_corners=False,
46 |         loss_decode=dict(
47 |             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),  # auxiliary loss weighted 0.4 vs 1.0 for decode_head
48 |     # model training and testing settings
49 |     train_cfg=dict(),
50 |     test_cfg=dict(mode='slide', crop_size=256, stride=170))  # stride (170) is smaller than crop_size (256)
51 | 


--------------------------------------------------------------------------------
/mmseg_custom/configs/_base_/models/deeplabv3_unet_s5-d16.py:
--------------------------------------------------------------------------------
 1 | # model settings: UNet backbone, ASPPHead decode head, FCNHead auxiliary head (2 classes, sliding-window test)
 2 | norm_cfg = dict(type='SyncBN', requires_grad=True)  # reused by the backbone and both heads below
 3 | model = dict(
 4 |     type='EncoderDecoder',
 5 |     pretrained=None,
 6 |     backbone=dict(
 7 |         type='UNet',
 8 |         in_channels=3,
 9 |         base_channels=64,
10 |         num_stages=5,
11 |         strides=(1, 1, 1, 1, 1),
12 |         enc_num_convs=(2, 2, 2, 2, 2),
13 |         dec_num_convs=(2, 2, 2, 2),
14 |         downsamples=(True, True, True, True),
15 |         enc_dilations=(1, 1, 1, 1, 1),
16 |         dec_dilations=(1, 1, 1, 1),
17 |         with_cp=False,
18 |         conv_cfg=None,
19 |         norm_cfg=norm_cfg,
20 |         act_cfg=dict(type='ReLU'),
21 |         upsample_cfg=dict(type='InterpConv'),
22 |         norm_eval=False),
23 |     decode_head=dict(
24 |         type='ASPPHead',
25 |         in_channels=64,  # same value as the backbone's base_channels
26 |         in_index=4,
27 |         channels=16,
28 |         dilations=(1, 12, 24, 36),
29 |         dropout_ratio=0.1,
30 |         num_classes=2,
31 |         norm_cfg=norm_cfg,
32 |         align_corners=False,
33 |         loss_decode=dict(
34 |             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
35 |     auxiliary_head=dict(
36 |         type='FCNHead',
37 |         in_channels=128,  # assumes backbone output 3 has 2 * base_channels channels - TODO confirm
38 |         in_index=3,
39 |         channels=64,
40 |         num_convs=1,
41 |         concat_input=False,
42 |         dropout_ratio=0.1,
43 |         num_classes=2,
44 |         norm_cfg=norm_cfg,
45 |         align_corners=False,
46 |         loss_decode=dict(
47 |             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),  # auxiliary loss weighted 0.4 vs 1.0 for decode_head
48 |     # model training and testing settings
49 |     train_cfg=dict(),
50 |     test_cfg=dict(mode='slide', crop_size=256, stride=170))  # stride (170) is smaller than crop_size (256)
51 | 


--------------------------------------------------------------------------------
/mmseg_custom/datasets/pipelines/compose.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) OpenMMLab. All rights reserved.
 2 | import collections
 3 | 
 4 | from mmcv.utils import build_from_cfg
 5 | 
 6 | from ..builder import PIPELINES
 7 | 
 8 | 
 9 | @PIPELINES.register_module()  # registers Compose itself in the PIPELINES registry
10 | class Compose(object):
11 |     """Compose multiple transforms sequentially.
12 | 
13 |     Args:
14 |         transforms (Sequence[dict | callable]): Sequence of transform object or
15 |             config dict to be composed.
16 |     """
17 | 
18 |     def __init__(self, transforms):
19 |         assert isinstance(transforms, collections.abc.Sequence)  # NOTE(review): assert is skipped under -O
20 |         self.transforms = []
21 |         for transform in transforms:
22 |             if isinstance(transform, dict):  # config dict: build the transform through the registry
23 |                 transform = build_from_cfg(transform, PIPELINES)
24 |                 self.transforms.append(transform)
25 |             elif callable(transform):  # already a transform object: keep as-is
26 |                 self.transforms.append(transform)
27 |             else:
28 |                 raise TypeError('transform must be callable or a dict')
29 | 
30 |     def __call__(self, data):
31 |         """Call function to apply transforms sequentially.
32 | 
33 |         Args:
34 |             data (dict): A result dict contains the data to transform.
35 | 
36 |         Returns:
37 |            dict: Transformed data.
38 |         """
39 | 
40 |         for t in self.transforms:
41 |             data = t(data)  # each transform receives the previous transform's output
42 |             if data is None:  # a transform returning None stops the pipeline early
43 |                 return None
44 |         return data
45 | 
46 |     def __repr__(self):
47 |         format_string = self.__class__.__name__ + '('
48 |         for t in self.transforms:  # one indented line per transform
49 |             format_string += '\n'
50 |             format_string += f'    {t}'
51 |         format_string += '\n)'
52 |         return format_string
53 | 


--------------------------------------------------------------------------------
/mmseg_custom/configs/_base_/models/fcn_unet_s5-d16.py:
--------------------------------------------------------------------------------
 1 | # model settings: UNet backbone with FCNHead as both decode and auxiliary head (2 classes, sliding-window test)
 2 | norm_cfg = dict(type='SyncBN', requires_grad=True)  # reused by the backbone and both heads below
 3 | model = dict(
 4 |     type='EncoderDecoder',
 5 |     pretrained=None,
 6 |     backbone=dict(
 7 |         type='UNet',
 8 |         in_channels=3,
 9 |         base_channels=64,
10 |         num_stages=5,
11 |         strides=(1, 1, 1, 1, 1),
12 |         enc_num_convs=(2, 2, 2, 2, 2),
13 |         dec_num_convs=(2, 2, 2, 2),
14 |         downsamples=(True, True, True, True),
15 |         enc_dilations=(1, 1, 1, 1, 1),
16 |         dec_dilations=(1, 1, 1, 1),
17 |         with_cp=False,
18 |         conv_cfg=None,
19 |         norm_cfg=norm_cfg,
20 |         act_cfg=dict(type='ReLU'),
21 |         upsample_cfg=dict(type='InterpConv'),
22 |         norm_eval=False),
23 |     decode_head=dict(
24 |         type='FCNHead',
25 |         in_channels=64,  # same value as the backbone's base_channels
26 |         in_index=4,
27 |         channels=64,
28 |         num_convs=1,
29 |         concat_input=False,
30 |         dropout_ratio=0.1,
31 |         num_classes=2,
32 |         norm_cfg=norm_cfg,
33 |         align_corners=False,
34 |         loss_decode=dict(
35 |             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
36 |     auxiliary_head=dict(
37 |         type='FCNHead',
38 |         in_channels=128,  # assumes backbone output 3 has 2 * base_channels channels - TODO confirm
39 |         in_index=3,
40 |         channels=64,
41 |         num_convs=1,
42 |         concat_input=False,
43 |         dropout_ratio=0.1,
44 |         num_classes=2,
45 |         norm_cfg=norm_cfg,
46 |         align_corners=False,
47 |         loss_decode=dict(
48 |             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),  # auxiliary loss weighted 0.4 vs 1.0 for decode_head
49 |     # model training and testing settings
50 |     train_cfg=dict(),
51 |     test_cfg=dict(mode='slide', crop_size=256, stride=170))  # stride (170) is smaller than crop_size (256)
52 | 


--------------------------------------------------------------------------------
/mmseg_custom/configs/_base_/models/fastfcn_r50-d32_jpu_psp.py:
--------------------------------------------------------------------------------
 1 | # model settings: ResNetV1c-50 backbone -> JPU neck -> PSPHead, with an FCNHead auxiliary head
 2 | norm_cfg = dict(type='SyncBN', requires_grad=True)  # reused by the backbone, neck and both heads below
 3 | model = dict(
 4 |     type='EncoderDecoder',
 5 |     pretrained='open-mmlab://resnet50_v1c',
 6 |     backbone=dict(
 7 |         type='ResNetV1c',
 8 |         depth=50,
 9 |         num_stages=4,
10 |         dilations=(1, 1, 2, 4),
11 |         strides=(1, 2, 2, 2),
12 |         out_indices=(1, 2, 3),  # only three outputs are requested, matching the neck's three in_channels
13 |         norm_cfg=norm_cfg,
14 |         norm_eval=False,
15 |         style='pytorch',
16 |         contract_dilation=True),
17 |     neck=dict(
18 |         type='JPU',
19 |         in_channels=(512, 1024, 2048),
20 |         mid_channels=512,
21 |         start_level=0,
22 |         end_level=-1,
23 |         dilations=(1, 2, 4, 8),
24 |         align_corners=False,
25 |         norm_cfg=norm_cfg),
26 |     decode_head=dict(
27 |         type='PSPHead',
28 |         in_channels=2048,
29 |         in_index=2,  # presumably indexes the neck's output tuple, not the backbone stages; confirm
30 |         channels=512,
31 |         pool_scales=(1, 2, 3, 6),
32 |         dropout_ratio=0.1,
33 |         num_classes=19,
34 |         norm_cfg=norm_cfg,
35 |         align_corners=False,
36 |         loss_decode=dict(
37 |             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
38 |     auxiliary_head=dict(
39 |         type='FCNHead',
40 |         in_channels=1024,
41 |         in_index=1,
42 |         channels=256,
43 |         num_convs=1,
44 |         concat_input=False,
45 |         dropout_ratio=0.1,
46 |         num_classes=19,
47 |         norm_cfg=norm_cfg,
48 |         align_corners=False,
49 |         loss_decode=dict(
50 |             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),  # auxiliary loss weighted 0.4 vs 1.0 for decode_head
51 |     # model training and testing settings
52 |     train_cfg=dict(),
53 |     test_cfg=dict(mode='whole'))
54 | 


--------------------------------------------------------------------------------
/mmseg_custom/configs/_base_/models/fcn_hr18.py:
--------------------------------------------------------------------------------
 1 | # model settings: HRNet (w18 widths) backbone with a single FCNHead over all four branches; no auxiliary head
 2 | norm_cfg = dict(type='SyncBN', requires_grad=True)  # reused by the backbone and the decode head below
 3 | model = dict(
 4 |     type='EncoderDecoder',
 5 |     pretrained='open-mmlab://msra/hrnetv2_w18',
 6 |     backbone=dict(
 7 |         type='HRNet',
 8 |         norm_cfg=norm_cfg,
 9 |         norm_eval=False,
10 |         extra=dict(
11 |             stage1=dict(
12 |                 num_modules=1,
13 |                 num_branches=1,
14 |                 block='BOTTLENECK',
15 |                 num_blocks=(4, ),
16 |                 num_channels=(64, )),
17 |             stage2=dict(
18 |                 num_modules=1,
19 |                 num_branches=2,
20 |                 block='BASIC',
21 |                 num_blocks=(4, 4),
22 |                 num_channels=(18, 36)),
23 |             stage3=dict(
24 |                 num_modules=4,
25 |                 num_branches=3,
26 |                 block='BASIC',
27 |                 num_blocks=(4, 4, 4),
28 |                 num_channels=(18, 36, 72)),
29 |             stage4=dict(
30 |                 num_modules=3,
31 |                 num_branches=4,
32 |                 block='BASIC',
33 |                 num_blocks=(4, 4, 4, 4),
34 |                 num_channels=(18, 36, 72, 144)))),
35 |     decode_head=dict(
36 |         type='FCNHead',
37 |         in_channels=[18, 36, 72, 144],  # same widths as stage4's num_channels
38 |         in_index=(0, 1, 2, 3),
39 |         channels=sum([18, 36, 72, 144]),  # = 270, the total of the four input widths
40 |         input_transform='resize_concat',
41 |         kernel_size=1,
42 |         num_convs=1,
43 |         concat_input=False,
44 |         dropout_ratio=-1,  # NOTE(review): presumably a sentinel that disables dropout - confirm in the head implementation
45 |         num_classes=19,
46 |         norm_cfg=norm_cfg,
47 |         align_corners=False,
48 |         loss_decode=dict(
49 |             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
50 |     # model training and testing settings
51 |     train_cfg=dict(),
52 |     test_cfg=dict(mode='whole'))
53 | 


--------------------------------------------------------------------------------
/mmseg_custom/configs/_base_/models/upernet_swin.py:
--------------------------------------------------------------------------------
 1 | # model settings: SwinTransformer backbone, UPerHead decode head, FCNHead auxiliary head
 2 | norm_cfg = dict(type='SyncBN', requires_grad=True)  # used by both heads
 3 | backbone_norm_cfg = dict(type='LN', requires_grad=True)  # used only by the backbone
 4 | model = dict(
 5 |     type='EncoderDecoder',
 6 |     pretrained=None,
 7 |     backbone=dict(
 8 |         type='SwinTransformer',
 9 |         pretrain_img_size=224,
10 |         embed_dims=96,
11 |         patch_size=4,
12 |         window_size=7,
13 |         mlp_ratio=4,
14 |         depths=[2, 2, 6, 2],
15 |         num_heads=[3, 6, 12, 24],
16 |         strides=(4, 2, 2, 2),
17 |         out_indices=(0, 1, 2, 3),
18 |         qkv_bias=True,
19 |         qk_scale=None,
20 |         patch_norm=True,
21 |         drop_rate=0.,
22 |         attn_drop_rate=0.,
23 |         drop_path_rate=0.3,
24 |         use_abs_pos_embed=False,
25 |         act_cfg=dict(type='GELU'),
26 |         norm_cfg=backbone_norm_cfg),
27 |     decode_head=dict(
28 |         type='UPerHead',
29 |         # alternative head type, currently disabled: type='UPerHeadASAlign',
30 |         in_channels=[96, 192, 384, 768],  # starts at embed_dims (96) and doubles per entry
31 |         in_index=[0, 1, 2, 3],
32 |         pool_scales=(1, 2, 3, 6),
33 |         # previous value, currently disabled: channels=512,
34 |         channels=256,
35 |         dropout_ratio=0.1,
36 |         num_classes=19,
37 |         norm_cfg=norm_cfg,
38 |         align_corners=False,
39 |         loss_decode=dict(
40 |             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
41 |     auxiliary_head=dict(
42 |         type='FCNHead',
43 |         in_channels=384,  # equals decode_head.in_channels[2], matching in_index=2
44 |         in_index=2,
45 |         channels=256,
46 |         num_convs=1,
47 |         concat_input=False,
48 |         dropout_ratio=0.1,
49 |         num_classes=19,
50 |         norm_cfg=norm_cfg,
51 |         align_corners=False,
52 |         loss_decode=dict(
53 |             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),  # auxiliary loss weighted 0.4 vs 1.0 for decode_head
54 |     # model training and testing settings
55 |     train_cfg=dict(),
56 |     test_cfg=dict(mode='whole'))
57 | 


--------------------------------------------------------------------------------
/mmseg_custom/configs/_base_/models/twins_pcpvt-s_upernet.py:
--------------------------------------------------------------------------------
 1 | checkpoint = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/twins/pcpvt_small_20220308-e638c41c.pth'  # noqa
 2 | 
 3 | # model settings: PCPVT backbone, UPerHead decode head, FCNHead auxiliary head (no neck)
 4 | backbone_norm_cfg = dict(type='LN')  # used only by the backbone
 5 | norm_cfg = dict(type='SyncBN', requires_grad=True)  # used by both heads
 6 | model = dict(
 7 |     type='EncoderDecoder',
 8 |     backbone=dict(
 9 |         type='PCPVT',
10 |         init_cfg=dict(type='Pretrained', checkpoint=checkpoint),  # weights come from the URL defined at the top
11 |         in_channels=3,
12 |         embed_dims=[64, 128, 320, 512],
13 |         num_heads=[1, 2, 5, 8],
14 |         patch_sizes=[4, 2, 2, 2],
15 |         strides=[4, 2, 2, 2],
16 |         mlp_ratios=[8, 8, 4, 4],
17 |         out_indices=(0, 1, 2, 3),
18 |         qkv_bias=True,
19 |         norm_cfg=backbone_norm_cfg,
20 |         depths=[3, 4, 6, 3],
21 |         sr_ratios=[8, 4, 2, 1],
22 |         norm_after_stage=False,
23 |         drop_rate=0.0,
24 |         attn_drop_rate=0.,
25 |         drop_path_rate=0.2),
26 |     decode_head=dict(
27 |         type='UPerHead',
28 |         in_channels=[64, 128, 320, 512],  # same values as the backbone's embed_dims
29 |         in_index=[0, 1, 2, 3],
30 |         pool_scales=(1, 2, 3, 6),
31 |         channels=512,
32 |         dropout_ratio=0.1,
33 |         num_classes=150,
34 |         norm_cfg=norm_cfg,
35 |         align_corners=False,
36 |         loss_decode=dict(
37 |             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
38 |     auxiliary_head=dict(
39 |         type='FCNHead',
40 |         in_channels=320,  # equals embed_dims[2], matching in_index=2
41 |         in_index=2,
42 |         channels=256,
43 |         num_convs=1,
44 |         concat_input=False,
45 |         dropout_ratio=0.1,
46 |         num_classes=150,
47 |         norm_cfg=norm_cfg,
48 |         align_corners=False,
49 |         loss_decode=dict(
50 |             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),  # auxiliary loss weighted 0.4 vs 1.0 for decode_head
51 |     # model training and testing settings
52 |     train_cfg=dict(),
53 |     test_cfg=dict(mode='whole'))
54 | 


--------------------------------------------------------------------------------
/mmseg_custom/configs/_base_/models/pointrend_r50.py:
--------------------------------------------------------------------------------
 1 | # model settings: CascadeEncoderDecoder on ResNetV1c-50 + FPN, with an FPNHead followed by a PointHead
 2 | norm_cfg = dict(type='SyncBN', requires_grad=True)  # used by the backbone and the FPNHead
 3 | model = dict(
 4 |     type='CascadeEncoderDecoder',
 5 |     num_stages=2,  # the decode_head list below contains two heads
 6 |     pretrained='open-mmlab://resnet50_v1c',
 7 |     backbone=dict(
 8 |         type='ResNetV1c',
 9 |         depth=50,
10 |         num_stages=4,
11 |         out_indices=(0, 1, 2, 3),
12 |         dilations=(1, 1, 1, 1),
13 |         strides=(1, 2, 2, 2),
14 |         norm_cfg=norm_cfg,
15 |         norm_eval=False,
16 |         style='pytorch',
17 |         contract_dilation=True),
18 |     neck=dict(
19 |         type='FPN',
20 |         in_channels=[256, 512, 1024, 2048],
21 |         out_channels=256,
22 |         num_outs=4),
23 |     decode_head=[
24 |         dict(
25 |             type='FPNHead',
26 |             in_channels=[256, 256, 256, 256],  # four inputs, each as wide as the neck's out_channels
27 |             in_index=[0, 1, 2, 3],
28 |             feature_strides=[4, 8, 16, 32],
29 |             channels=128,
30 |             dropout_ratio=-1,
31 |             num_classes=19,
32 |             norm_cfg=norm_cfg,
33 |             align_corners=False,
34 |             loss_decode=dict(
35 |                 type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
36 |         dict(
37 |             type='PointHead',
38 |             in_channels=[256],
39 |             in_index=[0],
40 |             channels=256,
41 |             num_fcs=3,
42 |             coarse_pred_each_layer=True,
43 |             dropout_ratio=-1,
44 |             num_classes=19,
45 |             align_corners=False,
46 |             loss_decode=dict(
47 |                 type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0))
48 |     ],
49 |     # model training and testing settings
50 |     train_cfg=dict(
51 |         num_points=2048, oversample_ratio=3, importance_sample_ratio=0.75),
52 |     test_cfg=dict(
53 |         mode='whole',
54 |         subdivision_steps=2,
55 |         subdivision_num_points=8196,  # NOTE(review): 8196 is not a power of two - confirm 8192 was not intended
56 |         scale_factor=2))
57 | 


--------------------------------------------------------------------------------
/mmseg_custom/configs/_base_/models/upernet_vit-b16_ln_mln.py:
--------------------------------------------------------------------------------
 1 | # model settings: EncoderDecoder with a VisionTransformer backbone, MultiLevelNeck, UPerHead decode head and FCNHead auxiliary head
 2 | norm_cfg = dict(type='SyncBN', requires_grad=True)  # used by both heads; the backbone sets its own LN config inline
 3 | model = dict(
 4 |     type='EncoderDecoder',
 5 |     pretrained='pretrain/jx_vit_base_p16_224-80ecf9dd.pth',  # relative path - presumably resolved from the working directory; confirm
 6 |     backbone=dict(
 7 |         type='VisionTransformer',
 8 |         img_size=(512, 512),
 9 |         patch_size=16,
10 |         in_channels=3,
11 |         embed_dims=768,
12 |         num_layers=12,
13 |         num_heads=12,
14 |         mlp_ratio=4,
15 |         out_indices=(2, 5, 8, 11),
16 |         qkv_bias=True,
17 |         drop_rate=0.0,
18 |         attn_drop_rate=0.0,
19 |         drop_path_rate=0.0,
20 |         with_cls_token=True,
21 |         norm_cfg=dict(type='LN', eps=1e-6),
22 |         act_cfg=dict(type='GELU'),
23 |         norm_eval=False,
24 |         interpolate_mode='bicubic'),
25 |     neck=dict(
26 |         type='MultiLevelNeck',
27 |         in_channels=[768, 768, 768, 768],  # one entry per backbone out_index, each equal to embed_dims
28 |         out_channels=768,
29 |         scales=[4, 2, 1, 0.5]),
30 |     decode_head=dict(
31 |         type='UPerHead',
32 |         in_channels=[768, 768, 768, 768],  # four inputs, each as wide as the neck's out_channels
33 |         in_index=[0, 1, 2, 3],
34 |         pool_scales=(1, 2, 3, 6),
35 |         channels=512,
36 |         dropout_ratio=0.1,
37 |         num_classes=19,
38 |         norm_cfg=norm_cfg,
39 |         align_corners=False,
40 |         loss_decode=dict(
41 |             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
42 |     auxiliary_head=dict(
43 |         type='FCNHead',
44 |         in_channels=768,
45 |         in_index=3,
46 |         channels=256,
47 |         num_convs=1,
48 |         concat_input=False,
49 |         dropout_ratio=0.1,
50 |         num_classes=19,
51 |         norm_cfg=norm_cfg,
52 |         align_corners=False,
53 |         loss_decode=dict(
54 |             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),  # auxiliary loss is weighted 0.4 vs 1.0 for the decode head
55 |     # model training and testing settings
56 |     train_cfg=dict(),
57 |     test_cfg=dict(mode='whole'))  # yapf: disable
58 | 


--------------------------------------------------------------------------------
/mmseg_custom/configs/_base_/models/fast_scnn.py:
--------------------------------------------------------------------------------
 1 | # model settings: EncoderDecoder with a FastSCNN backbone, a DepthwiseSeparableFCNHead decode head and two FCNHead auxiliary heads
 2 | norm_cfg = dict(type='SyncBN', requires_grad=True, momentum=0.01)  # shared by the backbone and every head
 3 | model = dict(
 4 |     type='EncoderDecoder',
 5 |     backbone=dict(
 6 |         type='FastSCNN',
 7 |         downsample_dw_channels=(32, 48),
 8 |         global_in_channels=64,
 9 |         global_block_channels=(64, 96, 128),
10 |         global_block_strides=(2, 2, 1),
11 |         global_out_channels=128,
12 |         higher_in_channels=64,
13 |         lower_in_channels=128,
14 |         fusion_out_channels=128,
15 |         out_indices=(0, 1, 2),
16 |         norm_cfg=norm_cfg,
17 |         align_corners=False),
18 |     decode_head=dict(
19 |         type='DepthwiseSeparableFCNHead',
20 |         in_channels=128,
21 |         channels=128,
22 |         concat_input=False,
23 |         num_classes=19,
24 |         in_index=-1,  # last backbone output
25 |         norm_cfg=norm_cfg,
26 |         align_corners=False,
27 |         loss_decode=dict(
28 |             type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1)),  # NOTE(review): use_sigmoid=True here and in both auxiliary heads, unlike the other model configs in this dump - confirm intended
29 |     auxiliary_head=[
30 |         dict(
31 |             type='FCNHead',
32 |             in_channels=128,
33 |             channels=32,
34 |             num_convs=1,
35 |             num_classes=19,
36 |             in_index=-2,  # second-to-last backbone output
37 |             norm_cfg=norm_cfg,
38 |             concat_input=False,
39 |             align_corners=False,
40 |             loss_decode=dict(
41 |                 type='CrossEntropyLoss', use_sigmoid=True, loss_weight=0.4)),
42 |         dict(
43 |             type='FCNHead',
44 |             in_channels=64,
45 |             channels=32,
46 |             num_convs=1,
47 |             num_classes=19,
48 |             in_index=-3,  # third-to-last backbone output
49 |             norm_cfg=norm_cfg,
50 |             concat_input=False,
51 |             align_corners=False,
52 |             loss_decode=dict(
53 |                 type='CrossEntropyLoss', use_sigmoid=True, loss_weight=0.4)),
54 |     ],
55 |     # model training and testing settings
56 |     train_cfg=dict(),
57 |     test_cfg=dict(mode='whole'))
58 | 


--------------------------------------------------------------------------------
/mmseg_custom/configs/_base_/datasets/loveda.py:
--------------------------------------------------------------------------------
 1 | # dataset settings: LoveDADataset under data/loveDA, trained on 512x512 crops
 2 | dataset_type = 'LoveDADataset'
 3 | data_root = 'data/loveDA'
 4 | img_norm_cfg = dict(  # expanded into every Normalize step via **img_norm_cfg
 5 |     mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
 6 | crop_size = (512, 512)  # shared by RandomCrop and Pad in train_pipeline
 7 | train_pipeline = [
 8 |     dict(type='LoadImageFromFile'),
 9 |     dict(type='LoadAnnotations', reduce_zero_label=True),
10 |     dict(type='Resize', img_scale=(2048, 512), ratio_range=(0.5, 2.0)),
11 |     dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
12 |     dict(type='RandomFlip', prob=0.5),
13 |     dict(type='PhotoMetricDistortion'),
14 |     dict(type='Normalize', **img_norm_cfg),
15 |     dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
16 |     dict(type='DefaultFormatBundle'),
17 |     dict(type='Collect', keys=['img', 'gt_semantic_seg']),
18 | ]
19 | test_pipeline = [  # used by both the val and test entries of `data`
20 |     dict(type='LoadImageFromFile'),
21 |     dict(
22 |         type='MultiScaleFlipAug',
23 |         img_scale=(1024, 1024),
24 |         # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75],
25 |         flip=False,
26 |         transforms=[
27 |             dict(type='Resize', keep_ratio=True),
28 |             dict(type='RandomFlip'),
29 |             dict(type='Normalize', **img_norm_cfg),
30 |             dict(type='ImageToTensor', keys=['img']),
31 |             dict(type='Collect', keys=['img']),
32 |         ])
33 | ]
34 | data = dict(
35 |     samples_per_gpu=4,
36 |     workers_per_gpu=4,
37 |     train=dict(
38 |         type=dataset_type,
39 |         data_root=data_root,
40 |         img_dir='img_dir/train',
41 |         ann_dir='ann_dir/train',
42 |         pipeline=train_pipeline),
43 |     val=dict(
44 |         type=dataset_type,
45 |         data_root=data_root,
46 |         img_dir='img_dir/val',
47 |         ann_dir='ann_dir/val',
48 |         pipeline=test_pipeline),
49 |     test=dict(  # points at the same directories as `val`
50 |         type=dataset_type,
51 |         data_root=data_root,
52 |         img_dir='img_dir/val',
53 |         ann_dir='ann_dir/val',
54 |         pipeline=test_pipeline))
55 | 


--------------------------------------------------------------------------------
/mmseg_custom/configs/_base_/datasets/potsdam.py:
--------------------------------------------------------------------------------
 1 | # dataset settings: PotsdamDataset under data/potsdam, trained on 512x512 crops
 2 | dataset_type = 'PotsdamDataset'
 3 | data_root = 'data/potsdam'
 4 | img_norm_cfg = dict(  # expanded into every Normalize step via **img_norm_cfg
 5 |     mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
 6 | crop_size = (512, 512)  # shared by RandomCrop and Pad in train_pipeline
 7 | train_pipeline = [
 8 |     dict(type='LoadImageFromFile'),
 9 |     dict(type='LoadAnnotations', reduce_zero_label=True),
10 |     dict(type='Resize', img_scale=(512, 512), ratio_range=(0.5, 2.0)),
11 |     dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
12 |     dict(type='RandomFlip', prob=0.5),
13 |     dict(type='PhotoMetricDistortion'),
14 |     dict(type='Normalize', **img_norm_cfg),
15 |     dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
16 |     dict(type='DefaultFormatBundle'),
17 |     dict(type='Collect', keys=['img', 'gt_semantic_seg']),
18 | ]
19 | test_pipeline = [  # used by both the val and test entries of `data`
20 |     dict(type='LoadImageFromFile'),
21 |     dict(
22 |         type='MultiScaleFlipAug',
23 |         img_scale=(512, 512),
24 |         # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75],
25 |         flip=False,
26 |         transforms=[
27 |             dict(type='Resize', keep_ratio=True),
28 |             dict(type='RandomFlip'),
29 |             dict(type='Normalize', **img_norm_cfg),
30 |             dict(type='ImageToTensor', keys=['img']),
31 |             dict(type='Collect', keys=['img']),
32 |         ])
33 | ]
34 | data = dict(
35 |     samples_per_gpu=4,
36 |     workers_per_gpu=4,
37 |     train=dict(
38 |         type=dataset_type,
39 |         data_root=data_root,
40 |         img_dir='img_dir/train',
41 |         ann_dir='ann_dir/train',
42 |         pipeline=train_pipeline),
43 |     val=dict(
44 |         type=dataset_type,
45 |         data_root=data_root,
46 |         img_dir='img_dir/val',
47 |         ann_dir='ann_dir/val',
48 |         pipeline=test_pipeline),
49 |     test=dict(  # points at the same directories as `val`
50 |         type=dataset_type,
51 |         data_root=data_root,
52 |         img_dir='img_dir/val',
53 |         ann_dir='ann_dir/val',
54 |         pipeline=test_pipeline))
55 | 


--------------------------------------------------------------------------------
/mmseg_custom/configs/_base_/datasets/vaihingen.py:
--------------------------------------------------------------------------------
 1 | # dataset settings: ISPRSDataset under data/vaihingen, trained on 512x512 crops
 2 | dataset_type = 'ISPRSDataset'
 3 | data_root = 'data/vaihingen'
 4 | img_norm_cfg = dict(  # expanded into every Normalize step via **img_norm_cfg
 5 |     mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
 6 | crop_size = (512, 512)  # shared by RandomCrop and Pad in train_pipeline
 7 | train_pipeline = [
 8 |     dict(type='LoadImageFromFile'),
 9 |     dict(type='LoadAnnotations', reduce_zero_label=True),
10 |     dict(type='Resize', img_scale=(512, 512), ratio_range=(0.5, 2.0)),
11 |     dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
12 |     dict(type='RandomFlip', prob=0.5),
13 |     dict(type='PhotoMetricDistortion'),
14 |     dict(type='Normalize', **img_norm_cfg),
15 |     dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
16 |     dict(type='DefaultFormatBundle'),
17 |     dict(type='Collect', keys=['img', 'gt_semantic_seg']),
18 | ]
19 | test_pipeline = [  # used by both the val and test entries of `data`
20 |     dict(type='LoadImageFromFile'),
21 |     dict(
22 |         type='MultiScaleFlipAug',
23 |         img_scale=(512, 512),
24 |         # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75],
25 |         flip=False,
26 |         transforms=[
27 |             dict(type='Resize', keep_ratio=True),
28 |             dict(type='RandomFlip'),
29 |             dict(type='Normalize', **img_norm_cfg),
30 |             dict(type='ImageToTensor', keys=['img']),
31 |             dict(type='Collect', keys=['img']),
32 |         ])
33 | ]
34 | data = dict(
35 |     samples_per_gpu=4,
36 |     workers_per_gpu=4,
37 |     train=dict(
38 |         type=dataset_type,
39 |         data_root=data_root,
40 |         img_dir='img_dir/train',
41 |         ann_dir='ann_dir/train',
42 |         pipeline=train_pipeline),
43 |     val=dict(
44 |         type=dataset_type,
45 |         data_root=data_root,
46 |         img_dir='img_dir/val',
47 |         ann_dir='ann_dir/val',
48 |         pipeline=test_pipeline),
49 |     test=dict(  # points at the same directories as `val`
50 |         type=dataset_type,
51 |         data_root=data_root,
52 |         img_dir='img_dir/val',
53 |         ann_dir='ann_dir/val',
54 |         pipeline=test_pipeline))
55 | 


--------------------------------------------------------------------------------
/mmseg_custom/configs/_base_/datasets/coco-stuff164k.py:
--------------------------------------------------------------------------------
 1 | # dataset settings: COCOStuffDataset under data/coco_stuff164k, trained on 512x512 crops
 2 | dataset_type = 'COCOStuffDataset'
 3 | data_root = 'data/coco_stuff164k'
 4 | img_norm_cfg = dict(  # expanded into every Normalize step via **img_norm_cfg
 5 |     mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
 6 | crop_size = (512, 512)  # shared by RandomCrop and Pad in train_pipeline
 7 | train_pipeline = [
 8 |     dict(type='LoadImageFromFile'),
 9 |     dict(type='LoadAnnotations'),  # no reduce_zero_label here, unlike the coco-stuff10k config
10 |     dict(type='Resize', img_scale=(2048, 512), ratio_range=(0.5, 2.0)),
11 |     dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
12 |     dict(type='RandomFlip', prob=0.5),
13 |     dict(type='PhotoMetricDistortion'),
14 |     dict(type='Normalize', **img_norm_cfg),
15 |     dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
16 |     dict(type='DefaultFormatBundle'),
17 |     dict(type='Collect', keys=['img', 'gt_semantic_seg']),
18 | ]
19 | test_pipeline = [  # used by both the val and test entries of `data`
20 |     dict(type='LoadImageFromFile'),
21 |     dict(
22 |         type='MultiScaleFlipAug',
23 |         img_scale=(2048, 512),
24 |         # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75],
25 |         flip=False,
26 |         transforms=[
27 |             dict(type='Resize', keep_ratio=True),
28 |             dict(type='RandomFlip'),
29 |             dict(type='Normalize', **img_norm_cfg),
30 |             dict(type='ImageToTensor', keys=['img']),
31 |             dict(type='Collect', keys=['img']),
32 |         ])
33 | ]
34 | data = dict(
35 |     samples_per_gpu=4,
36 |     workers_per_gpu=4,
37 |     train=dict(
38 |         type=dataset_type,
39 |         data_root=data_root,
40 |         img_dir='images/train2017',
41 |         ann_dir='annotations/train2017',
42 |         pipeline=train_pipeline),
43 |     val=dict(
44 |         type=dataset_type,
45 |         data_root=data_root,
46 |         img_dir='images/val2017',
47 |         ann_dir='annotations/val2017',
48 |         pipeline=test_pipeline),
49 |     test=dict(  # points at the same directories as `val`
50 |         type=dataset_type,
51 |         data_root=data_root,
52 |         img_dir='images/val2017',
53 |         ann_dir='annotations/val2017',
54 |         pipeline=test_pipeline))
55 | 


--------------------------------------------------------------------------------
/mmseg_custom/configs/_base_/datasets/cityscapes.py:
--------------------------------------------------------------------------------
 1 | # dataset settings: CityscapesDataset, trained with crop_size (512, 1024)
 2 | dataset_type = 'CityscapesDataset'
 3 | data_root = '/home/ubuntu/2TB/dataset/cityscapes'  # NOTE(review): machine-specific absolute path; the other dataset configs in this dump use a relative 'data/...' root
 4 | img_norm_cfg = dict(  # expanded into every Normalize step via **img_norm_cfg
 5 |     mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
 6 | crop_size = (512, 1024)  # shared by RandomCrop and Pad in train_pipeline
 7 | train_pipeline = [
 8 |     dict(type='LoadImageFromFile'),
 9 |     dict(type='LoadAnnotations'),
10 |     dict(type='Resize', img_scale=(2048, 1024), ratio_range=(0.5, 2.0)),
11 |     dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
12 |     dict(type='RandomFlip', prob=0.5),
13 |     dict(type='PhotoMetricDistortion'),
14 |     dict(type='Normalize', **img_norm_cfg),
15 |     dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
16 |     dict(type='DefaultFormatBundle'),
17 |     dict(type='Collect', keys=['img', 'gt_semantic_seg']),
18 | ]
19 | test_pipeline = [  # used by both the val and test entries of `data`
20 |     dict(type='LoadImageFromFile'),
21 |     dict(
22 |         type='MultiScaleFlipAug',
23 |         img_scale=(2048, 1024),
24 |         # img_scale=(1536, 768),
25 |         # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75],
26 |         flip=False,
27 |         transforms=[
28 |             dict(type='Resize', keep_ratio=True),
29 |             dict(type='RandomFlip'),
30 |             dict(type='Normalize', **img_norm_cfg),
31 |             dict(type='ImageToTensor', keys=['img']),
32 |             dict(type='Collect', keys=['img']),
33 |         ])
34 | ]
35 | data = dict(
36 |     samples_per_gpu=2,
37 |     workers_per_gpu=2,
38 |     train=dict(
39 |         type=dataset_type,
40 |         data_root=data_root,
41 |         img_dir='leftImg8bit/train',
42 |         ann_dir='gtFine/train',
43 |         pipeline=train_pipeline),
44 |     val=dict(
45 |         type=dataset_type,
46 |         data_root=data_root,
47 |         img_dir='leftImg8bit/val',
48 |         ann_dir='gtFine/val',
49 |         pipeline=test_pipeline),
50 |     test=dict(  # points at the same directories as `val`
51 |         type=dataset_type,
52 |         data_root=data_root,
53 |         img_dir='leftImg8bit/val',
54 |         ann_dir='gtFine/val',
55 |         pipeline=test_pipeline))
56 | 


--------------------------------------------------------------------------------
/mmseg_custom/configs/_base_/datasets/ade20k_640x640.py:
--------------------------------------------------------------------------------
 1 | # dataset settings: ADE20KDataset under data/ade/ADEChallengeData2016, trained on 640x640 crops
 2 | dataset_type = 'ADE20KDataset'
 3 | data_root = 'data/ade/ADEChallengeData2016'
 4 | img_norm_cfg = dict(  # expanded into every Normalize step via **img_norm_cfg
 5 |     mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
 6 | crop_size = (640, 640)  # shared by RandomCrop and Pad in train_pipeline
 7 | train_pipeline = [
 8 |     dict(type='LoadImageFromFile'),
 9 |     dict(type='LoadAnnotations', reduce_zero_label=True),
10 |     dict(type='Resize', img_scale=(2560, 640), ratio_range=(0.5, 2.0)),
11 |     dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
12 |     dict(type='RandomFlip', prob=0.5),
13 |     dict(type='PhotoMetricDistortion'),
14 |     dict(type='Normalize', **img_norm_cfg),
15 |     dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
16 |     dict(type='DefaultFormatBundle'),
17 |     dict(type='Collect', keys=['img', 'gt_semantic_seg']),
18 | ]
19 | test_pipeline = [  # used by both the val and test entries of `data`
20 |     dict(type='LoadImageFromFile'),
21 |     dict(
22 |         type='MultiScaleFlipAug',
23 |         img_scale=(2560, 640),
24 |         # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75],
25 |         flip=False,
26 |         transforms=[
27 |             dict(type='Resize', keep_ratio=True),
28 |             dict(type='RandomFlip'),
29 |             dict(type='Normalize', **img_norm_cfg),
30 |             dict(type='ImageToTensor', keys=['img']),
31 |             dict(type='Collect', keys=['img']),
32 |         ])
33 | ]
34 | data = dict(
35 |     samples_per_gpu=4,
36 |     workers_per_gpu=4,
37 |     train=dict(
38 |         type=dataset_type,
39 |         data_root=data_root,
40 |         img_dir='images/training',
41 |         ann_dir='annotations/training',
42 |         pipeline=train_pipeline),
43 |     val=dict(
44 |         type=dataset_type,
45 |         data_root=data_root,
46 |         img_dir='images/validation',
47 |         ann_dir='annotations/validation',
48 |         pipeline=test_pipeline),
49 |     test=dict(  # points at the same directories as `val`
50 |         type=dataset_type,
51 |         data_root=data_root,
52 |         img_dir='images/validation',
53 |         ann_dir='annotations/validation',
54 |         pipeline=test_pipeline))
55 | 


--------------------------------------------------------------------------------
/mmseg_custom/configs/_base_/datasets/drive.py:
--------------------------------------------------------------------------------
 1 | # dataset settings: DRIVEDataset under data/DRIVE, trained on 64x64 crops; the training set is wrapped in RepeatDataset
 2 | dataset_type = 'DRIVEDataset'
 3 | data_root = 'data/DRIVE'
 4 | img_norm_cfg = dict(  # expanded into every Normalize step via **img_norm_cfg
 5 |     mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
 6 | img_scale = (584, 565)  # shared by the training Resize and the test MultiScaleFlipAug
 7 | crop_size = (64, 64)  # shared by RandomCrop and Pad in train_pipeline
 8 | train_pipeline = [
 9 |     dict(type='LoadImageFromFile'),
10 |     dict(type='LoadAnnotations'),
11 |     dict(type='Resize', img_scale=img_scale, ratio_range=(0.5, 2.0)),
12 |     dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
13 |     dict(type='RandomFlip', prob=0.5),
14 |     dict(type='PhotoMetricDistortion'),
15 |     dict(type='Normalize', **img_norm_cfg),
16 |     dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
17 |     dict(type='DefaultFormatBundle'),
18 |     dict(type='Collect', keys=['img', 'gt_semantic_seg'])
19 | ]
20 | test_pipeline = [  # used by both the val and test entries of `data`
21 |     dict(type='LoadImageFromFile'),
22 |     dict(
23 |         type='MultiScaleFlipAug',
24 |         img_scale=img_scale,
25 |         # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75, 2.0],
26 |         flip=False,
27 |         transforms=[
28 |             dict(type='Resize', keep_ratio=True),
29 |             dict(type='RandomFlip'),
30 |             dict(type='Normalize', **img_norm_cfg),
31 |             dict(type='ImageToTensor', keys=['img']),
32 |             dict(type='Collect', keys=['img'])
33 |         ])
34 | ]
35 | 
36 | data = dict(
37 |     samples_per_gpu=4,
38 |     workers_per_gpu=4,
39 |     train=dict(
40 |         type='RepeatDataset',  # wrapper around the real training dataset given in `dataset`
41 |         times=40000,
42 |         dataset=dict(
43 |             type=dataset_type,
44 |             data_root=data_root,
45 |             img_dir='images/training',
46 |             ann_dir='annotations/training',
47 |             pipeline=train_pipeline)),
48 |     val=dict(
49 |         type=dataset_type,
50 |         data_root=data_root,
51 |         img_dir='images/validation',
52 |         ann_dir='annotations/validation',
53 |         pipeline=test_pipeline),
54 |     test=dict(  # points at the same directories as `val`
55 |         type=dataset_type,
56 |         data_root=data_root,
57 |         img_dir='images/validation',
58 |         ann_dir='annotations/validation',
59 |         pipeline=test_pipeline))
60 | 


--------------------------------------------------------------------------------
/mmseg_custom/configs/_base_/datasets/hrf.py:
--------------------------------------------------------------------------------
 1 | # dataset settings: HRFDataset under data/HRF, trained on 256x256 crops; the training set is wrapped in RepeatDataset
 2 | dataset_type = 'HRFDataset'
 3 | data_root = 'data/HRF'
 4 | img_norm_cfg = dict(  # expanded into every Normalize step via **img_norm_cfg
 5 |     mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
 6 | img_scale = (2336, 3504)  # shared by the training Resize and the test MultiScaleFlipAug
 7 | crop_size = (256, 256)  # shared by RandomCrop and Pad in train_pipeline
 8 | train_pipeline = [
 9 |     dict(type='LoadImageFromFile'),
10 |     dict(type='LoadAnnotations'),
11 |     dict(type='Resize', img_scale=img_scale, ratio_range=(0.5, 2.0)),
12 |     dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
13 |     dict(type='RandomFlip', prob=0.5),
14 |     dict(type='PhotoMetricDistortion'),
15 |     dict(type='Normalize', **img_norm_cfg),
16 |     dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
17 |     dict(type='DefaultFormatBundle'),
18 |     dict(type='Collect', keys=['img', 'gt_semantic_seg'])
19 | ]
20 | test_pipeline = [  # used by both the val and test entries of `data`
21 |     dict(type='LoadImageFromFile'),
22 |     dict(
23 |         type='MultiScaleFlipAug',
24 |         img_scale=img_scale,
25 |         # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75, 2.0],
26 |         flip=False,
27 |         transforms=[
28 |             dict(type='Resize', keep_ratio=True),
29 |             dict(type='RandomFlip'),
30 |             dict(type='Normalize', **img_norm_cfg),
31 |             dict(type='ImageToTensor', keys=['img']),
32 |             dict(type='Collect', keys=['img'])
33 |         ])
34 | ]
35 | 
36 | data = dict(
37 |     samples_per_gpu=4,
38 |     workers_per_gpu=4,
39 |     train=dict(
40 |         type='RepeatDataset',  # wrapper around the real training dataset given in `dataset`
41 |         times=40000,
42 |         dataset=dict(
43 |             type=dataset_type,
44 |             data_root=data_root,
45 |             img_dir='images/training',
46 |             ann_dir='annotations/training',
47 |             pipeline=train_pipeline)),
48 |     val=dict(
49 |         type=dataset_type,
50 |         data_root=data_root,
51 |         img_dir='images/validation',
52 |         ann_dir='annotations/validation',
53 |         pipeline=test_pipeline),
54 |     test=dict(  # points at the same directories as `val`
55 |         type=dataset_type,
56 |         data_root=data_root,
57 |         img_dir='images/validation',
58 |         ann_dir='annotations/validation',
59 |         pipeline=test_pipeline))
60 | 


--------------------------------------------------------------------------------
/mmseg_custom/configs/_base_/datasets/stare.py:
--------------------------------------------------------------------------------
 1 | # dataset settings: STAREDataset under data/STARE, trained on 128x128 crops; the training set is wrapped in RepeatDataset
 2 | dataset_type = 'STAREDataset'
 3 | data_root = 'data/STARE'
 4 | img_norm_cfg = dict(  # expanded into every Normalize step via **img_norm_cfg
 5 |     mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
 6 | img_scale = (605, 700)  # shared by the training Resize and the test MultiScaleFlipAug
 7 | crop_size = (128, 128)  # shared by RandomCrop and Pad in train_pipeline
 8 | train_pipeline = [
 9 |     dict(type='LoadImageFromFile'),
10 |     dict(type='LoadAnnotations'),
11 |     dict(type='Resize', img_scale=img_scale, ratio_range=(0.5, 2.0)),
12 |     dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
13 |     dict(type='RandomFlip', prob=0.5),
14 |     dict(type='PhotoMetricDistortion'),
15 |     dict(type='Normalize', **img_norm_cfg),
16 |     dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
17 |     dict(type='DefaultFormatBundle'),
18 |     dict(type='Collect', keys=['img', 'gt_semantic_seg'])
19 | ]
20 | test_pipeline = [  # used by both the val and test entries of `data`
21 |     dict(type='LoadImageFromFile'),
22 |     dict(
23 |         type='MultiScaleFlipAug',
24 |         img_scale=img_scale,
25 |         # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75, 2.0],
26 |         flip=False,
27 |         transforms=[
28 |             dict(type='Resize', keep_ratio=True),
29 |             dict(type='RandomFlip'),
30 |             dict(type='Normalize', **img_norm_cfg),
31 |             dict(type='ImageToTensor', keys=['img']),
32 |             dict(type='Collect', keys=['img'])
33 |         ])
34 | ]
35 | 
36 | data = dict(
37 |     samples_per_gpu=4,
38 |     workers_per_gpu=4,
39 |     train=dict(
40 |         type='RepeatDataset',  # wrapper around the real training dataset given in `dataset`
41 |         times=40000,
42 |         dataset=dict(
43 |             type=dataset_type,
44 |             data_root=data_root,
45 |             img_dir='images/training',
46 |             ann_dir='annotations/training',
47 |             pipeline=train_pipeline)),
48 |     val=dict(
49 |         type=dataset_type,
50 |         data_root=data_root,
51 |         img_dir='images/validation',
52 |         ann_dir='annotations/validation',
53 |         pipeline=test_pipeline),
54 |     test=dict(  # points at the same directories as `val`
55 |         type=dataset_type,
56 |         data_root=data_root,
57 |         img_dir='images/validation',
58 |         ann_dir='annotations/validation',
59 |         pipeline=test_pipeline))
60 | 


--------------------------------------------------------------------------------
/mmseg_custom/configs/_base_/datasets/coco-stuff10k.py:
--------------------------------------------------------------------------------
 1 | # dataset settings: COCOStuffDataset under data/coco_stuff10k, trained on 512x512 crops
 2 | dataset_type = 'COCOStuffDataset'
 3 | data_root = 'data/coco_stuff10k'
 4 | img_norm_cfg = dict(  # expanded into every Normalize step via **img_norm_cfg
 5 |     mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
 6 | crop_size = (512, 512)  # shared by RandomCrop and Pad in train_pipeline
 7 | train_pipeline = [
 8 |     dict(type='LoadImageFromFile'),
 9 |     dict(type='LoadAnnotations', reduce_zero_label=True),  # reduce_zero_label is also set on every dataset entry in `data`
10 |     dict(type='Resize', img_scale=(2048, 512), ratio_range=(0.5, 2.0)),
11 |     dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
12 |     dict(type='RandomFlip', prob=0.5),
13 |     dict(type='PhotoMetricDistortion'),
14 |     dict(type='Normalize', **img_norm_cfg),
15 |     dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
16 |     dict(type='DefaultFormatBundle'),
17 |     dict(type='Collect', keys=['img', 'gt_semantic_seg']),
18 | ]
19 | test_pipeline = [  # used by both the val and test entries of `data`
20 |     dict(type='LoadImageFromFile'),
21 |     dict(
22 |         type='MultiScaleFlipAug',
23 |         img_scale=(2048, 512),
24 |         # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75],
25 |         flip=False,
26 |         transforms=[
27 |             dict(type='Resize', keep_ratio=True),
28 |             dict(type='RandomFlip'),
29 |             dict(type='Normalize', **img_norm_cfg),
30 |             dict(type='ImageToTensor', keys=['img']),
31 |             dict(type='Collect', keys=['img']),
32 |         ])
33 | ]
34 | data = dict(
35 |     samples_per_gpu=4,
36 |     workers_per_gpu=4,
37 |     train=dict(
38 |         type=dataset_type,
39 |         data_root=data_root,
40 |         reduce_zero_label=True,
41 |         img_dir='images/train2014',
42 |         ann_dir='annotations/train2014',
43 |         pipeline=train_pipeline),
44 |     val=dict(  # validation reads the test2014 directories
45 |         type=dataset_type,
46 |         data_root=data_root,
47 |         reduce_zero_label=True,
48 |         img_dir='images/test2014',
49 |         ann_dir='annotations/test2014',
50 |         pipeline=test_pipeline),
51 |     test=dict(  # points at the same directories as `val`
52 |         type=dataset_type,
53 |         data_root=data_root,
54 |         reduce_zero_label=True,
55 |         img_dir='images/test2014',
56 |         ann_dir='annotations/test2014',
57 |         pipeline=test_pipeline))
58 | 


--------------------------------------------------------------------------------
/mmseg_custom/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) OpenMMLab. All rights reserved.
 2 | import warnings
 3 | 
 4 | import mmcv
 5 | from packaging.version import parse
 6 | 
 7 | from .version import __version__, version_info
 8 | 
 9 | MMCV_MIN = '1.3.13'  # lowest accepted mmcv version (inclusive bound in the import-time check)
10 | MMCV_MAX = '1.6.0'  # highest accepted mmcv version (inclusive bound in the import-time check)
11 | 
12 | 
13 | def digit_version(version_str: str, length: int = 4):
14 |     """Convert a version string into a tuple of integers.
15 | 
16 |     This method is usually used for comparing two versions. For pre-release
17 |     versions: alpha < beta < rc.
18 | 
19 |     Args:
20 |         version_str (str): The version string.
21 |         length (int): The maximum number of version levels. Default: 4.
22 | 
23 |     Returns:
24 |         tuple[int]: The version info in digits (integers): ``length`` release components followed by a two-item release-stage suffix.
25 |     """
26 |     version = parse(version_str)
27 |     assert version.release, f'failed to parse version {version_str}'
28 |     release = list(version.release)
29 |     release = release[:length]  # drop components beyond `length`
30 |     if len(release) < length:
31 |         release = release + [0] * (length - len(release))  # right-pad with zeros to exactly `length` items
32 |     if version.is_prerelease:
33 |         mapping = {'a': -3, 'b': -2, 'rc': -1}  # negative markers order a < b < rc, all below a final release (0)
34 |         val = -4  # fallback marker when there is no recognised pre-release tag; lower than every mapped tag
35 |         # version.pre can be None
36 |         if version.pre:
37 |             if version.pre[0] not in mapping:
38 |                 warnings.warn(f'unknown prerelease version {version.pre[0]}, '
39 |                               'version checking may go wrong')
40 |             else:
41 |                 val = mapping[version.pre[0]]
42 |             release.extend([val, version.pre[-1]])  # suffix is (stage marker, last item of version.pre)
43 |         else:
44 |             release.extend([val, 0])
45 | 
46 |     elif version.is_postrelease:
47 |         release.extend([1, version.post])  # marker 1 places post-releases above the final release
48 |     else:
49 |         release.extend([0, 0])  # final release
50 |     return tuple(release)
51 | 
52 | 
# Supported mmcv range and the installed mmcv version, as comparable tuples.
mmcv_min_version = digit_version(MMCV_MIN)
mmcv_max_version = digit_version(MMCV_MAX)
mmcv_version = digit_version(mmcv.__version__)


# Refuse to import when the installed mmcv lies outside the supported range.
# The message quotes the MMCV_MIN / MMCV_MAX strings rather than the digit
# tuples so that it reads as a requirement the user can install directly.
assert (mmcv_min_version <= mmcv_version <= mmcv_max_version), \
    f'MMCV=={mmcv.__version__} is used but incompatible. ' \
    f'Please install mmcv>={MMCV_MIN}, <={MMCV_MAX}.'
61 | 
62 | __all__ = ['__version__', 'version_info', 'digit_version']
63 | 


--------------------------------------------------------------------------------
/mmseg_custom/configs/_base_/datasets/chase_db1.py:
--------------------------------------------------------------------------------
 1 | # dataset settings
 2 | dataset_type = 'ChaseDB1Dataset'
 3 | data_root = 'data/CHASE_DB1'
 4 | img_norm_cfg = dict(
 5 |     mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
 6 | img_scale = (960, 999)
 7 | crop_size = (128, 128)
 8 | train_pipeline = [
 9 |     dict(type='LoadImageFromFile'),
10 |     dict(type='LoadAnnotations'),
11 |     dict(type='Resize', img_scale=img_scale, ratio_range=(0.5, 2.0)),
12 |     dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
13 |     dict(type='RandomFlip', prob=0.5),
14 |     dict(type='PhotoMetricDistortion'),
15 |     dict(type='Normalize', **img_norm_cfg),
16 |     dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
17 |     dict(type='DefaultFormatBundle'),
18 |     dict(type='Collect', keys=['img', 'gt_semantic_seg'])
19 | ]
20 | test_pipeline = [
21 |     dict(type='LoadImageFromFile'),
22 |     dict(
23 |         type='MultiScaleFlipAug',
24 |         img_scale=img_scale,
25 |         # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75, 2.0],
26 |         flip=False,
27 |         transforms=[
28 |             dict(type='Resize', keep_ratio=True),
29 |             dict(type='RandomFlip'),
30 |             dict(type='Normalize', **img_norm_cfg),
31 |             dict(type='ImageToTensor', keys=['img']),
32 |             dict(type='Collect', keys=['img'])
33 |         ])
34 | ]
35 | 
36 | data = dict(
37 |     samples_per_gpu=4,
38 |     workers_per_gpu=4,
39 |     train=dict(
40 |         type='RepeatDataset',
41 |         times=40000,
42 |         dataset=dict(
43 |             type=dataset_type,
44 |             data_root=data_root,
45 |             img_dir='images/training',
46 |             ann_dir='annotations/training',
47 |             pipeline=train_pipeline)),
48 |     val=dict(
49 |         type=dataset_type,
50 |         data_root=data_root,
51 |         img_dir='images/validation',
52 |         ann_dir='annotations/validation',
53 |         pipeline=test_pipeline),
54 |     test=dict(
55 |         type=dataset_type,
56 |         data_root=data_root,
57 |         img_dir='images/validation',
58 |         ann_dir='annotations/validation',
59 |         pipeline=test_pipeline))
60 | 


--------------------------------------------------------------------------------
/mmseg_custom/configs/_base_/datasets/pascal_voc12.py:
--------------------------------------------------------------------------------
 1 | # dataset settings
 2 | dataset_type = 'PascalVOCDataset'
 3 | data_root = '/home/ubuntu/dataset/VOCdevkit/VOC2012'
 4 | img_norm_cfg = dict(
 5 |     mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
 6 | crop_size = (512, 512)
 7 | train_pipeline = [
 8 |     dict(type='LoadImageFromFile'),
 9 |     dict(type='LoadAnnotations'),
10 |     dict(type='Resize', img_scale=(2048, 512), ratio_range=(0.5, 2.0)),
11 |     dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
12 |     dict(type='RandomFlip', prob=0.5),
13 |     dict(type='PhotoMetricDistortion'),
14 |     dict(type='Normalize', **img_norm_cfg),
15 |     dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
16 |     dict(type='DefaultFormatBundle'),
17 |     dict(type='Collect', keys=['img', 'gt_semantic_seg']),
18 | ]
19 | test_pipeline = [
20 |     dict(type='LoadImageFromFile'),
21 |     dict(
22 |         type='MultiScaleFlipAug',
23 |         img_scale=(2048, 512),
24 |         # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75],
25 |         flip=False,
26 |         transforms=[
27 |             dict(type='Resize', keep_ratio=True),
28 |             dict(type='RandomFlip'),
29 |             dict(type='Normalize', **img_norm_cfg),
30 |             dict(type='ImageToTensor', keys=['img']),
31 |             dict(type='Collect', keys=['img']),
32 |         ])
33 | ]
34 | data = dict(
35 |     samples_per_gpu=4,
36 |     workers_per_gpu=4,
37 |     train=dict(
38 |         type=dataset_type,
39 |         data_root=data_root,
40 |         img_dir='JPEGImages',
41 |         ann_dir='SegmentationClass',
42 |         split='ImageSets/Segmentation/train.txt',
43 |         pipeline=train_pipeline),
44 |     val=dict(
45 |         type=dataset_type,
46 |         data_root=data_root,
47 |         img_dir='JPEGImages',
48 |         ann_dir='SegmentationClass',
49 |         split='ImageSets/Segmentation/val.txt',
50 |         pipeline=test_pipeline),
51 |     test=dict(
52 |         type=dataset_type,
53 |         data_root=data_root,
54 |         img_dir='JPEGImages',
55 |         ann_dir='SegmentationClass',
56 |         split='ImageSets/Segmentation/val.txt',
57 |         pipeline=test_pipeline))
58 | 


--------------------------------------------------------------------------------
/mmseg_custom/configs/_base_/datasets/isaid.py:
--------------------------------------------------------------------------------
 1 | # dataset settings
 2 | dataset_type = 'iSAIDDataset'
 3 | data_root = 'data/iSAID'
 4 | 
 5 | img_norm_cfg = dict(
 6 |     mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
 7 | """
 8 | This crop_size setting is followed by the implementation of
 9 | `PointFlow: Flowing Semantics Through Points for Aerial Image
10 | Segmentation <https://arxiv.org/pdf/2103.06564.pdf>`_.
11 | """
12 | 
13 | crop_size = (896, 896)
14 | 
15 | train_pipeline = [
16 |     dict(type='LoadImageFromFile'),
17 |     dict(type='LoadAnnotations'),
18 |     dict(type='Resize', img_scale=(896, 896), ratio_range=(0.5, 2.0)),
19 |     dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
20 |     dict(type='RandomFlip', prob=0.5),
21 |     dict(type='PhotoMetricDistortion'),
22 |     dict(type='Normalize', **img_norm_cfg),
23 |     dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
24 |     dict(type='DefaultFormatBundle'),
25 |     dict(type='Collect', keys=['img', 'gt_semantic_seg']),
26 | ]
27 | test_pipeline = [
28 |     dict(type='LoadImageFromFile'),
29 |     dict(
30 |         type='MultiScaleFlipAug',
31 |         img_scale=(896, 896),
32 |         # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75],
33 |         flip=False,
34 |         transforms=[
35 |             dict(type='Resize', keep_ratio=True),
36 |             dict(type='RandomFlip'),
37 |             dict(type='Normalize', **img_norm_cfg),
38 |             dict(type='ImageToTensor', keys=['img']),
39 |             dict(type='Collect', keys=['img']),
40 |         ])
41 | ]
42 | data = dict(
43 |     samples_per_gpu=4,
44 |     workers_per_gpu=4,
45 |     train=dict(
46 |         type=dataset_type,
47 |         data_root=data_root,
48 |         img_dir='img_dir/train',
49 |         ann_dir='ann_dir/train',
50 |         pipeline=train_pipeline),
51 |     val=dict(
52 |         type=dataset_type,
53 |         data_root=data_root,
54 |         img_dir='img_dir/val',
55 |         ann_dir='ann_dir/val',
56 |         pipeline=test_pipeline),
57 |     test=dict(
58 |         type=dataset_type,
59 |         data_root=data_root,
60 |         img_dir='img_dir/val',
61 |         ann_dir='ann_dir/val',
62 |         pipeline=test_pipeline))
63 | 


--------------------------------------------------------------------------------
/mmseg_custom/configs/_base_/datasets/ade20k.py:
--------------------------------------------------------------------------------
 1 | # dataset settings
 2 | dataset_type = 'ADE20KDataset'
 3 | data_root = '/home/ubuntu/2TB/dataset/ade/ADEChallengeData2016'
 4 | img_norm_cfg = dict(
 5 |     mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
 6 | crop_size = (512, 512)
 7 | train_pipeline = [
 8 |     dict(type='LoadImageFromFile'),
 9 |     dict(type='LoadAnnotations', reduce_zero_label=True),
10 |     dict(type='Resize', img_scale=(2048, 512), ratio_range=(0.5, 2.0)),
11 |     dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
12 |     dict(type='RandomFlip', prob=0.5),
13 |     dict(type='PhotoMetricDistortion'),
14 |     dict(type='Normalize', **img_norm_cfg),
15 |     dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
16 |     dict(type='DefaultFormatBundle'),
17 |     dict(type='Collect', keys=['img', 'gt_semantic_seg']),
18 | ]
19 | test_pipeline = [
20 |     dict(type='LoadImageFromFile'),
21 |     dict(
22 |         type='MultiScaleFlipAug',
23 |         img_scale=(2048, 512),
24 |         # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75],
25 |         flip=False,
26 |         transforms=[
27 |             dict(type='Resize', keep_ratio=True),
28 |             # resize image to multiple of 32, improve SegFormer by 0.5-1.0 mIoU.
29 |             dict(type='ResizeToMultiple', size_divisor=32),
30 |             dict(type='RandomFlip'),
31 |             dict(type='Normalize', **img_norm_cfg),
32 |             dict(type='ImageToTensor', keys=['img']),
33 |             dict(type='Collect', keys=['img']),
34 |         ])
35 | ]
36 | data = dict(
37 |     samples_per_gpu=4,
38 |     workers_per_gpu=4,
39 |     train=dict(
40 |         type=dataset_type,
41 |         data_root=data_root,
42 |         img_dir='images/training',
43 |         ann_dir='annotations/training',
44 |         pipeline=train_pipeline),
45 |     val=dict(
46 |         type=dataset_type,
47 |         data_root=data_root,
48 |         img_dir='images/validation',
49 |         ann_dir='annotations/validation',
50 |         pipeline=test_pipeline),
51 |     test=dict(
52 |         type=dataset_type,
53 |         data_root=data_root,
54 |         img_dir='images/validation',
55 |         ann_dir='annotations/validation',
56 |         pipeline=test_pipeline))
57 | 


--------------------------------------------------------------------------------
/mmseg_custom/configs/_base_/datasets/pascal_context.py:
--------------------------------------------------------------------------------
 1 | # dataset settings
 2 | dataset_type = 'PascalContextDataset'
 3 | data_root = 'data/VOCdevkit/VOC2010/'
 4 | img_norm_cfg = dict(
 5 |     mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
 6 | 
 7 | img_scale = (520, 520)
 8 | crop_size = (480, 480)
 9 | 
10 | train_pipeline = [
11 |     dict(type='LoadImageFromFile'),
12 |     dict(type='LoadAnnotations'),
13 |     dict(type='Resize', img_scale=img_scale, ratio_range=(0.5, 2.0)),
14 |     dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
15 |     dict(type='RandomFlip', prob=0.5),
16 |     dict(type='PhotoMetricDistortion'),
17 |     dict(type='Normalize', **img_norm_cfg),
18 |     dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
19 |     dict(type='DefaultFormatBundle'),
20 |     dict(type='Collect', keys=['img', 'gt_semantic_seg']),
21 | ]
22 | test_pipeline = [
23 |     dict(type='LoadImageFromFile'),
24 |     dict(
25 |         type='MultiScaleFlipAug',
26 |         img_scale=img_scale,
27 |         # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75],
28 |         flip=False,
29 |         transforms=[
30 |             dict(type='Resize', keep_ratio=True),
31 |             dict(type='RandomFlip'),
32 |             dict(type='Normalize', **img_norm_cfg),
33 |             dict(type='ImageToTensor', keys=['img']),
34 |             dict(type='Collect', keys=['img']),
35 |         ])
36 | ]
37 | data = dict(
38 |     samples_per_gpu=4,
39 |     workers_per_gpu=4,
40 |     train=dict(
41 |         type=dataset_type,
42 |         data_root=data_root,
43 |         img_dir='JPEGImages',
44 |         ann_dir='SegmentationClassContext',
45 |         split='ImageSets/SegmentationContext/train.txt',
46 |         pipeline=train_pipeline),
47 |     val=dict(
48 |         type=dataset_type,
49 |         data_root=data_root,
50 |         img_dir='JPEGImages',
51 |         ann_dir='SegmentationClassContext',
52 |         split='ImageSets/SegmentationContext/val.txt',
53 |         pipeline=test_pipeline),
54 |     test=dict(
55 |         type=dataset_type,
56 |         data_root=data_root,
57 |         img_dir='JPEGImages',
58 |         ann_dir='SegmentationClassContext',
59 |         split='ImageSets/SegmentationContext/val.txt',
60 |         pipeline=test_pipeline))
61 | 


--------------------------------------------------------------------------------
/mmseg_custom/configs/_base_/datasets/pascal_context_59.py:
--------------------------------------------------------------------------------
 1 | # dataset settings
 2 | dataset_type = 'PascalContextDataset59'
 3 | data_root = 'data/VOCdevkit/VOC2010/'
 4 | img_norm_cfg = dict(
 5 |     mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
 6 | 
 7 | img_scale = (520, 520)
 8 | crop_size = (480, 480)
 9 | 
10 | train_pipeline = [
11 |     dict(type='LoadImageFromFile'),
12 |     dict(type='LoadAnnotations', reduce_zero_label=True),
13 |     dict(type='Resize', img_scale=img_scale, ratio_range=(0.5, 2.0)),
14 |     dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
15 |     dict(type='RandomFlip', prob=0.5),
16 |     dict(type='PhotoMetricDistortion'),
17 |     dict(type='Normalize', **img_norm_cfg),
18 |     dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
19 |     dict(type='DefaultFormatBundle'),
20 |     dict(type='Collect', keys=['img', 'gt_semantic_seg']),
21 | ]
22 | test_pipeline = [
23 |     dict(type='LoadImageFromFile'),
24 |     dict(
25 |         type='MultiScaleFlipAug',
26 |         img_scale=img_scale,
27 |         # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75],
28 |         flip=False,
29 |         transforms=[
30 |             dict(type='Resize', keep_ratio=True),
31 |             dict(type='RandomFlip'),
32 |             dict(type='Normalize', **img_norm_cfg),
33 |             dict(type='ImageToTensor', keys=['img']),
34 |             dict(type='Collect', keys=['img']),
35 |         ])
36 | ]
37 | data = dict(
38 |     samples_per_gpu=4,
39 |     workers_per_gpu=4,
40 |     train=dict(
41 |         type=dataset_type,
42 |         data_root=data_root,
43 |         img_dir='JPEGImages',
44 |         ann_dir='SegmentationClassContext',
45 |         split='ImageSets/SegmentationContext/train.txt',
46 |         pipeline=train_pipeline),
47 |     val=dict(
48 |         type=dataset_type,
49 |         data_root=data_root,
50 |         img_dir='JPEGImages',
51 |         ann_dir='SegmentationClassContext',
52 |         split='ImageSets/SegmentationContext/val.txt',
53 |         pipeline=test_pipeline),
54 |     test=dict(
55 |         type=dataset_type,
56 |         data_root=data_root,
57 |         img_dir='JPEGImages',
58 |         ann_dir='SegmentationClassContext',
59 |         split='ImageSets/SegmentationContext/val.txt',
60 |         pipeline=test_pipeline))
61 | 


--------------------------------------------------------------------------------
/mmseg_custom/configs/_base_/models/bisenetv1_r18-d32.py:
--------------------------------------------------------------------------------
 1 | # model settings
 2 | norm_cfg = dict(type='SyncBN', requires_grad=True)
 3 | model = dict(
 4 |     type='EncoderDecoder',
 5 |     backbone=dict(
 6 |         type='BiSeNetV1',
 7 |         in_channels=3,
 8 |         context_channels=(128, 256, 512),
 9 |         spatial_channels=(64, 64, 64, 128),
10 |         out_indices=(0, 1, 2),
11 |         out_channels=256,
12 |         backbone_cfg=dict(
13 |             type='ResNet',
14 |             in_channels=3,
15 |             depth=18,
16 |             num_stages=4,
17 |             out_indices=(0, 1, 2, 3),
18 |             dilations=(1, 1, 1, 1),
19 |             strides=(1, 2, 2, 2),
20 |             norm_cfg=norm_cfg,
21 |             norm_eval=False,
22 |             style='pytorch',
23 |             contract_dilation=True),
24 |         norm_cfg=norm_cfg,
25 |         align_corners=False,
26 |         init_cfg=None),
27 |     decode_head=dict(
28 |         type='FCNHead',
29 |         in_channels=256,
30 |         in_index=0,
31 |         channels=256,
32 |         num_convs=1,
33 |         concat_input=False,
34 |         dropout_ratio=0.1,
35 |         num_classes=19,
36 |         norm_cfg=norm_cfg,
37 |         align_corners=False,
38 |         loss_decode=dict(
39 |             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
40 |     auxiliary_head=[
41 |         dict(
42 |             type='FCNHead',
43 |             in_channels=128,
44 |             channels=64,
45 |             num_convs=1,
46 |             num_classes=19,
47 |             in_index=1,
48 |             norm_cfg=norm_cfg,
49 |             concat_input=False,
50 |             align_corners=False,
51 |             loss_decode=dict(
52 |                 type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
53 |         dict(
54 |             type='FCNHead',
55 |             in_channels=128,
56 |             channels=64,
57 |             num_convs=1,
58 |             num_classes=19,
59 |             in_index=2,
60 |             norm_cfg=norm_cfg,
61 |             concat_input=False,
62 |             align_corners=False,
63 |             loss_decode=dict(
64 |                 type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
65 |     ],
66 |     # model training and testing settings
67 |     train_cfg=dict(),
68 |     test_cfg=dict(mode='whole'))
69 | 


--------------------------------------------------------------------------------
/mmseg_custom/models/utils/se_layer.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) OpenMMLab. All rights reserved.
 2 | import mmcv
 3 | import torch.nn as nn
 4 | from mmcv.cnn import ConvModule
 5 | 
 6 | from .make_divisible import make_divisible
 7 | 
 8 | 
 9 | class SELayer(nn.Module):
10 |     """Squeeze-and-Excitation Module.
11 | 
12 |     Args:
13 |         channels (int): The input (and output) channels of the SE layer.
14 |         ratio (int): Squeeze ratio in SELayer, the intermediate channel will be
15 |             ``int(channels/ratio)``. Default: 16.
16 |         conv_cfg (None or dict): Config dict for convolution layer.
17 |             Default: None, which means using conv2d.
18 |         act_cfg (dict or Sequence[dict]): Config dict for activation layer.
19 |             If act_cfg is a dict, two activation layers will be configured
20 |             by this dict. If act_cfg is a sequence of dicts, the first
21 |             activation layer will be configured by the first dict and the
22 |             second activation layer will be configured by the second dict.
23 |             Default: (dict(type='ReLU'), dict(type='HSigmoid', bias=3.0,
24 |             divisor=6.0)).
25 |     """
26 | 
27 |     def __init__(self,
28 |                  channels,
29 |                  ratio=16,
30 |                  conv_cfg=None,
31 |                  act_cfg=(dict(type='ReLU'),
32 |                           dict(type='HSigmoid', bias=3.0, divisor=6.0))):
33 |         super(SELayer, self).__init__()
34 |         if isinstance(act_cfg, dict):
35 |             act_cfg = (act_cfg, act_cfg)
36 |         assert len(act_cfg) == 2
37 |         assert mmcv.is_tuple_of(act_cfg, dict)
38 |         self.global_avgpool = nn.AdaptiveAvgPool2d(1)
39 |         self.conv1 = ConvModule(
40 |             in_channels=channels,
41 |             out_channels=make_divisible(channels // ratio, 8),
42 |             kernel_size=1,
43 |             stride=1,
44 |             conv_cfg=conv_cfg,
45 |             act_cfg=act_cfg[0])
46 |         self.conv2 = ConvModule(
47 |             in_channels=make_divisible(channels // ratio, 8),
48 |             out_channels=channels,
49 |             kernel_size=1,
50 |             stride=1,
51 |             conv_cfg=conv_cfg,
52 |             act_cfg=act_cfg[1])
53 | 
54 |     def forward(self, x):
55 |         out = self.global_avgpool(x)
56 |         out = self.conv1(out)
57 |         out = self.conv2(out)
58 |         return x * out
59 | 


--------------------------------------------------------------------------------
/mmseg_custom/configs/_base_/models/icnet_r50-d8.py:
--------------------------------------------------------------------------------
 1 | # model settings
 2 | norm_cfg = dict(type='SyncBN', requires_grad=True)
 3 | model = dict(
 4 |     type='EncoderDecoder',
 5 |     backbone=dict(
 6 |         type='ICNet',
 7 |         backbone_cfg=dict(
 8 |             type='ResNetV1c',
 9 |             in_channels=3,
10 |             depth=50,
11 |             num_stages=4,
12 |             out_indices=(0, 1, 2, 3),
13 |             dilations=(1, 1, 2, 4),
14 |             strides=(1, 2, 1, 1),
15 |             norm_cfg=norm_cfg,
16 |             norm_eval=False,
17 |             style='pytorch',
18 |             contract_dilation=True),
19 |         in_channels=3,
20 |         layer_channels=(512, 2048),
21 |         light_branch_middle_channels=32,
22 |         psp_out_channels=512,
23 |         out_channels=(64, 256, 256),
24 |         norm_cfg=norm_cfg,
25 |         align_corners=False,
26 |     ),
27 |     neck=dict(
28 |         type='ICNeck',
29 |         in_channels=(64, 256, 256),
30 |         out_channels=128,
31 |         norm_cfg=norm_cfg,
32 |         align_corners=False),
33 |     decode_head=dict(
34 |         type='FCNHead',
35 |         in_channels=128,
36 |         channels=128,
37 |         num_convs=1,
38 |         in_index=2,
39 |         dropout_ratio=0,
40 |         num_classes=19,
41 |         norm_cfg=norm_cfg,
42 |         concat_input=False,
43 |         align_corners=False,
44 |         loss_decode=dict(
45 |             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
46 |     auxiliary_head=[
47 |         dict(
48 |             type='FCNHead',
49 |             in_channels=128,
50 |             channels=128,
51 |             num_convs=1,
52 |             num_classes=19,
53 |             in_index=0,
54 |             norm_cfg=norm_cfg,
55 |             concat_input=False,
56 |             align_corners=False,
57 |             loss_decode=dict(
58 |                 type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
59 |         dict(
60 |             type='FCNHead',
61 |             in_channels=128,
62 |             channels=128,
63 |             num_convs=1,
64 |             num_classes=19,
65 |             in_index=1,
66 |             norm_cfg=norm_cfg,
67 |             concat_input=False,
68 |             align_corners=False,
69 |             loss_decode=dict(
70 |                 type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
71 |     ],
72 |     # model training and testing settings
73 |     train_cfg=dict(),
74 |     test_cfg=dict(mode='whole'))
75 | 


--------------------------------------------------------------------------------
/mmseg_custom/configs/_base_/models/ocrnet_hr18.py:
--------------------------------------------------------------------------------
 1 | # model settings
 2 | norm_cfg = dict(type='SyncBN', requires_grad=True)
 3 | model = dict(
 4 |     type='CascadeEncoderDecoder',
 5 |     num_stages=2,
 6 |     pretrained='open-mmlab://msra/hrnetv2_w18',
 7 |     backbone=dict(
 8 |         type='HRNet',
 9 |         norm_cfg=norm_cfg,
10 |         norm_eval=False,
11 |         extra=dict(
12 |             stage1=dict(
13 |                 num_modules=1,
14 |                 num_branches=1,
15 |                 block='BOTTLENECK',
16 |                 num_blocks=(4, ),
17 |                 num_channels=(64, )),
18 |             stage2=dict(
19 |                 num_modules=1,
20 |                 num_branches=2,
21 |                 block='BASIC',
22 |                 num_blocks=(4, 4),
23 |                 num_channels=(18, 36)),
24 |             stage3=dict(
25 |                 num_modules=4,
26 |                 num_branches=3,
27 |                 block='BASIC',
28 |                 num_blocks=(4, 4, 4),
29 |                 num_channels=(18, 36, 72)),
30 |             stage4=dict(
31 |                 num_modules=3,
32 |                 num_branches=4,
33 |                 block='BASIC',
34 |                 num_blocks=(4, 4, 4, 4),
35 |                 num_channels=(18, 36, 72, 144)))),
36 |     decode_head=[
37 |         dict(
38 |             type='FCNHead',
39 |             in_channels=[18, 36, 72, 144],
40 |             channels=sum([18, 36, 72, 144]),
41 |             in_index=(0, 1, 2, 3),
42 |             input_transform='resize_concat',
43 |             kernel_size=1,
44 |             num_convs=1,
45 |             concat_input=False,
46 |             dropout_ratio=-1,
47 |             num_classes=19,
48 |             norm_cfg=norm_cfg,
49 |             align_corners=False,
50 |             loss_decode=dict(
51 |                 type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
52 |         dict(
53 |             type='OCRHead',
54 |             in_channels=[18, 36, 72, 144],
55 |             in_index=(0, 1, 2, 3),
56 |             input_transform='resize_concat',
57 |             channels=512,
58 |             ocr_channels=256,
59 |             dropout_ratio=-1,
60 |             num_classes=19,
61 |             norm_cfg=norm_cfg,
62 |             align_corners=False,
63 |             loss_decode=dict(
64 |                 type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
65 |     ],
66 |     # model training and testing settings
67 |     train_cfg=dict(),
68 |     test_cfg=dict(mode='whole'))
69 | 


--------------------------------------------------------------------------------
/get_flops.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) OpenMMLab. All rights reserved.
 2 | import argparse
 3 | 
 4 | from mmcv import Config
 5 | from mmcv.cnn import get_model_complexity_info
 6 | 
 7 | from mmseg_custom.models import build_segmentor
 8 | import time
 9 | import torch
10 | from tqdm import tqdm
11 | 
def parse_args():
    """Parse the command line of the FLOPs script.

    Returns:
        argparse.Namespace: ``config`` (path of the config file) and
            ``shape`` (list of one or more ints, ``[64, 64]`` by default).
    """
    cli = argparse.ArgumentParser(description='Get the FLOPs of a segmentor')
    cli.add_argument('config', help='train config file path')
    # A previous default was [2048, 1024]; the script currently uses 64x64.
    shape_options = dict(
        type=int, nargs='+', default=[64, 64], help='input image size')
    cli.add_argument('--shape', **shape_options)
    return cli.parse_args()
25 | 
26 | 
def main():
    """Print FLOPs/params of a segmentor and its mean forward time on GPU.

    The model described by the config given on the command line is built,
    moved to CUDA and switched to ``forward_dummy``. Its complexity is
    reported through ``get_model_complexity_info`` and 1000 forward passes
    on a random input are then timed; the mean time in seconds is printed.

    Raises:
        ValueError: If ``--shape`` has more than two values.
        NotImplementedError: If the model has no ``forward_dummy`` method.
    """
    args = parse_args()

    if len(args.shape) == 1:
        input_shape = (3, args.shape[0], args.shape[0])
    elif len(args.shape) == 2:
        input_shape = (3, ) + tuple(args.shape)
    else:
        raise ValueError('invalid input shape')

    cfg = Config.fromfile(args.config)
    cfg.model.pretrained = None
    model = build_segmentor(
        cfg.model,
        train_cfg=cfg.get('train_cfg'),
        test_cfg=cfg.get('test_cfg')).cuda()
    model.eval()

    if hasattr(model, 'forward_dummy'):
        model.forward = model.forward_dummy
    else:
        raise NotImplementedError(
            'FLOPs counter is currently not supported with {}'.format(
                model.__class__.__name__))

    with torch.no_grad():
        flops, params = get_model_complexity_info(
            model, input_shape, as_strings=True)
    split_line = '=' * 30
    print('{0}\nInput shape: {1}\nFlops: {2}\nParams: {3}\n{0}'.format(
        split_line, input_shape, flops, params))
    print('!!!Please be cautious if you use the results in papers. '
          'You may need to check if all ops are supported and verify that the '
          'flops computation is correct.')

    # Build the timing input from input_shape so that a single-value --shape
    # (square image) works here as well; indexing args.shape[-2] would raise
    # IndexError in that case.
    fake_input = torch.rand(1, *input_shape).cuda()
    time_list = []
    # Inference only: no autograd bookkeeping while timing.
    with torch.no_grad():
        for _ in tqdm(range(1000)):
            # CUDA kernels are launched asynchronously; synchronise before
            # and after the forward pass so the wall-clock interval covers
            # the actual GPU work and not just the kernel launches.
            torch.cuda.synchronize()
            t0 = time.perf_counter()
            model(fake_input)
            torch.cuda.synchronize()
            time_list.append(time.perf_counter() - t0)
    print(sum(time_list) / len(time_list))
70 | 
71 | if __name__ == '__main__':
72 |     main()
73 | 


--------------------------------------------------------------------------------
/mmseg_custom/configs/_base_/models/setr_pup.py:
--------------------------------------------------------------------------------
 1 | # model settings
 2 | backbone_norm_cfg = dict(type='LN', eps=1e-6, requires_grad=True)
 3 | norm_cfg = dict(type='SyncBN', requires_grad=True)
 4 | model = dict(
 5 |     type='EncoderDecoder',
 6 |     pretrained='pretrain/jx_vit_large_p16_384-b3be5167.pth',
 7 |     backbone=dict(
 8 |         type='VisionTransformer',
 9 |         img_size=(768, 768),
10 |         patch_size=16,
11 |         in_channels=3,
12 |         embed_dims=1024,
13 |         num_layers=24,
14 |         num_heads=16,
15 |         out_indices=(9, 14, 19, 23),
16 |         drop_rate=0.1,
17 |         norm_cfg=backbone_norm_cfg,
18 |         with_cls_token=True,
19 |         interpolate_mode='bilinear',
20 |     ),
21 |     decode_head=dict(
22 |         type='SETRUPHead',
23 |         in_channels=1024,
24 |         channels=256,
25 |         in_index=3,
26 |         num_classes=19,
27 |         dropout_ratio=0,
28 |         norm_cfg=norm_cfg,
29 |         num_convs=4,
30 |         up_scale=2,
31 |         kernel_size=3,
32 |         align_corners=False,
33 |         loss_decode=dict(
34 |             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
35 |     auxiliary_head=[
36 |         dict(
37 |             type='SETRUPHead',
38 |             in_channels=1024,
39 |             channels=256,
40 |             in_index=0,
41 |             num_classes=19,
42 |             dropout_ratio=0,
43 |             norm_cfg=norm_cfg,
44 |             num_convs=1,
45 |             up_scale=4,
46 |             kernel_size=3,
47 |             align_corners=False,
48 |             loss_decode=dict(
49 |                 type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
50 |         dict(
51 |             type='SETRUPHead',
52 |             in_channels=1024,
53 |             channels=256,
54 |             in_index=1,
55 |             num_classes=19,
56 |             dropout_ratio=0,
57 |             norm_cfg=norm_cfg,
58 |             num_convs=1,
59 |             up_scale=4,
60 |             kernel_size=3,
61 |             align_corners=False,
62 |             loss_decode=dict(
63 |                 type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
64 |         dict(
65 |             type='SETRUPHead',
66 |             in_channels=1024,
67 |             channels=256,
68 |             in_index=2,
69 |             num_classes=19,
70 |             dropout_ratio=0,
71 |             norm_cfg=norm_cfg,
72 |             num_convs=1,
73 |             up_scale=4,
74 |             kernel_size=3,
75 |             align_corners=False,
76 |             loss_decode=dict(
77 |                 type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
78 |     ],
79 |     train_cfg=dict(),
80 |     test_cfg=dict(mode='whole'))
81 | 


--------------------------------------------------------------------------------
/mmseg_custom/configs/_base_/models/setr_naive.py:
--------------------------------------------------------------------------------
 1 | # model settings (VisionTransformer backbone, SETRUPHead decode + 3 aux heads)
 2 | backbone_norm_cfg = dict(type='LN', eps=1e-6, requires_grad=True)  # used by the backbone
 3 | norm_cfg = dict(type='SyncBN', requires_grad=True)  # used by every head
 4 | model = dict(
 5 |     type='EncoderDecoder',
 6 |     pretrained='pretrain/jx_vit_large_p16_384-b3be5167.pth',
 7 |     backbone=dict(
 8 |         type='VisionTransformer',
 9 |         img_size=(768, 768),
10 |         patch_size=16,
11 |         in_channels=3,
12 |         embed_dims=1024,
13 |         num_layers=24,
14 |         num_heads=16,
15 |         out_indices=(9, 14, 19, 23),  # presumably the layers exposed to the heads
16 |         drop_rate=0.1,
17 |         norm_cfg=backbone_norm_cfg,
18 |         with_cls_token=True,
19 |         interpolate_mode='bilinear',
20 |     ),
21 |     decode_head=dict(
22 |         type='SETRUPHead',
23 |         in_channels=1024,
24 |         channels=256,
25 |         in_index=3,  # the auxiliary heads below use in_index 0, 1 and 2
26 |         num_classes=19,
27 |         dropout_ratio=0,
28 |         norm_cfg=norm_cfg,
29 |         num_convs=1,
30 |         up_scale=4,
31 |         kernel_size=1,
32 |         align_corners=False,
33 |         loss_decode=dict(
34 |             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
35 |     auxiliary_head=[  # same settings as decode_head except in_index and loss_weight=0.4
36 |         dict(
37 |             type='SETRUPHead',
38 |             in_channels=1024,
39 |             channels=256,
40 |             in_index=0,
41 |             num_classes=19,
42 |             dropout_ratio=0,
43 |             norm_cfg=norm_cfg,
44 |             num_convs=1,
45 |             up_scale=4,
46 |             kernel_size=1,
47 |             align_corners=False,
48 |             loss_decode=dict(
49 |                 type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
50 |         dict(
51 |             type='SETRUPHead',
52 |             in_channels=1024,
53 |             channels=256,
54 |             in_index=1,
55 |             num_classes=19,
56 |             dropout_ratio=0,
57 |             norm_cfg=norm_cfg,
58 |             num_convs=1,
59 |             up_scale=4,
60 |             kernel_size=1,
61 |             align_corners=False,
62 |             loss_decode=dict(
63 |                 type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
64 |         dict(
65 |             type='SETRUPHead',
66 |             in_channels=1024,
67 |             channels=256,
68 |             in_index=2,
69 |             num_classes=19,
70 |             dropout_ratio=0,
71 |             norm_cfg=norm_cfg,
72 |             num_convs=1,
73 |             up_scale=4,
74 |             kernel_size=1,
75 |             align_corners=False,
76 |             loss_decode=dict(
77 |                 type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4))
78 |     ],
79 |     train_cfg=dict(),
80 |     test_cfg=dict(mode='whole'))
81 | 


--------------------------------------------------------------------------------
/mmseg_custom/configs/_base_/models/upernet_swin_AS.py:
--------------------------------------------------------------------------------
 1 | # model settings (Swin backbone with saliency sampler, UPerHeadASAlign + FCN aux head)
 2 | norm_cfg = dict(type='SyncBN', requires_grad=True)  # used by both heads
 3 | backbone_norm_cfg = dict(type='LN', requires_grad=True)  # used by the backbone
 4 | LOG_DIR = None  # forwarded to the backbone as log_dir
 5 | # LOG_DIR = '/home/ubuntu/code/ResolutionDet/mmseg_exp/log'
 6 | ALIGN_CORNERS = False  # only the backbone reads this; both heads hard-code False
 7 | SAMPLER_NUM=3  # the sampler_* tuples in the backbone each have 3 entries
 8 | 
 9 | model = dict(
10 |     # type='EncoderDecoder',
11 |     type='EncoderDecoderWithSaliencySamplerPixelRelation',
12 |     edge_loss_weight=100,
13 |     pixel_relation=[],
14 |     pretrained=None,
15 |     backbone=dict(
16 |         use_checkpoint=True,
17 |         align_corners=ALIGN_CORNERS,
18 |         log_dir=LOG_DIR, 
19 |         sampler_num =SAMPLER_NUM, 
20 |         sampler_strides=(1, 1, 1), 
21 |         sampler_paddings=(5, 5, 5), 
22 |         sampler_fwhm=(3, 3, 3), 
23 |         sampler_mode=('avgpsp_semantic_edge', 'avgpsp_semantic_edge', 'avgpsp_semantic_edge'),
24 |         # sampler_mode=('lhpf', 'lhpf', 'lhpf'),
25 |         # psp_ratio=16,
26 |         type='SwinTransformerSaliencySampler',
27 |         # type='SwinTransformer',
28 | 
29 |         pretrain_img_size=224,
30 |         # embed_dims=192,
31 |         embed_dims=96,
32 |         patch_size=4,
33 |         window_size=7,
34 |         mlp_ratio=4,
35 |         depths=[2, 2, 6, 2],
36 |         num_heads=[3, 6, 12, 24],
37 |         strides=(4, 2, 2, 2),
38 |         out_indices=(0, 1, 2, 3),
39 |         qkv_bias=True,
40 |         qk_scale=None,
41 |         patch_norm=True,
42 |         drop_rate=0.,
43 |         attn_drop_rate=0.,
44 |         drop_path_rate=0.3,
45 |         use_abs_pos_embed=False,
46 |         act_cfg=dict(type='GELU'),
47 |         norm_cfg=backbone_norm_cfg),
48 |     decode_head=dict(
49 |         # type='UPerHead',
50 |         type='UPerHeadASAlign',
51 |         in_channels=[96, 192, 384, 768],  # embed_dims=96 doubled at each of the 4 outputs
52 |         # in_channels=[192, 384, 768, 1536],
53 |         in_index=[0, 1, 2, 3],
54 |         pool_scales=(1, 2, 3, 6),
55 |         # channels=512,
56 |         channels=256,
57 |         dropout_ratio=0.1,
58 |         num_classes=19,
59 |         norm_cfg=norm_cfg,
60 |         align_corners=False,  # NOTE(review): hard-coded; keep in sync with ALIGN_CORNERS
61 |         loss_decode=dict(
62 |             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
63 |     auxiliary_head=dict(
64 |         type='FCNHead',
65 |         in_channels=384,  # matches entry 2 of the decode head's in_channels
66 |         in_index=2,
67 |         channels=256,
68 |         num_convs=1,
69 |         concat_input=False,
70 |         dropout_ratio=0.1,
71 |         num_classes=19,
72 |         norm_cfg=norm_cfg,
73 |         align_corners=False,  # NOTE(review): hard-coded; keep in sync with ALIGN_CORNERS
74 |         loss_decode=dict(
75 |             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
76 |     # model training and testing settings
77 |     train_cfg=dict(),
78 |     test_cfg=dict(mode='whole'))
79 | 


--------------------------------------------------------------------------------
/mmseg_custom/configs/_base_/models/bisenetv2.py:
--------------------------------------------------------------------------------
 1 | # model settings (BiSeNetV2 backbone, FCN decode head, four FCN auxiliary heads)
 2 | norm_cfg = dict(type='SyncBN', requires_grad=True)  # used by every head
 3 | model = dict(
 4 |     type='EncoderDecoder',
 5 |     pretrained=None,
 6 |     backbone=dict(
 7 |         type='BiSeNetV2',
 8 |         detail_channels=(64, 64, 128),
 9 |         semantic_channels=(16, 32, 64, 128),
10 |         semantic_expansion_ratio=6,
11 |         bga_channels=128,
12 |         out_indices=(0, 1, 2, 3, 4),  # index 0 feeds decode_head, 1-4 the auxiliary heads
13 |         init_cfg=None,
14 |         align_corners=False),
15 |     decode_head=dict(
16 |         type='FCNHead',
17 |         in_channels=128,
18 |         in_index=0,
19 |         channels=1024,
20 |         num_convs=1,
21 |         concat_input=False,
22 |         dropout_ratio=0.1,
23 |         num_classes=19,
24 |         norm_cfg=norm_cfg,
25 |         align_corners=False,
26 |         loss_decode=dict(
27 |             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
28 |     auxiliary_head=[  # in_channels follow semantic_channels (16, 32, 64, 128); all loss_weight=1.0
29 |         dict(
30 |             type='FCNHead',
31 |             in_channels=16,
32 |             channels=16,
33 |             num_convs=2,
34 |             num_classes=19,
35 |             in_index=1,
36 |             norm_cfg=norm_cfg,
37 |             concat_input=False,
38 |             align_corners=False,
39 |             loss_decode=dict(
40 |                 type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
41 |         dict(
42 |             type='FCNHead',
43 |             in_channels=32,
44 |             channels=64,
45 |             num_convs=2,
46 |             num_classes=19,
47 |             in_index=2,
48 |             norm_cfg=norm_cfg,
49 |             concat_input=False,
50 |             align_corners=False,
51 |             loss_decode=dict(
52 |                 type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
53 |         dict(
54 |             type='FCNHead',
55 |             in_channels=64,
56 |             channels=256,
57 |             num_convs=2,
58 |             num_classes=19,
59 |             in_index=3,
60 |             norm_cfg=norm_cfg,
61 |             concat_input=False,
62 |             align_corners=False,
63 |             loss_decode=dict(
64 |                 type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
65 |         dict(
66 |             type='FCNHead',
67 |             in_channels=128,
68 |             channels=1024,
69 |             num_convs=2,
70 |             num_classes=19,
71 |             in_index=4,
72 |             norm_cfg=norm_cfg,
73 |             concat_input=False,
74 |             align_corners=False,
75 |             loss_decode=dict(
76 |                 type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
77 |     ],
78 |     # model training and testing settings
79 |     train_cfg=dict(),
80 |     test_cfg=dict(mode='whole'))
81 | 


--------------------------------------------------------------------------------
/mmseg_custom/configs/_base_/models/stdc.py:
--------------------------------------------------------------------------------
 1 | norm_cfg = dict(type='BN', requires_grad=True)  # shared by the backbone and every head
 2 | model = dict(
 3 |     type='EncoderDecoder',
 4 |     pretrained=None,
 5 |     backbone=dict(
 6 |         type='STDCContextPathNet',
 7 |         backbone_cfg=dict(
 8 |             type='STDCNet',
 9 |             stdc_type='STDCNet1',
10 |             in_channels=3,
11 |             channels=(32, 64, 256, 512, 1024),
12 |             bottleneck_type='cat',
13 |             num_convs=4,
14 |             norm_cfg=norm_cfg,
15 |             act_cfg=dict(type='ReLU'),
16 |             with_final_conv=False),
17 |         last_in_channels=(1024, 512),
18 |         out_channels=128,
19 |         ffm_cfg=dict(in_channels=384, out_channels=256, scale_factor=4)),
20 |     decode_head=dict(
21 |         type='FCNHead',
22 |         in_channels=256,
23 |         channels=256,
24 |         num_convs=1,
25 |         num_classes=19,
26 |         in_index=3,
27 |         concat_input=False,
28 |         dropout_ratio=0.1,
29 |         norm_cfg=norm_cfg,
30 |         align_corners=True,  # NOTE(review): True here and in the STDCHead, False in the two FCN aux heads
31 |         sampler=dict(type='OHEMPixelSampler', thresh=0.7, min_kept=10000),
32 |         loss_decode=dict(
33 |             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
34 |     auxiliary_head=[  # two FCN heads (in_index 2 and 1) plus one STDCHead (in_index 0)
35 |         dict(
36 |             type='FCNHead',
37 |             in_channels=128,
38 |             channels=64,
39 |             num_convs=1,
40 |             num_classes=19,
41 |             in_index=2,
42 |             norm_cfg=norm_cfg,
43 |             concat_input=False,
44 |             align_corners=False,
45 |             sampler=dict(type='OHEMPixelSampler', thresh=0.7, min_kept=10000),
46 |             loss_decode=dict(
47 |                 type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
48 |         dict(
49 |             type='FCNHead',
50 |             in_channels=128,
51 |             channels=64,
52 |             num_convs=1,
53 |             num_classes=19,
54 |             in_index=1,
55 |             norm_cfg=norm_cfg,
56 |             concat_input=False,
57 |             align_corners=False,
58 |             sampler=dict(type='OHEMPixelSampler', thresh=0.7, min_kept=10000),
59 |             loss_decode=dict(
60 |                 type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
61 |         dict(
62 |             type='STDCHead',
63 |             in_channels=256,
64 |             channels=64,
65 |             num_convs=1,
66 |             num_classes=2,  # the only head with 2 classes; the others use 19
67 |             boundary_threshold=0.1,
68 |             in_index=0,
69 |             norm_cfg=norm_cfg,
70 |             concat_input=False,
71 |             align_corners=True,
72 |             loss_decode=[  # two loss terms: sigmoid cross-entropy and Dice, no pixel sampler
73 |                 dict(
74 |                     type='CrossEntropyLoss',
75 |                     loss_name='loss_ce',
76 |                     use_sigmoid=True,
77 |                     loss_weight=1.0),
78 |                 dict(type='DiceLoss', loss_name='loss_dice', loss_weight=1.0)
79 |             ]),
80 |     ],
81 |     # model training and testing settings
82 |     train_cfg=dict(),
83 |     test_cfg=dict(mode='whole'))
84 | 


--------------------------------------------------------------------------------
/mmseg_custom/configs/_base_/models/setr_mla.py:
--------------------------------------------------------------------------------
 1 | # model settings (VisionTransformer backbone, MLANeck, SETRMLAHead + 4 FCN aux heads)
 2 | backbone_norm_cfg = dict(type='LN', eps=1e-6, requires_grad=True)  # used by the backbone
 3 | norm_cfg = dict(type='SyncBN', requires_grad=True)  # used by the neck and decode head
 4 | model = dict(
 5 |     type='EncoderDecoder',
 6 |     pretrained='pretrain/jx_vit_large_p16_384-b3be5167.pth',
 7 |     backbone=dict(
 8 |         type='VisionTransformer',
 9 |         img_size=(768, 768),
10 |         patch_size=16,
11 |         in_channels=3,
12 |         embed_dims=1024,
13 |         num_layers=24,
14 |         num_heads=16,
15 |         out_indices=(5, 11, 17, 23),  # presumably the layers exposed to the neck
16 |         drop_rate=0.1,
17 |         norm_cfg=backbone_norm_cfg,
18 |         with_cls_token=False,
19 |         interpolate_mode='bilinear',
20 |     ),
21 |     neck=dict(
22 |         type='MLANeck',
23 |         in_channels=[1024, 1024, 1024, 1024],  # one entry per out_index, each embed_dims wide
24 |         out_channels=256,
25 |         norm_cfg=norm_cfg,
26 |         act_cfg=dict(type='ReLU'),
27 |     ),
28 |     decode_head=dict(
29 |         type='SETRMLAHead',
30 |         in_channels=(256, 256, 256, 256),  # the neck's out_channels, four times
31 |         channels=512,
32 |         in_index=(0, 1, 2, 3),
33 |         dropout_ratio=0,
34 |         mla_channels=128,
35 |         num_classes=19,
36 |         norm_cfg=norm_cfg,
37 |         align_corners=False,
38 |         loss_decode=dict(
39 |             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
40 |     auxiliary_head=[  # NOTE(review): unlike decode_head, these pass no norm_cfg
41 |         dict(
42 |             type='FCNHead',
43 |             in_channels=256,
44 |             channels=256,
45 |             in_index=0,
46 |             dropout_ratio=0,
47 |             num_convs=0,
48 |             kernel_size=1,
49 |             concat_input=False,
50 |             num_classes=19,
51 |             align_corners=False,
52 |             loss_decode=dict(
53 |                 type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
54 |         dict(
55 |             type='FCNHead',
56 |             in_channels=256,
57 |             channels=256,
58 |             in_index=1,
59 |             dropout_ratio=0,
60 |             num_convs=0,
61 |             kernel_size=1,
62 |             concat_input=False,
63 |             num_classes=19,
64 |             align_corners=False,
65 |             loss_decode=dict(
66 |                 type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
67 |         dict(
68 |             type='FCNHead',
69 |             in_channels=256,
70 |             channels=256,
71 |             in_index=2,
72 |             dropout_ratio=0,
73 |             num_convs=0,
74 |             kernel_size=1,
75 |             concat_input=False,
76 |             num_classes=19,
77 |             align_corners=False,
78 |             loss_decode=dict(
79 |                 type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
80 |         dict(
81 |             type='FCNHead',
82 |             in_channels=256,
83 |             channels=256,
84 |             in_index=3,
85 |             dropout_ratio=0,
86 |             num_convs=0,
87 |             kernel_size=1,
88 |             concat_input=False,
89 |             num_classes=19,
90 |             align_corners=False,
91 |             loss_decode=dict(
92 |                 type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
93 |     ],
94 |     train_cfg=dict(),
95 |     test_cfg=dict(mode='whole'))
96 | 


--------------------------------------------------------------------------------
/mmseg_custom/datasets/samplers/distributed_sampler.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) OpenMMLab. All rights reserved.
 2 | from __future__ import division
 3 | from typing import Iterator, Optional
 4 | 
 5 | import torch
 6 | from torch.utils.data import Dataset
 7 | from torch.utils.data import DistributedSampler as _DistributedSampler
 8 | 
 9 | from mmseg.core.utils import sync_random_seed
10 | from mmseg.utils import get_device
11 | 
12 | 
13 | class DistributedSampler(_DistributedSampler):
14 |     """DistributedSampler inheriting from
15 |     `torch.utils.data.DistributedSampler`.
16 | 
17 |     Args:
18 |         dataset (Dataset): the dataset to draw sample indices from.
19 |         num_replicas (int, optional): Number of processes participating in
20 |             distributed training. By default, world_size is retrieved from the
21 |             current distributed group.
22 |         rank (int, optional):  Rank of the current process within num_replicas.
23 |             By default, rank is retrieved from the current distributed group.
24 |         shuffle (bool): If True (default), sampler will shuffle the indices.
25 |         seed (int): random seed used to shuffle the sampler if
26 |             :attr:`shuffle=True`. This number should be identical across all
27 |             processes in the distributed group. Default: ``0``.
28 |     """
29 | 
30 |     def __init__(self,
31 |                  dataset: Dataset,
32 |                  num_replicas: Optional[int] = None,
33 |                  rank: Optional[int] = None,
34 |                  shuffle: bool = True,
35 |                  seed: int = 0) -> None:
36 |         super().__init__(
37 |             dataset, num_replicas=num_replicas, rank=rank, shuffle=shuffle)
38 | 
39 |         # In distributed sampling, different ranks should sample
40 |         # non-overlapped data in the dataset. Therefore, the seed is passed
41 |         # through ``sync_random_seed`` so that each rank shuffles the data
42 |         # indices in the same order based on the same seed. Then different
43 |         # ranks could use different indices to select non-overlapped data
44 |         # from the same data list.
45 |         device = get_device()
46 |         self.seed = sync_random_seed(seed, device)
47 | 
48 |     def __iter__(self) -> Iterator:
49 |         """Return an iterator over the dataset indices for this rank.
50 | 
51 |         Indices are padded to ``total_size`` and then strided per rank.
52 |         """
53 |         # deterministically shuffle based on epoch
54 |         if self.shuffle:
55 |             g = torch.Generator()
56 |             # When :attr:`shuffle=True`, seeding with ``epoch + seed`` gives
57 |             # all replicas the same ordering within an epoch and a different
58 |             # one for each epoch. If ``self.epoch`` is left unchanged, the
59 |             # next iteration of this sampler will yield the same ordering.
60 |             g.manual_seed(self.epoch + self.seed)
61 |             indices = torch.randperm(len(self.dataset), generator=g).tolist()
62 |         else:
63 |             indices = torch.arange(len(self.dataset)).tolist()
64 | 
65 |         # add extra samples (taken from the head) to make it evenly divisible
66 |         indices += indices[:(self.total_size - len(indices))]
67 |         assert len(indices) == self.total_size
68 | 
69 |         # subsample: every num_replicas-th index, starting at this rank
70 |         indices = indices[self.rank:self.total_size:self.num_replicas]
71 |         assert len(indices) == self.num_samples
72 | 
73 |         return iter(indices)
74 | 


--------------------------------------------------------------------------------
/mmseg_custom/datasets/isaid.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) OpenMMLab. All rights reserved.
 2 | 
 3 | import mmcv
 4 | from mmcv.utils import print_log
 5 | 
 6 | from mmseg.utils import get_root_logger
 7 | from .builder import DATASETS
 8 | from .custom import CustomDataset
 9 | 
10 | 
11 | @DATASETS.register_module()
12 | class iSAIDDataset(CustomDataset):
13 |     """ iSAID: A Large-scale Dataset for Instance Segmentation in Aerial Images
14 |     In segmentation map annotation for iSAID dataset, which is included
15 |     in 16 categories. ``reduce_zero_label`` is fixed to False. The
16 |     ``img_suffix`` is fixed to '.png' and ``seg_map_suffix`` is fixed to
17 |     '_manual1.png'.
18 |     """
19 | 
20 |     CLASSES = ('background', 'ship', 'store_tank', 'baseball_diamond',
21 |                'tennis_court', 'basketball_court', 'Ground_Track_Field',
22 |                'Bridge', 'Large_Vehicle', 'Small_Vehicle', 'Helicopter',
23 |                'Swimming_pool', 'Roundabout', 'Soccer_ball_field', 'plane',
24 |                'Harbor')
25 | 
26 |     PALETTE = [[0, 0, 0], [0, 0, 63], [0, 63, 63], [0, 63, 0], [0, 63, 127],
27 |                [0, 63, 191], [0, 63, 255], [0, 127, 63], [0, 127, 127],
28 |                [0, 0, 127], [0, 0, 191], [0, 0, 255], [0, 191, 127],
29 |                [0, 127, 191], [0, 127, 255], [0, 100, 155]]
30 | 
31 |     def __init__(self, **kwargs):
32 |         super(iSAIDDataset, self).__init__(
33 |             img_suffix='.png',
34 |             seg_map_suffix='.png',
35 |             ignore_index=255,
36 |             **kwargs)
37 |         assert self.file_client.exists(self.img_dir)
38 | 
39 |     def load_annotations(self,
40 |                          img_dir,
41 |                          img_suffix,
42 |                          ann_dir,
43 |                          seg_map_suffix=None,
44 |                          split=None):
45 |         """Load annotation from directory.
46 | 
47 |         Args:
48 |             img_dir (str): Path to image directory
49 |             img_suffix (str): Suffix of images.
50 |             ann_dir (str|None): Path to annotation directory.
51 |             seg_map_suffix (str|None): Suffix of segmentation maps.
52 |             split (str|None): Split txt file. If split is specified, only file
53 |                 with suffix in the splits will be loaded. Otherwise, all images
54 |                 in img_dir/ann_dir will be loaded. Default: None
55 | 
56 |         Returns:
57 |             list[dict]: All image info of dataset.
58 |         """
59 | 
60 |         img_infos = []
61 |         if split is not None:
62 |             with open(split) as f:
63 |                 for line in f:
64 |                     name = line.strip()
65 |                     img_info = dict(filename=name + img_suffix)
66 |                     if ann_dir is not None:
67 |                         ann_name = name + '_instance_color_RGB'
68 |                         seg_map = ann_name + seg_map_suffix
69 |                         img_info['ann'] = dict(seg_map=seg_map)
70 |                     img_infos.append(img_info)
71 |         else:
72 |             for img in mmcv.scandir(img_dir, img_suffix, recursive=True):
73 |                 img_info = dict(filename=img)
74 |                 if ann_dir is not None:
75 |                     seg_img = img
76 |                     seg_map = seg_img.replace(
77 |                         img_suffix, '_instance_color_RGB' + seg_map_suffix)
78 |                     img_info['ann'] = dict(seg_map=seg_map)
79 |                 img_infos.append(img_info)
80 | 
81 |         print_log(f'Loaded {len(img_infos)} images', logger=get_root_logger())
82 |         return img_infos
83 | 


--------------------------------------------------------------------------------
/mmseg_custom/datasets/loveda.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) OpenMMLab. All rights reserved.
 2 | import os.path as osp
 3 | 
 4 | import mmcv
 5 | import numpy as np
 6 | from PIL import Image
 7 | 
 8 | from .builder import DATASETS
 9 | from .custom import CustomDataset
10 | 
11 | 
12 | @DATASETS.register_module()
13 | class LoveDADataset(CustomDataset):
14 |     """LoveDA dataset.
15 | 
16 |     In segmentation map annotation for LoveDA, 0 is the ignore index.
17 |     ``reduce_zero_label`` should be set to True. The ``img_suffix`` and
18 |     ``seg_map_suffix`` are both fixed to '.png'.
19 |     """
20 |     CLASSES = ('background', 'building', 'road', 'water', 'barren', 'forest',
21 |                'agricultural')
22 | 
23 |     PALETTE = [[255, 255, 255], [255, 0, 0], [255, 255, 0], [0, 0, 255],
24 |                [159, 129, 183], [0, 255, 0], [255, 195, 128]]
25 | 
26 |     def __init__(self, **kwargs):
27 |         super(LoveDADataset, self).__init__(
28 |             img_suffix='.png',
29 |             seg_map_suffix='.png',
30 |             reduce_zero_label=True,
31 |             **kwargs)
32 | 
33 |     def results2img(self, results, imgfile_prefix, indices=None):
34 |         """Write the segmentation results to images.
35 | 
36 |         Args:
37 |             results (list[ndarray]): Testing results of the
38 |                 dataset.
39 |             imgfile_prefix (str): The filename prefix of the png files.
40 |                 If the prefix is "somepath/xxx",
41 |                 the png files will be named "somepath/xxx.png".
42 |             indices (list[int], optional): Indices of input results, if not
43 |                 set, all the indices of the dataset will be used.
44 |                 Default: None.
45 | 
46 |         Returns:
47 |             list[str: str]: result txt files which contains corresponding
48 |             semantic segmentation images.
49 |         """
50 | 
51 |         mmcv.mkdir_or_exist(imgfile_prefix)
52 |         result_files = []
53 |         for result, idx in zip(results, indices):
54 | 
55 |             filename = self.img_infos[idx]['filename']
56 |             basename = osp.splitext(osp.basename(filename))[0]
57 | 
58 |             png_filename = osp.join(imgfile_prefix, f'{basename}.png')
59 | 
60 |             # The  index range of official requirement is from 0 to 6.
61 |             output = Image.fromarray(result.astype(np.uint8))
62 |             output.save(png_filename)
63 |             result_files.append(png_filename)
64 | 
65 |         return result_files
66 | 
67 |     def format_results(self, results, imgfile_prefix, indices=None):
68 |         """Format the results into dir (standard format for LoveDA evaluation).
69 | 
70 |         Args:
71 |             results (list): Testing results of the dataset.
72 |             imgfile_prefix (str): The prefix of images files. It
73 |                 includes the file path and the prefix of filename, e.g.,
74 |                 "a/b/prefix".
75 |             indices (list[int], optional): Indices of input results,
76 |                 if not set, all the indices of the dataset will be used.
77 |                 Default: None.
78 | 
79 |         Returns:
80 |             tuple: (result_files, tmp_dir), result_files is a list containing
81 |                 the image paths, tmp_dir is the temporal directory created
82 |                 for saving json/png files when img_prefix is not specified.
83 |         """
84 |         if indices is None:
85 |             indices = list(range(len(self)))
86 | 
87 |         assert isinstance(results, list), 'results must be a list.'
88 |         assert isinstance(indices, list), 'indices must be a list.'
89 | 
90 |         result_files = self.results2img(results, imgfile_prefix, indices)
91 | 
92 |         return result_files
93 | 


--------------------------------------------------------------------------------
/mmseg_custom/models/utils/res_layer.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) OpenMMLab. All rights reserved.
 2 | from mmcv.cnn import build_conv_layer, build_norm_layer
 3 | from mmcv.runner import Sequential
 4 | from torch import nn as nn
 5 | 
 6 | 
 7 | class ResLayer(Sequential):
 8 |     """ResLayer to build ResNet style backbone.
 9 | 
10 |     Args:
11 |         block (nn.Module): block class used to build ResLayer.
12 |         inplanes (int): inplanes of block.
13 |         planes (int): planes of block.
14 |         num_blocks (int): number of blocks.
15 |         stride (int): stride of the first block. Default: 1
16 |         dilation (int): dilation passed to the blocks. Default: 1
17 |         avg_down (bool): Use AvgPool instead of stride conv when
18 |             downsampling in the bottleneck. Default: False
19 |         conv_cfg (dict): config dict for the conv layer. Default: None
20 |         norm_cfg (dict): config dict for the norm layer.
21 |             Default: dict(type='BN')
22 |         multi_grid (Sequence[int] | None): Per-block dilation rates that
23 |             replace ``dilation`` when given. Default: None
24 |         contract_dilation (bool): Whether to halve the first block's
25 |             dilation when ``dilation > 1``. Default: False
26 |     """
27 | 
28 |     def __init__(self,
29 |                  block,
30 |                  inplanes,
31 |                  planes,
32 |                  num_blocks,
33 |                  stride=1,
34 |                  dilation=1,
35 |                  avg_down=False,
36 |                  conv_cfg=None,
37 |                  norm_cfg=dict(type='BN'),
38 |                  multi_grid=None,
39 |                  contract_dilation=False,
40 |                  **kwargs):
41 |         self.block = block
42 |         # A shortcut projection is built when the first block changes stride or width.
43 |         downsample = None
44 |         if stride != 1 or inplanes != planes * block.expansion:
45 |             downsample = []
46 |             conv_stride = stride
47 |             if avg_down:
48 |                 conv_stride = 1  # the AvgPool2d below does the striding instead
49 |                 downsample.append(
50 |                     nn.AvgPool2d(
51 |                         kernel_size=stride,
52 |                         stride=stride,
53 |                         ceil_mode=True,
54 |                         count_include_pad=False))
55 |             downsample.extend([
56 |                 build_conv_layer(
57 |                     conv_cfg,
58 |                     inplanes,
59 |                     planes * block.expansion,
60 |                     kernel_size=1,
61 |                     stride=conv_stride,
62 |                     bias=False),
63 |                 build_norm_layer(norm_cfg, planes * block.expansion)[1]
64 |             ])
65 |             downsample = nn.Sequential(*downsample)
66 |         # Dilation of the first block: multi_grid[0] wins, else (optionally halved) dilation.
67 |         layers = []
68 |         if multi_grid is None:
69 |             if dilation > 1 and contract_dilation:
70 |                 first_dilation = dilation // 2
71 |             else:
72 |                 first_dilation = dilation
73 |         else:
74 |             first_dilation = multi_grid[0]
75 |         layers.append(
76 |             block(
77 |                 inplanes=inplanes,
78 |                 planes=planes,
79 |                 stride=stride,
80 |                 dilation=first_dilation,
81 |                 downsample=downsample,
82 |                 conv_cfg=conv_cfg,
83 |                 norm_cfg=norm_cfg,
84 |                 **kwargs))
85 |         inplanes = planes * block.expansion  # input width of the remaining blocks
86 |         for i in range(1, num_blocks):
87 |             layers.append(
88 |                 block(
89 |                     inplanes=inplanes,
90 |                     planes=planes,
91 |                     stride=1,
92 |                     dilation=dilation if multi_grid is None else multi_grid[i],
93 |                     conv_cfg=conv_cfg,
94 |                     norm_cfg=norm_cfg,
95 |                     **kwargs))
96 |         super(ResLayer, self).__init__(*layers)
97 | 


--------------------------------------------------------------------------------
/mmseg_custom/configs/upernet/upernet_r50_512x512_40k_voc12aug_FADC.py:
--------------------------------------------------------------------------------
 1 | _base_ = './upernet_r50_512x512_40k_voc12aug.py'
 2 | model = dict(
 3 |     pretrained='open-mmlab://resnet50_v1c', 
 4 |     backbone=dict(
 5 |         depth=50,
 6 |         # type='ResNetV1c',
 7 |         dcn=dict( # add deformable convolution to the last three blocks
 8 |                 # type='DCNv2',
 9 |                 # type='FreqDecomp_DCNv2',
10 |                 # k_list=[8/1, 8/2, 8/3, 8/4, 8/5, 8/6, 8/7][::-1],
11 |                 # fs_feat='feat',
12 |                 # lp_type='freq',
13 |                 # # lp_type='freq_channel_att',
14 |                 # act='sigmoid',
15 |                 # channel_group=1,
16 |                 # channel_bn=False,
17 |                 # deformable_groups=1, 
18 |                 type='AdaDilatedConv',
19 |                 offset_freq=None,
20 |                 # offset_freq='SLP_res',
21 |                 deformable_groups=1, 
22 |                 padding_mode='zero',
23 |                 kernel_decompose='both',
24 |                 epsilon=1e-4,
25 |                 use_zero_dilation=False,
26 |                 # kernel_decompose=None,
27 |                 pre_fs=False,
28 |                 # pre_fs=True,
29 |                 # conv_type='multifreqband',
30 |                 conv_type='conv',
31 |                 # fs_cfg=None,
32 |                 fs_cfg={
33 |                     # 'k_list':[3,5,7,9],
34 |                     'k_list':[2,4,8],
35 |                     'fs_feat':'feat',
36 |                     'lowfreq_att':False,
37 |                     # 'lp_type':'freq_eca',
38 |                     # 'lp_type':'freq_channel_att',
39 |                     # 'lp_type':'freq',
40 |                     # 'lp_type':'avgpool',
41 |                     'lp_type':'laplacian',
42 |                     'act':'sigmoid',
43 |                     'spatial':'conv',
44 |                     'channel_res':True,
45 |                     'spatial_group':8,
46 |                 },
47 |                 sp_att=False,
48 |                 # type='AAConv',
49 |                 # compress_ratio=4,
50 |                 # lp_kernel=5,
51 |                 # pre_filter=False,
52 |                 # lp_bank=['FLC', 'PALP', 'SLP'],
53 |                 # lp_bank=['FS'],
54 |                 # use_BFM=False,
55 |                 # type='FLCConv',
56 |                 # freq_select_cfg=None,
57 |                 # res_path='high_extra_conv1x1',
58 |                 # anti_aliasing_path=False,
59 |                 # freq_select_cfg={
60 |                 #         # 'k_list':[8/1, 8/2, 8/3, 8/4, 8/5, 8/6, 8/7][::-1],
61 |                 #         # 'k_list':[4/1, 4/2, 4/3][::-1],
62 |                 #         'lowfreq_att':False,
63 |                 #         'fs_feat':'feat',
64 |                 #         # 'lp_type':'freq_eca',
65 |                 #         # 'lp_type':'freq_channel_att',
66 |                 #         'lp_type':'freq',
67 |                 #         'act':'sigmoid',
68 |                 #         'spatial':'conv',
69 |                 #         'channel_res':True,
70 |                 #         'spatial_group':8,
71 |                 #         'global_selection':True,
72 |                 #         'init':'zero'
73 |                 #     },
74 |                 fallback_on_stride=False),
75 |             # dcn=dict( #在最后三个block加入可变形卷积 
76 |             # 	# modulated=False, 
77 |             #     # type='DCN',
78 |             #     deformable_groups=1, fallback_on_stride=False, only_on_stride_conv1=True),
79 |             # stage_with_dcn=(False, True, True, True),
80 |             stage_with_dcn=(False, True, True, True),
81 |         ),
82 |     decode_head=dict(
83 |         type='UPerHead',
84 |         channels=128,)
85 | )
# Data-loader overrides for this experiment.
data = dict(
    samples_per_gpu=16,
    workers_per_gpu=16,
)
# Checkpoint retention override.
checkpoint_config = dict(max_keep_ckpts=2)
# `pre_eval` is a boolean switch, so pass a real bool. The previous value was
# the string 'True', which only worked because any non-empty string is truthy
# (the string 'False' would have enabled it as well).
evaluation = dict(save_best='mIoU', pre_eval=True)


--------------------------------------------------------------------------------
/mmseg_custom/core/seg/sampler/ohem_pixel_sampler.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) OpenMMLab. All rights reserved.
 2 | import torch
 3 | import torch.nn as nn
 4 | import torch.nn.functional as F
 5 | 
 6 | from ..builder import PIXEL_SAMPLERS
 7 | from .base_pixel_sampler import BasePixelSampler
 8 | 
 9 | 
@PIXEL_SAMPLERS.register_module()
class OHEMPixelSampler(BasePixelSampler):
    """Pixel sampler implementing Online Hard Example Mining (OHEM).

    The sampler produces a 0/1 weight map that marks the "hard" pixels of a
    batch. Hardness is judged either by a low predicted probability for the
    ground-truth class (when ``thresh`` is set) or by the largest per-pixel
    loss (when ``thresh`` is None).

    Args:
        context (nn.Module): Owner of the sampler. Its ``ignore_index`` and
            ``loss_decode`` attributes are read while sampling.
        thresh (float, optional): Confidence threshold. Valid pixels whose
            ground-truth probability falls below it are selected. When None,
            the pixels with the highest loss are selected instead.
            Default: None.
        min_kept (int, optional): Per-image budget of pixels to keep; it is
            multiplied by the batch size during sampling. Default: 100000.
    """

    def __init__(self, context, thresh=None, min_kept=100000):
        super().__init__()
        self.context = context
        assert min_kept > 1
        self.thresh = thresh
        self.min_kept = min_kept

    def sample(self, seg_logit, seg_label):
        """Build the hard-pixel weight map for one batch.

        Args:
            seg_logit (torch.Tensor): Segmentation logits, shape (N, C, H, W).
            seg_label (torch.Tensor): Segmentation labels, shape (N, 1, H, W).

        Returns:
            torch.Tensor: Weight map of shape (N, H, W); selected pixels are
            1, all others (including ignored pixels) are 0.
        """
        with torch.no_grad():
            assert seg_logit.shape[2:] == seg_label.shape[2:]
            assert seg_label.shape[1] == 1

            ignore_index = self.context.ignore_index
            labels = seg_label.squeeze(1).long()
            num_keep = self.min_kept * labels.size(0)
            is_valid = labels != ignore_index

            weight_map = seg_logit.new_zeros(size=labels.size())
            # Boolean indexing yields a copy; it is written back at the end.
            picked = weight_map[is_valid]

            if self.thresh is None:
                # Loss-based selection: keep the `num_keep` largest losses.
                loss_modules = self.context.loss_decode
                if not isinstance(loss_modules, nn.ModuleList):
                    loss_modules = [loss_modules]

                pixel_loss = 0.0
                for loss_fn in loss_modules:
                    pixel_loss += loss_fn(
                        seg_logit,
                        labels,
                        weight=None,
                        ignore_index=ignore_index,
                        reduction_override='none')

                # faster than topk according to https://github.com/pytorch/pytorch/issues/22812  # noqa
                _, order = pixel_loss[is_valid].sort(descending=True)
                picked[order[:num_keep]] = 1.
            else:
                # Confidence-based selection on the ground-truth class.
                probs = F.softmax(seg_logit, dim=1)

                # Ignored labels may be out of range for `gather`, so point
                # them at class 0; they are masked out by `is_valid` anyway.
                gather_index = labels.masked_fill(~is_valid, 0).unsqueeze(1)
                gt_prob = probs.gather(1, gather_index).squeeze(1)
                valid_prob = gt_prob[is_valid]
                sorted_prob, _ = valid_prob.sort()

                num_valid = sorted_prob.numel()
                if num_valid > 0:
                    min_threshold = sorted_prob[min(num_keep, num_valid - 1)]
                else:
                    min_threshold = 0.0
                # Raise the cut-off when too few pixels fall under `thresh`.
                threshold = max(min_threshold, self.thresh)
                picked[valid_prob < threshold] = 1.

            weight_map[is_valid] = picked

            return weight_map
86 | 


--------------------------------------------------------------------------------
/mmseg_custom/models/utils/shape_convert.py:
--------------------------------------------------------------------------------
  1 | # Copyright (c) OpenMMLab. All rights reserved.
  2 | def nlc_to_nchw(x, hw_shape):
  3 |     """Convert [N, L, C] shape tensor to [N, C, H, W] shape tensor.
  4 | 
  5 |     Args:
  6 |         x (Tensor): The input tensor of shape [N, L, C] before conversion.
  7 |         hw_shape (Sequence[int]): The height and width of output feature map.
  8 | 
  9 |     Returns:
 10 |         Tensor: The output tensor of shape [N, C, H, W] after conversion.
 11 |     """
 12 |     H, W = hw_shape
 13 |     assert len(x.shape) == 3
 14 |     B, L, C = x.shape
 15 |     assert L == H * W, 'The seq_len doesn\'t match H, W'
 16 |     return x.transpose(1, 2).reshape(B, C, H, W)
 17 | 
 18 | 
 19 | def nchw_to_nlc(x):
 20 |     """Flatten [N, C, H, W] shape tensor to [N, L, C] shape tensor.
 21 | 
 22 |     Args:
 23 |         x (Tensor): The input tensor of shape [N, C, H, W] before conversion.
 24 | 
 25 |     Returns:
 26 |         Tensor: The output tensor of shape [N, L, C] after conversion.
 27 |     """
 28 |     assert len(x.shape) == 4
 29 |     return x.flatten(2).transpose(1, 2).contiguous()
 30 | 
 31 | 
 32 | def nchw2nlc2nchw(module, x, contiguous=False, **kwargs):
 33 |     """Flatten [N, C, H, W] shape tensor `x` to [N, L, C] shape tensor. Use the
 34 |     reshaped tensor as the input of `module`, and the convert the output of
 35 |     `module`, whose shape is.
 36 | 
 37 |     [N, L, C], to [N, C, H, W].
 38 | 
 39 |     Args:
 40 |         module (Callable): A callable object the takes a tensor
 41 |             with shape [N, L, C] as input.
 42 |         x (Tensor): The input tensor of shape [N, C, H, W].
 43 |                 contiguous:
 44 |         contiguous (Bool): Whether to make the tensor contiguous
 45 |             after each shape transform.
 46 | 
 47 |     Returns:
 48 |         Tensor: The output tensor of shape [N, C, H, W].
 49 | 
 50 |     Example:
 51 |         >>> import torch
 52 |         >>> import torch.nn as nn
 53 |         >>> norm = nn.LayerNorm(4)
 54 |         >>> feature_map = torch.rand(4, 4, 5, 5)
 55 |         >>> output = nchw2nlc2nchw(norm, feature_map)
 56 |     """
 57 |     B, C, H, W = x.shape
 58 |     if not contiguous:
 59 |         x = x.flatten(2).transpose(1, 2)
 60 |         x = module(x, **kwargs)
 61 |         x = x.transpose(1, 2).reshape(B, C, H, W)
 62 |     else:
 63 |         x = x.flatten(2).transpose(1, 2).contiguous()
 64 |         x = module(x, **kwargs)
 65 |         x = x.transpose(1, 2).reshape(B, C, H, W).contiguous()
 66 |     return x
 67 | 
 68 | 
 69 | def nlc2nchw2nlc(module, x, hw_shape, contiguous=False, **kwargs):
 70 |     """Convert [N, L, C] shape tensor `x` to [N, C, H, W] shape tensor. Use the
 71 |     reshaped tensor as the input of `module`, and convert the output of
 72 |     `module`, whose shape is.
 73 | 
 74 |     [N, C, H, W], to [N, L, C].
 75 | 
 76 |     Args:
 77 |         module (Callable): A callable object the takes a tensor
 78 |             with shape [N, C, H, W] as input.
 79 |         x (Tensor): The input tensor of shape [N, L, C].
 80 |         hw_shape: (Sequence[int]): The height and width of the
 81 |             feature map with shape [N, C, H, W].
 82 |         contiguous (Bool): Whether to make the tensor contiguous
 83 |             after each shape transform.
 84 | 
 85 |     Returns:
 86 |         Tensor: The output tensor of shape [N, L, C].
 87 | 
 88 |     Example:
 89 |         >>> import torch
 90 |         >>> import torch.nn as nn
 91 |         >>> conv = nn.Conv2d(16, 16, 3, 1, 1)
 92 |         >>> feature_map = torch.rand(4, 25, 16)
 93 |         >>> output = nlc2nchw2nlc(conv, feature_map, (5, 5))
 94 |     """
 95 |     H, W = hw_shape
 96 |     assert len(x.shape) == 3
 97 |     B, L, C = x.shape
 98 |     assert L == H * W, 'The seq_len doesn\'t match H, W'
 99 |     if not contiguous:
100 |         x = x.transpose(1, 2).reshape(B, C, H, W)
101 |         x = module(x, **kwargs)
102 |         x = x.flatten(2).transpose(1, 2)
103 |     else:
104 |         x = x.transpose(1, 2).reshape(B, C, H, W).contiguous()
105 |         x = module(x, **kwargs)
106 |         x = x.flatten(2).transpose(1, 2).contiguous()
107 |     return x
108 | 


--------------------------------------------------------------------------------
/mmseg_custom/models/utils/up_conv_block.py:
--------------------------------------------------------------------------------
  1 | # Copyright (c) OpenMMLab. All rights reserved.
  2 | import torch
  3 | import torch.nn as nn
  4 | from mmcv.cnn import ConvModule, build_upsample_layer
  5 | 
  6 | 
  7 | class UpConvBlock(nn.Module):
  8 |     """Upsample convolution block in decoder for UNet.
  9 | 
 10 |     This upsample convolution block consists of one upsample module
 11 |     followed by one convolution block. The upsample module expands the
 12 |     high-level low-resolution feature map and the convolution block fuses
 13 |     the upsampled high-level low-resolution feature map and the low-level
 14 |     high-resolution feature map from encoder.
 15 | 
 16 |     Args:
 17 |         conv_block (nn.Sequential): Sequential of convolutional layers.
 18 |         in_channels (int): Number of input channels of the high-level
 19 |         skip_channels (int): Number of input channels of the low-level
 20 |         high-resolution feature map from encoder.
 21 |         out_channels (int): Number of output channels.
 22 |         num_convs (int): Number of convolutional layers in the conv_block.
 23 |             Default: 2.
 24 |         stride (int): Stride of convolutional layer in conv_block. Default: 1.
 25 |         dilation (int): Dilation rate of convolutional layer in conv_block.
 26 |             Default: 1.
 27 |         with_cp (bool): Use checkpoint or not. Using checkpoint will save some
 28 |             memory while slowing down the training speed. Default: False.
 29 |         conv_cfg (dict | None): Config dict for convolution layer.
 30 |             Default: None.
 31 |         norm_cfg (dict | None): Config dict for normalization layer.
 32 |             Default: dict(type='BN').
 33 |         act_cfg (dict | None): Config dict for activation layer in ConvModule.
 34 |             Default: dict(type='ReLU').
 35 |         upsample_cfg (dict): The upsample config of the upsample module in
 36 |             decoder. Default: dict(type='InterpConv'). If the size of
 37 |             high-level feature map is the same as that of skip feature map
 38 |             (low-level feature map from encoder), it does not need upsample the
 39 |             high-level feature map and the upsample_cfg is None.
 40 |         dcn (bool): Use deformable convolution in convolutional layer or not.
 41 |             Default: None.
 42 |         plugins (dict): plugins for convolutional layers. Default: None.
 43 |     """
 44 | 
 45 |     def __init__(self,
 46 |                  conv_block,
 47 |                  in_channels,
 48 |                  skip_channels,
 49 |                  out_channels,
 50 |                  num_convs=2,
 51 |                  stride=1,
 52 |                  dilation=1,
 53 |                  with_cp=False,
 54 |                  conv_cfg=None,
 55 |                  norm_cfg=dict(type='BN'),
 56 |                  act_cfg=dict(type='ReLU'),
 57 |                  upsample_cfg=dict(type='InterpConv'),
 58 |                  dcn=None,
 59 |                  plugins=None):
 60 |         super(UpConvBlock, self).__init__()
 61 |         assert dcn is None, 'Not implemented yet.'
 62 |         assert plugins is None, 'Not implemented yet.'
 63 | 
 64 |         self.conv_block = conv_block(
 65 |             in_channels=2 * skip_channels,
 66 |             out_channels=out_channels,
 67 |             num_convs=num_convs,
 68 |             stride=stride,
 69 |             dilation=dilation,
 70 |             with_cp=with_cp,
 71 |             conv_cfg=conv_cfg,
 72 |             norm_cfg=norm_cfg,
 73 |             act_cfg=act_cfg,
 74 |             dcn=None,
 75 |             plugins=None)
 76 |         if upsample_cfg is not None:
 77 |             self.upsample = build_upsample_layer(
 78 |                 cfg=upsample_cfg,
 79 |                 in_channels=in_channels,
 80 |                 out_channels=skip_channels,
 81 |                 with_cp=with_cp,
 82 |                 norm_cfg=norm_cfg,
 83 |                 act_cfg=act_cfg)
 84 |         else:
 85 |             self.upsample = ConvModule(
 86 |                 in_channels,
 87 |                 skip_channels,
 88 |                 kernel_size=1,
 89 |                 stride=1,
 90 |                 padding=0,
 91 |                 conv_cfg=conv_cfg,
 92 |                 norm_cfg=norm_cfg,
 93 |                 act_cfg=act_cfg)
 94 | 
 95 |     def forward(self, skip, x):
 96 |         """Forward function."""
 97 | 
 98 |         x = self.upsample(x)
 99 |         out = torch.cat([skip, x], dim=1)
100 |         out = self.conv_block(out)
101 | 
102 |         return out
103 | 


--------------------------------------------------------------------------------
/mmseg_custom/core/evaluation/eval_hooks.py:
--------------------------------------------------------------------------------
  1 | # Copyright (c) OpenMMLab. All rights reserved.
  2 | import os.path as osp
  3 | import warnings
  4 | 
  5 | import torch.distributed as dist
  6 | from mmcv.runner import DistEvalHook as _DistEvalHook
  7 | from mmcv.runner import EvalHook as _EvalHook
  8 | from torch.nn.modules.batchnorm import _BatchNorm
  9 | 
 10 | 
 11 | class EvalHook(_EvalHook):
 12 |     """Single GPU EvalHook, with efficient test support.
 13 | 
 14 |     Args:
 15 |         by_epoch (bool): Determine perform evaluation by epoch or by iteration.
 16 |             If set to True, it will perform by epoch. Otherwise, by iteration.
 17 |             Default: False.
 18 |         efficient_test (bool): Whether save the results as local numpy files to
 19 |             save CPU memory during evaluation. Default: False.
 20 |         pre_eval (bool): Whether to use progressive mode to evaluate model.
 21 |             Default: False.
 22 |     Returns:
 23 |         list: The prediction results.
 24 |     """
 25 | 
 26 |     greater_keys = ['mIoU', 'mAcc', 'aAcc']
 27 | 
 28 |     def __init__(self,
 29 |                  *args,
 30 |                  by_epoch=False,
 31 |                  efficient_test=False,
 32 |                  pre_eval=False,
 33 |                  **kwargs):
 34 |         super().__init__(*args, by_epoch=by_epoch, **kwargs)
 35 |         self.pre_eval = pre_eval
 36 |         if efficient_test:
 37 |             warnings.warn(
 38 |                 'DeprecationWarning: ``efficient_test`` for evaluation hook '
 39 |                 'is deprecated, the evaluation hook is CPU memory friendly '
 40 |                 'with ``pre_eval=True`` as argument for ``single_gpu_test()`` '
 41 |                 'function')
 42 | 
 43 |     def _do_evaluate(self, runner):
 44 |         """perform evaluation and save ckpt."""
 45 |         if not self._should_evaluate(runner):
 46 |             return
 47 | 
 48 |         from mmseg.apis import single_gpu_test
 49 |         results = single_gpu_test(
 50 |             runner.model, self.dataloader, show=False, pre_eval=self.pre_eval)
 51 |         runner.log_buffer.clear()
 52 |         runner.log_buffer.output['eval_iter_num'] = len(self.dataloader)
 53 |         key_score = self.evaluate(runner, results)
 54 |         if self.save_best:
 55 |             self._save_ckpt(runner, key_score)
 56 | 
 57 | 
 58 | class DistEvalHook(_DistEvalHook):
 59 |     """Distributed EvalHook, with efficient test support.
 60 | 
 61 |     Args:
 62 |         by_epoch (bool): Determine perform evaluation by epoch or by iteration.
 63 |             If set to True, it will perform by epoch. Otherwise, by iteration.
 64 |             Default: False.
 65 |         efficient_test (bool): Whether save the results as local numpy files to
 66 |             save CPU memory during evaluation. Default: False.
 67 |         pre_eval (bool): Whether to use progressive mode to evaluate model.
 68 |             Default: False.
 69 |     Returns:
 70 |         list: The prediction results.
 71 |     """
 72 | 
 73 |     greater_keys = ['mIoU', 'mAcc', 'aAcc']
 74 | 
 75 |     def __init__(self,
 76 |                  *args,
 77 |                  by_epoch=False,
 78 |                  efficient_test=False,
 79 |                  pre_eval=False,
 80 |                  **kwargs):
 81 |         super().__init__(*args, by_epoch=by_epoch, **kwargs)
 82 |         self.pre_eval = pre_eval
 83 |         if efficient_test:
 84 |             warnings.warn(
 85 |                 'DeprecationWarning: ``efficient_test`` for evaluation hook '
 86 |                 'is deprecated, the evaluation hook is CPU memory friendly '
 87 |                 'with ``pre_eval=True`` as argument for ``multi_gpu_test()`` '
 88 |                 'function')
 89 | 
 90 |     def _do_evaluate(self, runner):
 91 |         """perform evaluation and save ckpt."""
 92 |         # Synchronization of BatchNorm's buffer (running_mean
 93 |         # and running_var) is not supported in the DDP of pytorch,
 94 |         # which may cause the inconsistent performance of models in
 95 |         # different ranks, so we broadcast BatchNorm's buffers
 96 |         # of rank 0 to other ranks to avoid this.
 97 |         if self.broadcast_bn_buffer:
 98 |             model = runner.model
 99 |             for name, module in model.named_modules():
100 |                 if isinstance(module,
101 |                               _BatchNorm) and module.track_running_stats:
102 |                     dist.broadcast(module.running_var, 0)
103 |                     dist.broadcast(module.running_mean, 0)
104 | 
105 |         if not self._should_evaluate(runner):
106 |             return
107 | 
108 |         tmpdir = self.tmpdir
109 |         if tmpdir is None:
110 |             tmpdir = osp.join(runner.work_dir, '.eval_hook')
111 | 
112 |         from mmseg.apis import multi_gpu_test
113 |         results = multi_gpu_test(
114 |             runner.model,
115 |             self.dataloader,
116 |             tmpdir=tmpdir,
117 |             gpu_collect=self.gpu_collect,
118 |             pre_eval=self.pre_eval)
119 | 
120 |         runner.log_buffer.clear()
121 | 
122 |         if runner.rank == 0:
123 |             print('\n')
124 |             runner.log_buffer.output['eval_iter_num'] = len(self.dataloader)
125 |             key_score = self.evaluate(runner, results)
126 | 
127 |             if self.save_best:
128 |                 self._save_ckpt(runner, key_score)
129 | 


--------------------------------------------------------------------------------
/mmseg_custom/apis/inference.py:
--------------------------------------------------------------------------------
  1 | # Copyright (c) OpenMMLab. All rights reserved.
  2 | import matplotlib.pyplot as plt
  3 | import mmcv
  4 | import torch
  5 | from mmcv.parallel import collate, scatter
  6 | from mmcv.runner import load_checkpoint
  7 | 
  8 | from mmseg.datasets.pipelines import Compose
  9 | from mmseg_custom.models import build_segmentor
 10 | 
 11 | 
 12 | def init_segmentor(config, checkpoint=None, device='cuda:0'):
 13 |     """Initialize a segmentor from config file.
 14 | 
 15 |     Args:
 16 |         config (str or :obj:`mmcv.Config`): Config file path or the config
 17 |             object.
 18 |         checkpoint (str, optional): Checkpoint path. If left as None, the model
 19 |             will not load any weights.
 20 |         device (str, optional) CPU/CUDA device option. Default 'cuda:0'.
 21 |             Use 'cpu' for loading model on CPU.
 22 |     Returns:
 23 |         nn.Module: The constructed segmentor.
 24 |     """
 25 |     if isinstance(config, str):
 26 |         config = mmcv.Config.fromfile(config)
 27 |     elif not isinstance(config, mmcv.Config):
 28 |         raise TypeError('config must be a filename or Config object, '
 29 |                         'but got {}'.format(type(config)))
 30 |     config.model.pretrained = None
 31 |     config.model.train_cfg = None
 32 |     model = build_segmentor(config.model, test_cfg=config.get('test_cfg'))
 33 |     if checkpoint is not None:
 34 |         checkpoint = load_checkpoint(model, checkpoint, map_location='cpu')
 35 |         model.CLASSES = checkpoint['meta']['CLASSES']
 36 |         model.PALETTE = checkpoint['meta']['PALETTE']
 37 |     model.cfg = config  # save the config in the model for convenience
 38 |     model.to(device)
 39 |     model.eval()
 40 |     return model
 41 | 
 42 | 
 43 | class LoadImage:
 44 |     """A simple pipeline to load image."""
 45 | 
 46 |     def __call__(self, results):
 47 |         """Call function to load images into results.
 48 | 
 49 |         Args:
 50 |             results (dict): A result dict contains the file name
 51 |                 of the image to be read.
 52 | 
 53 |         Returns:
 54 |             dict: ``results`` will be returned containing loaded image.
 55 |         """
 56 | 
 57 |         if isinstance(results['img'], str):
 58 |             results['filename'] = results['img']
 59 |             results['ori_filename'] = results['img']
 60 |         else:
 61 |             results['filename'] = None
 62 |             results['ori_filename'] = None
 63 |         img = mmcv.imread(results['img'])
 64 |         results['img'] = img
 65 |         results['img_shape'] = img.shape
 66 |         results['ori_shape'] = img.shape
 67 |         return results
 68 | 
 69 | 
 70 | def inference_segmentor(model, img):
 71 |     """Inference image(s) with the segmentor.
 72 | 
 73 |     Args:
 74 |         model (nn.Module): The loaded segmentor.
 75 |         imgs (str/ndarray or list[str/ndarray]): Either image files or loaded
 76 |             images.
 77 | 
 78 |     Returns:
 79 |         (list[Tensor]): The segmentation result.
 80 |     """
 81 |     cfg = model.cfg
 82 |     device = next(model.parameters()).device  # model device
 83 |     # build the data pipeline
 84 |     test_pipeline = [LoadImage()] + cfg.data.test.pipeline[1:]
 85 |     test_pipeline = Compose(test_pipeline)
 86 |     # prepare data
 87 |     data = dict(img=img)
 88 |     data = test_pipeline(data)
 89 |     data = collate([data], samples_per_gpu=1)
 90 |     if next(model.parameters()).is_cuda:
 91 |         # scatter to specified GPU
 92 |         data = scatter(data, [device])[0]
 93 |     else:
 94 |         data['img_metas'] = [i.data[0] for i in data['img_metas']]
 95 | 
 96 |     # forward the model
 97 |     with torch.no_grad():
 98 |         result = model(return_loss=False, rescale=True, **data)
 99 |     return result
100 | 
101 | 
def show_result_pyplot(model,
                       img,
                       result,
                       palette=None,
                       fig_size=(15, 10),
                       opacity=0.5,
                       title='',
                       block=True,
                       out_file=None):
    """Draw the segmentation result over the image and display it.

    Args:
        model (nn.Module): The loaded segmentor; a wrapper exposing the real
            model as ``.module`` is unwrapped first.
        img (str or np.ndarray): Image filename or loaded image.
        result (list): The segmentation result.
        palette (list[list[int]] | None): Palette passed on to
            ``model.show_result``. Default: None.
        fig_size (tuple): Figure size of the pyplot figure.
            Default: (15, 10).
        opacity (float): Opacity passed on to ``model.show_result``.
            Default: 0.5.
        title (str): The title of the pyplot figure. Default: ''.
        block (bool): Passed to ``plt.show`` as ``block``. Default: True.
        out_file (str or None): When not None, the rendered image is also
            written to this path. Default: None.
    """
    segmentor = model.module if hasattr(model, 'module') else model
    rendered = segmentor.show_result(
        img, result, palette=palette, show=False, opacity=opacity)

    plt.figure(figsize=fig_size)
    # The rendered image is converted from BGR to RGB for display.
    plt.imshow(mmcv.bgr2rgb(rendered))
    plt.title(title)
    plt.tight_layout()
    plt.show(block=block)

    if out_file is not None:
        mmcv.imwrite(rendered, out_file)
142 | 


--------------------------------------------------------------------------------