├── .gitignore
├── README.md
├── configs
│   ├── _base_
│   │   ├── datasets
│   │   │   ├── ade20k.py
│   │   │   ├── ade20k151.py
│   │   │   ├── ade20k_640x640.py
│   │   │   ├── ade20kmetric.py
│   │   │   ├── chase_db1.py
│   │   │   ├── cityscapes.py
│   │   │   ├── cityscapes20.py
│   │   │   ├── cityscapes20_1024x1024.py
│   │   │   ├── cityscapes_1024x1024.py
│   │   │   ├── cityscapes_2048x1024.py
│   │   │   ├── cityscapes_768x768.py
│   │   │   ├── cityscapes_769x769.py
│   │   │   ├── cityscapes_832x832.py
│   │   │   ├── cityscapesmetric.py
│   │   │   ├── cityscapesmetric_1024x1024.py
│   │   │   ├── coco-stuff10k.py
│   │   │   ├── coco-stuff10k172.py
│   │   │   ├── coco-stuff164k.py
│   │   │   ├── coco-stuff164k172.py
│   │   │   ├── drive.py
│   │   │   ├── hrf.py
│   │   │   ├── imagenets.py
│   │   │   ├── isaid.py
│   │   │   ├── loveda.py
│   │   │   ├── occlude_face.py
│   │   │   ├── pascal_context.py
│   │   │   ├── pascal_context_59.py
│   │   │   ├── pascal_voc12.py
│   │   │   ├── pascal_voc12_aug.py
│   │   │   ├── potsdam.py
│   │   │   ├── stare.py
│   │   │   └── vaihingen.py
│   │   ├── default_runtime.py
│   │   ├── models
│   │   │   ├── ann_r50-d8.py
│   │   │   ├── apcnet_r50-d8.py
│   │   │   ├── bisenetv1_r18-d32.py
│   │   │   ├── bisenetv2.py
│   │   │   ├── ccnet_r50-d8.py
│   │   │   ├── cgnet.py
│   │   │   ├── danet_r50-d8.py
│   │   │   ├── deeplabv3_r50-d8.py
│   │   │   ├── deeplabv3_unet_s5-d16.py
│   │   │   ├── deeplabv3plus_m-v2-d8.py
│   │   │   ├── deeplabv3plus_r50-d8.py
│   │   │   ├── dmnet_r50-d8.py
│   │   │   ├── dnl_r50-d8.py
│   │   │   ├── dpt_vit-b16.py
│   │   │   ├── emanet_r50-d8.py
│   │   │   ├── encnet_r50-d8.py
│   │   │   ├── erfnet_fcn.py
│   │   │   ├── fast_scnn.py
│   │   │   ├── fastfcn_r50-d32_jpu_psp.py
│   │   │   ├── fcn_hr18.py
│   │   │   ├── fcn_r50-d8.py
│   │   │   ├── fcn_unet_s5-d16.py
│   │   │   ├── fpn_poolformer_s12.py
│   │   │   ├── fpn_r50.py
│   │   │   ├── gcnet_r50-d8.py
│   │   │   ├── icnet_r50-d8.py
│   │   │   ├── isanet_r50-d8.py
│   │   │   ├── lraspp_m-v3-d8.py
│   │   │   ├── mask2former_beit.py
│   │   │   ├── nonlocal_r50-d8.py
│   │   │   ├── ocrnet_hr18.py
│   │   │   ├── ocrnet_r50-d8.py
│   │   │   ├── pointrend_r50.py
│   │   │   ├── psanet_r50-d8.py
│   │   │   ├── pspnet_r50-d8.py
│   │   │   ├── pspnet_unet_s5-d16.py
│   │   │   ├── segformer_mit-b0.py
│   │   │   ├── segformer_mit-b2_segformer_head_unet_fc.py
│   │   │   ├── segmenter_vit-b16_mask.py
│   │   │   ├── setr_mla.py
│   │   │   ├── setr_naive.py
│   │   │   ├── setr_pup.py
│   │   │   ├── stdc.py
│   │   │   ├── twins_pcpvt-s_fpn.py
│   │   │   ├── twins_pcpvt-s_upernet.py
│   │   │   ├── upernet_beit.py
│   │   │   ├── upernet_beit_adapter.py
│   │   │   ├── upernet_convnext.py
│   │   │   ├── upernet_mae.py
│   │   │   ├── upernet_r50.py
│   │   │   ├── upernet_swin.py
│   │   │   └── upernet_vit-b16_ln_mln.py
│   │   └── schedules
│   │       ├── schedule_160k.py
│   │       ├── schedule_20k.py
│   │       ├── schedule_320k.py
│   │       ├── schedule_400k.py
│   │       ├── schedule_40k.py
│   │       └── schedule_80k.py
│   └── ade20k
│       ├── segformer_b2_ade20k_multistep.py
│       ├── segformer_b2_ade20k_multistep_inference.py
│       └── segformer_b2_ade20k_singlestep.py
├── environment.yaml
├── mmseg
│   ├── __init__.py
│   ├── apis
│   │   ├── __init__.py
│   │   ├── inference.py
│   │   ├── test.py
│   │   └── train.py
│   ├── core
│   │   ├── __init__.py
│   │   ├── builder.py
│   │   ├── evaluation
│   │   │   ├── __init__.py
│   │   │   ├── class_names.py
│   │   │   ├── eval_hooks.py
│   │   │   └── metrics.py
│   │   ├── hook
│   │   │   ├── __init__.py
│   │   │   └── wandblogger_hook.py
│   │   ├── optimizers
│   │   │   ├── __init__.py
│   │   │   └── layer_decay_optimizer_constructor.py
│   │   ├── seg
│   │   │   ├── __init__.py
│   │   │   ├── builder.py
│   │   │   └── sampler
│   │   │       ├── __init__.py
│   │   │       ├── base_pixel_sampler.py
│   │   │       └── ohem_pixel_sampler.py
│   │   └── utils
│   │       ├── __init__.py
│   │       ├── dist_util.py
│   │       └── misc.py
│   ├── datasets
│   │   ├── __init__.py
│   │   ├── ade.py
│   │   ├── builder.py
│   │   ├── chase_db1.py
│   │   ├── cityscapes.py
│   │   ├── coco_stuff.py
│   │   ├── custom.py
│   │   ├── dark_zurich.py
│   │   ├── dataset_wrappers.py
│   │   ├── drive.py
│   │   ├── face.py
│   │   ├── hrf.py
│   │   ├── imagenets.py
│   │   ├── isaid.py
│   │   ├── isprs.py
│   │   ├── loveda.py
│   │   ├── night_driving.py
│   │   ├── pascal_context.py
│   │   ├── pipelines
│   │   │   ├── __init__.py
│   │   │   ├── compose.py
│   │   │   ├── formating.py
│   │   │   ├── formatting.py
│   │   │   ├── loading.py
│   │   │   ├── test_time_aug.py
│   │   │   └── transforms.py
│   │   ├── potsdam.py
│   │   ├── samplers
│   │   │   ├── __init__.py
│   │   │   └── distributed_sampler.py
│   │   ├── stare.py
│   │   └── voc.py
│   ├── models
│   │   ├── __init__.py
│   │   ├── backbones
│   │   │   ├── __init__.py
│   │   │   ├── beit.py
│   │   │   ├── bisenetv1.py
│   │   │   ├── bisenetv2.py
│   │   │   ├── cgnet.py
│   │   │   ├── erfnet.py
│   │   │   ├── fast_scnn.py
│   │   │   ├── hrnet.py
│   │   │   ├── icnet.py
│   │   │   ├── mae.py
│   │   │   ├── mit.py
│   │   │   ├── mobilenet_v2.py
│   │   │   ├── mobilenet_v3.py
│   │   │   ├── resnest.py
│   │   │   ├── resnet.py
│   │   │   ├── resnext.py
│   │   │   ├── stdc.py
│   │   │   ├── swin.py
│   │   │   ├── timm_backbone.py
│   │   │   ├── twins.py
│   │   │   ├── unet.py
│   │   │   └── vit.py
│   │   ├── builder.py
│   │   ├── decode_heads
│   │   │   ├── __init__.py
│   │   │   ├── ann_head.py
│   │   │   ├── apc_head.py
│   │   │   ├── aspp_head.py
│   │   │   ├── cascade_decode_head.py
│   │   │   ├── cc_head.py
│   │   │   ├── da_head.py
│   │   │   ├── decode_head.py
│   │   │   ├── dm_head.py
│   │   │   ├── dnl_head.py
│   │   │   ├── dpt_head.py
│   │   │   ├── ema_head.py
│   │   │   ├── enc_head.py
│   │   │   ├── fcn_head.py
│   │   │   ├── fpn_head.py
│   │   │   ├── gc_head.py
│   │   │   ├── isa_head.py
│   │   │   ├── knet_head.py
│   │   │   ├── lraspp_head.py
│   │   │   ├── nl_head.py
│   │   │   ├── ocr_head.py
│   │   │   ├── point_head.py
│   │   │   ├── psa_head.py
│   │   │   ├── psp_head.py
│   │   │   ├── segformer_head.py
│   │   │   ├── segmenter_mask_head.py
│   │   │   ├── sep_aspp_head.py
│   │   │   ├── sep_fcn_head.py
│   │   │   ├── setr_mla_head.py
│   │   │   ├── setr_up_head.py
│   │   │   ├── stdc_head.py
│   │   │   └── uper_head.py
│   │   ├── losses
│   │   │   ├── __init__.py
│   │   │   ├── accuracy.py
│   │   │   ├── cross_entropy_loss.py
│   │   │   ├── dice_loss.py
│   │   │   ├── focal_loss.py
│   │   │   ├── lovasz_loss.py
│   │   │   ├── tversky_loss.py
│   │   │   └── utils.py
│   │   ├── necks
│   │   │   ├── __init__.py
│   │   │   ├── featurepyramid.py
│   │   │   ├── fpn.py
│   │   │   ├── ic_neck.py
│   │   │   ├── jpu.py
│   │   │   ├── mla_neck.py
│   │   │   └── multilevel_neck.py
│   │   ├── segmentors
│   │   │   ├── __init__.py
│   │   │   ├── base.py
│   │   │   ├── cascade_encoder_decoder.py
│   │   │   └── encoder_decoder.py
│   │   └── utils
│   │       ├── __init__.py
│   │       ├── embed.py
│   │       ├── inverted_residual.py
│   │       ├── make_divisible.py
│   │       ├── res_layer.py
│   │       ├── se_layer.py
│   │       ├── self_attention_block.py
│   │       ├── shape_convert.py
│   │       └── up_conv_block.py
│   ├── ops
│   │   ├── __init__.py
│   │   ├── encoding.py
│   │   └── wrappers.py
│   ├── utils
│   │   ├── __init__.py
│   │   ├── collect_env.py
│   │   ├── logger.py
│   │   ├── misc.py
│   │   ├── set_env.py
│   │   └── util_distribution.py
│   └── version.py
├── mmseg_custom
│   ├── __init__.py
│   ├── apis
│   │   ├── __init__.py
│   │   ├── test_multi_steps.py
│   │   └── train_multi_steps.py
│   ├── core
│   │   ├── __init__.py
│   │   ├── anchor
│   │   │   ├── __init__.py
│   │   │   ├── builder.py
│   │   │   └── point_generator.py
│   │   ├── box
│   │   │   ├── __init__.py
│   │   │   ├── builder.py
│   │   │   └── samplers
│   │   │       ├── __init__.py
│   │   │       ├── base_sampler.py
│   │   │       ├── mask_pseudo_sampler.py
│   │   │       ├── mask_sampling_result.py
│   │   │       └── sampling_result.py
│   │   ├── evaluation
│   │   │   ├── __init__.py
│   │   │   ├── eval_hooks_multi_steps.py
│   │   │   └── metrics.py
│   │   ├── hook
│   │   │   ├── __init__.py
│   │   │   └── ema.py
│   │   ├── mask
│   │   │   ├── __init__.py
│   │   │   └── utils.py
│   │   └── utils
│   │       ├── __init__.py
│   │       ├── dist_utils.py
│   │       └── misc.py
│   ├── datasets
│   │   ├── __init__.py
│   │   ├── ade20k_151.py
│   │   ├── ade20k_metric.py
│   │   ├── cityscapes_20.py
│   │   ├── cityscapes_metric.py
│   │   ├── coco_stuff_172.py
│   │   └── pipelines
│   │       ├── __init__.py
│   │       ├── formatting.py
│   │       └── transform.py
│   └── models
│       ├── __init__.py
│       ├── backbones
│       │   ├── __init__.py
│       │   ├── mit_custom_init_weights.py
│       │   ├── mobilenet_v2_custom_init_weights.py
│       │   └── resnet_custom_init_weights.py
│       ├── builder.py
│       ├── decode_heads
│       │   ├── __init__.py
│       │   ├── diffusion
│       │   │   ├── __init__.py
│       │   │   ├── misc.py
│       │   │   └── schedule.py
│       │   ├── segformer_head_unet_fc_head_multi_step.py
│       │   ├── segformer_head_unet_fc_head_single_step.py
│       │   └── unet
│       │       ├── __init__.py
│       │       ├── attention.py
│       │       ├── norm.py
│       │       ├── pos_emb.py
│       │       ├── unet.py
│       │       └── unet_time_embed.py
│       ├── losses
│       │   ├── __init__.py
│       │   ├── cross_entropy_loss.py
│       │   ├── dice_loss.py
│       │   ├── focal_loss.py
│       │   ├── match_costs.py
│       │   └── match_loss.py
│       ├── plugins
│       │   ├── __init__.py
│       │   ├── msdeformattn_pixel_decoder.py
│       │   └── pixel_decoder.py
│       ├── segmentors
│       │   ├── __init__.py
│       │   ├── encoder_decoder_diffusion.py
│       │   ├── encoder_decoder_diffusion_ensemble.py
│       │   ├── encoder_decoder_ensemble.py
│       │   └── encoder_decoder_freeze.py
│       └── utils
│           ├── __init__.py
│           ├── assigner.py
│           ├── point_sample.py
│           ├── positional_encoding.py
│           └── transformer.py
└── tools
    ├── benchmark.py
    ├── convert_ema_model.py
    ├── diffusion_gt_infer.py
    ├── dist_test.sh
    ├── dist_test_diffusion.sh
    ├── dist_test_diffusion_origin.sh
    ├── dist_train.sh
    ├── dist_train_diffusion.sh
    ├── get_flops.py
    ├── get_params.py
    ├── slurm_test.sh
    ├── slurm_train.sh
    ├── test.py
    ├── test_diffusion.py
    ├── test_diffusion_origin.py
    ├── train.py
    └── train_diffusion.py
/configs/_base_/datasets/ade20k.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'ADE20KDataset' 3 | data_root = 'data/ade/ADEChallengeData2016' 4 | img_norm_cfg = dict( 5 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 6 | crop_size = (512, 512) 7 | train_pipeline = [ 8 | dict(type='LoadImageFromFile'), 9 | dict(type='LoadAnnotations', reduce_zero_label=True), 10 | dict(type='Resize', img_scale=(2048, 512), ratio_range=(0.5, 2.0)), 11 | dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), 12 | dict(type='RandomFlip', prob=0.5), 13 | dict(type='PhotoMetricDistortion'), 14 | dict(type='Normalize', **img_norm_cfg), 15 | dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), 16 | dict(type='DefaultFormatBundle'), 17 | dict(type='Collect', keys=['img', 'gt_semantic_seg']), 18 | ] 19 | test_pipeline = [ 20 | dict(type='LoadImageFromFile'), 21 | dict( 22 | type='MultiScaleFlipAug', 23 | img_scale=(2048, 512), 24 | # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], 25 | flip=False, 26 | transforms=[ 27 | dict(type='Resize', keep_ratio=True), 28 | dict(type='RandomFlip'), 29 | dict(type='Normalize', **img_norm_cfg), 30 | dict(type='ImageToTensor', keys=['img']), 31 | dict(type='Collect', keys=['img']), 32 | ]) 33 | ] 34 | data = dict( 35 | samples_per_gpu=4, 36 | workers_per_gpu=4, 37 | train=dict( 38 | type=dataset_type, 39 | data_root=data_root, 40 | img_dir='images/training', 41 | ann_dir='annotations/training', 42 | pipeline=train_pipeline), 43 | val=dict( 44 | type=dataset_type, 45 | data_root=data_root, 46 | img_dir='images/validation', 47 | ann_dir='annotations/validation', 48 | pipeline=test_pipeline), 49 | test=dict( 50 | type=dataset_type, 51 | data_root=data_root, 52 | img_dir='images/validation', 53 | ann_dir='annotations/validation', 54 | pipeline=test_pipeline)) 55 | -------------------------------------------------------------------------------- /configs/_base_/datasets/ade20k151.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'ADE20K151Dataset' 3 | data_root = 'data/ade/ADEChallengeData2016' 4 | img_norm_cfg = dict( 5 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 6 | crop_size = (512, 512) 7 | train_pipeline = [ 8 | dict(type='LoadImageFromFile'), 9 |
dict(type='LoadAnnotations', reduce_zero_label=False), 10 | dict(type='Resize', img_scale=(2048, 512), ratio_range=(0.5, 2.0)), 11 | dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), 12 | dict(type='RandomFlip', prob=0.5), 13 | dict(type='PhotoMetricDistortion'), 14 | dict(type='Normalize', **img_norm_cfg), 15 | dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=0), 16 | dict(type='DefaultFormatBundle'), 17 | dict(type='Collect', keys=['img', 'gt_semantic_seg']), 18 | ] 19 | test_pipeline = [ 20 | dict(type='LoadImageFromFile'), 21 | dict( 22 | type='MultiScaleFlipAug', 23 | img_scale=(2048, 512), 24 | # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], 25 | flip=False, 26 | transforms=[ 27 | dict(type='Resize', keep_ratio=True), 28 | dict(type='RandomFlip'), 29 | dict(type='Normalize', **img_norm_cfg), 30 | dict(type='Pad', size_divisor=16, pad_val=0, seg_pad_val=0), 31 | dict(type='ImageToTensor', keys=['img']), 32 | dict(type='Collect', keys=['img']), 33 | ]) 34 | ] 35 | data = dict( 36 | samples_per_gpu=4, 37 | workers_per_gpu=4, 38 | train=dict( 39 | type=dataset_type, 40 | data_root=data_root, 41 | img_dir='images/training', 42 | ann_dir='annotations/training', 43 | pipeline=train_pipeline), 44 | val=dict( 45 | type=dataset_type, 46 | data_root=data_root, 47 | img_dir='images/validation', 48 | ann_dir='annotations/validation', 49 | pipeline=test_pipeline), 50 | test=dict( 51 | type=dataset_type, 52 | data_root=data_root, 53 | img_dir='images/validation', 54 | ann_dir='annotations/validation', 55 | pipeline=test_pipeline)) 56 | -------------------------------------------------------------------------------- /configs/_base_/datasets/ade20k_640x640.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'ADE20KDataset' 3 | data_root = 'data/ade/ADEChallengeData2016' 4 | img_norm_cfg = dict( 5 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 6 | crop_size = (640, 640) 7 | train_pipeline = [ 8 | dict(type='LoadImageFromFile'), 9 | dict(type='LoadAnnotations', reduce_zero_label=True), 10 | dict(type='Resize', img_scale=(2560, 640), ratio_range=(0.5, 2.0)), 11 | dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), 12 | dict(type='RandomFlip', prob=0.5), 13 | dict(type='PhotoMetricDistortion'), 14 | dict(type='Normalize', **img_norm_cfg), 15 | dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), 16 | dict(type='DefaultFormatBundle'), 17 | dict(type='Collect', keys=['img', 'gt_semantic_seg']), 18 | ] 19 | test_pipeline = [ 20 | dict(type='LoadImageFromFile'), 21 | dict( 22 | type='MultiScaleFlipAug', 23 | img_scale=(2560, 640), 24 | # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], 25 | flip=False, 26 | transforms=[ 27 | dict(type='Resize', keep_ratio=True), 28 | dict(type='RandomFlip'), 29 | dict(type='Normalize', **img_norm_cfg), 30 | dict(type='ImageToTensor', keys=['img']), 31 | dict(type='Collect', keys=['img']), 32 | ]) 33 | ] 34 | data = dict( 35 | samples_per_gpu=4, 36 | workers_per_gpu=4, 37 | train=dict( 38 | type=dataset_type, 39 | data_root=data_root, 40 | img_dir='images/training', 41 | ann_dir='annotations/training', 42 | pipeline=train_pipeline), 43 | val=dict( 44 | type=dataset_type, 45 | data_root=data_root, 46 | img_dir='images/validation', 47 | ann_dir='annotations/validation', 48 | pipeline=test_pipeline), 49 | test=dict( 50 | type=dataset_type, 51 | data_root=data_root, 52 | img_dir='images/validation', 53 | 
ann_dir='annotations/validation', 54 | pipeline=test_pipeline)) 55 | -------------------------------------------------------------------------------- /configs/_base_/datasets/ade20kmetric.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'ADE20KMetricDataset' 3 | data_root = 'data/ade/ADEChallengeData2016' 4 | img_norm_cfg = dict( 5 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 6 | crop_size = (512, 512) 7 | train_pipeline = [ 8 | dict(type='LoadImageFromFile'), 9 | dict(type='LoadAnnotations', reduce_zero_label=True), 10 | dict(type='Resize', img_scale=(2048, 512), ratio_range=(0.5, 2.0)), 11 | dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), 12 | dict(type='RandomFlip', prob=0.5), 13 | dict(type='PhotoMetricDistortion'), 14 | dict(type='Normalize', **img_norm_cfg), 15 | dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), 16 | dict(type='DefaultFormatBundle'), 17 | dict(type='Collect', keys=['img', 'gt_semantic_seg']), 18 | ] 19 | test_pipeline = [ 20 | dict(type='LoadImageFromFile'), 21 | dict( 22 | type='MultiScaleFlipAug', 23 | img_scale=(2048, 512), 24 | # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], 25 | flip=False, 26 | transforms=[ 27 | dict(type='Resize', keep_ratio=True), 28 | dict(type='RandomFlip'), 29 | dict(type='Normalize', **img_norm_cfg), 30 | dict(type='ImageToTensor', keys=['img']), 31 | dict(type='Collect', keys=['img']), 32 | ]) 33 | ] 34 | data = dict( 35 | samples_per_gpu=4, 36 | workers_per_gpu=4, 37 | train=dict( 38 | type=dataset_type, 39 | data_root=data_root, 40 | img_dir='images/training', 41 | ann_dir='annotations/training', 42 | pipeline=train_pipeline), 43 | val=dict( 44 | type=dataset_type, 45 | data_root=data_root, 46 | img_dir='images/validation', 47 | ann_dir='annotations/validation', 48 | pipeline=test_pipeline), 49 | test=dict( 50 | type=dataset_type, 51 | data_root=data_root, 52 | img_dir='images/validation', 53 | ann_dir='annotations/validation', 54 | pipeline=test_pipeline)) 55 | -------------------------------------------------------------------------------- /configs/_base_/datasets/chase_db1.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'ChaseDB1Dataset' 3 | data_root = 'data/CHASE_DB1' 4 | img_norm_cfg = dict( 5 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 6 | img_scale = (960, 999) 7 | crop_size = (128, 128) 8 | train_pipeline = [ 9 | dict(type='LoadImageFromFile'), 10 | dict(type='LoadAnnotations'), 11 | dict(type='Resize', img_scale=img_scale, ratio_range=(0.5, 2.0)), 12 | dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), 13 | dict(type='RandomFlip', prob=0.5), 14 | dict(type='PhotoMetricDistortion'), 15 | dict(type='Normalize', **img_norm_cfg), 16 | dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), 17 | dict(type='DefaultFormatBundle'), 18 | dict(type='Collect', keys=['img', 'gt_semantic_seg']) 19 | ] 20 | test_pipeline = [ 21 | dict(type='LoadImageFromFile'), 22 | dict( 23 | type='MultiScaleFlipAug', 24 | img_scale=img_scale, 25 | # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75, 2.0], 26 | flip=False, 27 | transforms=[ 28 | dict(type='Resize', keep_ratio=True), 29 | dict(type='RandomFlip'), 30 | dict(type='Normalize', **img_norm_cfg), 31 | dict(type='ImageToTensor', keys=['img']), 32 | dict(type='Collect', keys=['img']) 33 | ]) 34 | ] 35 | 36 | data = dict( 37 | 
samples_per_gpu=4, 38 | workers_per_gpu=4, 39 | train=dict( 40 | type='RepeatDataset', 41 | times=40000, 42 | dataset=dict( 43 | type=dataset_type, 44 | data_root=data_root, 45 | img_dir='images/training', 46 | ann_dir='annotations/training', 47 | pipeline=train_pipeline)), 48 | val=dict( 49 | type=dataset_type, 50 | data_root=data_root, 51 | img_dir='images/validation', 52 | ann_dir='annotations/validation', 53 | pipeline=test_pipeline), 54 | test=dict( 55 | type=dataset_type, 56 | data_root=data_root, 57 | img_dir='images/validation', 58 | ann_dir='annotations/validation', 59 | pipeline=test_pipeline)) 60 | -------------------------------------------------------------------------------- /configs/_base_/datasets/cityscapes.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'CityscapesDataset' 3 | data_root = 'data/cityscapes/' 4 | img_norm_cfg = dict( 5 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 6 | crop_size = (512, 1024) 7 | train_pipeline = [ 8 | dict(type='LoadImageFromFile'), 9 | dict(type='LoadAnnotations'), 10 | dict(type='Resize', img_scale=(2048, 1024), ratio_range=(0.5, 2.0)), 11 | dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), 12 | dict(type='RandomFlip', prob=0.5), 13 | dict(type='PhotoMetricDistortion'), 14 | dict(type='Normalize', **img_norm_cfg), 15 | dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), 16 | dict(type='DefaultFormatBundle'), 17 | dict(type='Collect', keys=['img', 'gt_semantic_seg']), 18 | ] 19 | test_pipeline = [ 20 | dict(type='LoadImageFromFile'), 21 | dict( 22 | type='MultiScaleFlipAug', 23 | img_scale=(2048, 1024), 24 | # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], 25 | flip=False, 26 | transforms=[ 27 | dict(type='Resize', keep_ratio=True), 28 | dict(type='RandomFlip'), 29 | dict(type='Normalize', **img_norm_cfg), 30 | dict(type='ImageToTensor', keys=['img']), 31 | dict(type='Collect', keys=['img']), 32 | ]) 33 | ] 34 | data = dict( 35 | samples_per_gpu=2, 36 | workers_per_gpu=2, 37 | train=dict( 38 | type=dataset_type, 39 | data_root=data_root, 40 | img_dir='leftImg8bit/train', 41 | ann_dir='gtFine/train', 42 | pipeline=train_pipeline), 43 | val=dict( 44 | type=dataset_type, 45 | data_root=data_root, 46 | img_dir='leftImg8bit/val', 47 | ann_dir='gtFine/val', 48 | pipeline=test_pipeline), 49 | test=dict( 50 | type=dataset_type, 51 | data_root=data_root, 52 | img_dir='leftImg8bit/val', 53 | ann_dir='gtFine/val', 54 | pipeline=test_pipeline)) 55 | -------------------------------------------------------------------------------- /configs/_base_/datasets/cityscapes20.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'Cityscapes20Dataset' 3 | data_root = 'data/cityscapes/' 4 | img_norm_cfg = dict( 5 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 6 | crop_size = (512, 1024) 7 | train_pipeline = [ 8 | dict(type='LoadImageFromFile'), 9 | dict(type='LoadAnnotationsCityscapes20'), 10 | dict(type='Resize', img_scale=(2048, 1024), ratio_range=(0.5, 2.0)), 11 | dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), 12 | dict(type='RandomFlip', prob=0.5), 13 | dict(type='PhotoMetricDistortion'), 14 | dict(type='Normalize', **img_norm_cfg), 15 | dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=0), 16 | dict(type='DefaultFormatBundle'), 17 | dict(type='Collect', keys=['img', 'gt_semantic_seg']), 18 | ] 19 | 
test_pipeline = [ 20 | dict(type='LoadImageFromFile'), 21 | dict( 22 | type='MultiScaleFlipAug', 23 | img_scale=(2048, 1024), 24 | # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], 25 | flip=False, 26 | transforms=[ 27 | dict(type='Resize', keep_ratio=True), 28 | dict(type='RandomFlip'), 29 | dict(type='Normalize', **img_norm_cfg), 30 | dict(type='ImageToTensor', keys=['img']), 31 | dict(type='Collect', keys=['img']), 32 | ]) 33 | ] 34 | data = dict( 35 | samples_per_gpu=2, 36 | workers_per_gpu=2, 37 | train=dict( 38 | type=dataset_type, 39 | data_root=data_root, 40 | img_dir='leftImg8bit/train', 41 | ann_dir='gtFine/train', 42 | pipeline=train_pipeline), 43 | val=dict( 44 | type=dataset_type, 45 | data_root=data_root, 46 | img_dir='leftImg8bit/val', 47 | ann_dir='gtFine/val', 48 | pipeline=test_pipeline), 49 | test=dict( 50 | type=dataset_type, 51 | data_root=data_root, 52 | img_dir='leftImg8bit/val', 53 | ann_dir='gtFine/val', 54 | pipeline=test_pipeline)) 55 | -------------------------------------------------------------------------------- /configs/_base_/datasets/cityscapes20_1024x1024.py: -------------------------------------------------------------------------------- 1 | _base_ = './cityscapes20.py' 2 | img_norm_cfg = dict( 3 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 4 | crop_size = (1024, 1024) 5 | train_pipeline = [ 6 | dict(type='LoadImageFromFile'), 7 | dict(type='LoadAnnotationsCityscapes20'), 8 | dict(type='Resize', img_scale=(2048, 1024), ratio_range=(0.5, 2.0)), 9 | dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), 10 | dict(type='RandomFlip', prob=0.5), 11 | dict(type='PhotoMetricDistortion'), 12 | dict(type='Normalize', **img_norm_cfg), 13 | dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=0), 14 | dict(type='DefaultFormatBundle'), 15 | dict(type='Collect', keys=['img', 'gt_semantic_seg']), 16 | ] 17 | test_pipeline = [ 18 | dict(type='LoadImageFromFile'), 19 | dict( 20 | type='MultiScaleFlipAug', 21 | img_scale=(2048, 1024), 22 | # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], 23 | flip=False, 24 | transforms=[ 25 | dict(type='Resize', keep_ratio=True), 26 | dict(type='RandomFlip'), 27 | dict(type='Normalize', **img_norm_cfg), 28 | dict(type='ImageToTensor', keys=['img']), 29 | dict(type='Collect', keys=['img']), 30 | ]) 31 | ] 32 | data = dict( 33 | train=dict(pipeline=train_pipeline), 34 | val=dict(pipeline=test_pipeline), 35 | test=dict(pipeline=test_pipeline)) 36 | -------------------------------------------------------------------------------- /configs/_base_/datasets/cityscapes_1024x1024.py: -------------------------------------------------------------------------------- 1 | _base_ = './cityscapes.py' 2 | img_norm_cfg = dict( 3 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 4 | crop_size = (1024, 1024) 5 | train_pipeline = [ 6 | dict(type='LoadImageFromFile'), 7 | dict(type='LoadAnnotations'), 8 | dict(type='Resize', img_scale=(2048, 1024), ratio_range=(0.5, 2.0)), 9 | dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), 10 | dict(type='RandomFlip', prob=0.5), 11 | dict(type='PhotoMetricDistortion'), 12 | dict(type='Normalize', **img_norm_cfg), 13 | dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), 14 | dict(type='DefaultFormatBundle'), 15 | dict(type='Collect', keys=['img', 'gt_semantic_seg']), 16 | ] 17 | test_pipeline = [ 18 | dict(type='LoadImageFromFile'), 19 | dict( 20 | type='MultiScaleFlipAug', 21 | img_scale=(2048, 1024), 22 | # img_ratios=[0.5, 0.75, 
1.0, 1.25, 1.5, 1.75], 23 | flip=False, 24 | transforms=[ 25 | dict(type='Resize', keep_ratio=True), 26 | dict(type='RandomFlip'), 27 | dict(type='Normalize', **img_norm_cfg), 28 | dict(type='ImageToTensor', keys=['img']), 29 | dict(type='Collect', keys=['img']), 30 | ]) 31 | ] 32 | data = dict( 33 | train=dict(pipeline=train_pipeline), 34 | val=dict(pipeline=test_pipeline), 35 | test=dict(pipeline=test_pipeline)) 36 | -------------------------------------------------------------------------------- /configs/_base_/datasets/cityscapes_2048x1024.py: -------------------------------------------------------------------------------- 1 | _base_ = './cityscapes.py' 2 | img_norm_cfg = dict( 3 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 4 | crop_size = (2048, 1024) 5 | train_pipeline = [ 6 | dict(type='LoadImageFromFile'), 7 | dict(type='LoadAnnotations'), 8 | dict(type='Resize', img_scale=(2048, 1024), ratio_range=(0.5, 2.0)), 9 | dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), 10 | dict(type='RandomFlip', prob=0.5), 11 | dict(type='PhotoMetricDistortion'), 12 | dict(type='Normalize', **img_norm_cfg), 13 | dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), 14 | dict(type='DefaultFormatBundle'), 15 | dict(type='Collect', keys=['img', 'gt_semantic_seg']), 16 | ] 17 | test_pipeline = [ 18 | dict(type='LoadImageFromFile'), 19 | dict( 20 | type='MultiScaleFlipAug', 21 | img_scale=(2048, 1024), 22 | # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], 23 | flip=False, 24 | transforms=[ 25 | dict(type='Resize', keep_ratio=True), 26 | dict(type='RandomFlip'), 27 | dict(type='Normalize', **img_norm_cfg), 28 | dict(type='ImageToTensor', keys=['img']), 29 | dict(type='Collect', keys=['img']), 30 | ]) 31 | ] 32 | data = dict( 33 | train=dict(pipeline=train_pipeline), 34 | val=dict(pipeline=test_pipeline), 35 | test=dict(pipeline=test_pipeline)) 36 | -------------------------------------------------------------------------------- /configs/_base_/datasets/cityscapes_768x768.py: -------------------------------------------------------------------------------- 1 | _base_ = './cityscapes.py' 2 | img_norm_cfg = dict( 3 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 4 | crop_size = (768, 768) 5 | train_pipeline = [ 6 | dict(type='LoadImageFromFile'), 7 | dict(type='LoadAnnotations'), 8 | dict(type='Resize', img_scale=(2049, 1025), ratio_range=(0.5, 2.0)), 9 | dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), 10 | dict(type='RandomFlip', prob=0.5), 11 | dict(type='PhotoMetricDistortion'), 12 | dict(type='Normalize', **img_norm_cfg), 13 | dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), 14 | dict(type='DefaultFormatBundle'), 15 | dict(type='Collect', keys=['img', 'gt_semantic_seg']), 16 | ] 17 | test_pipeline = [ 18 | dict(type='LoadImageFromFile'), 19 | dict( 20 | type='MultiScaleFlipAug', 21 | img_scale=(2049, 1025), 22 | # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], 23 | flip=False, 24 | transforms=[ 25 | dict(type='Resize', keep_ratio=True), 26 | dict(type='RandomFlip'), 27 | dict(type='Normalize', **img_norm_cfg), 28 | dict(type='ImageToTensor', keys=['img']), 29 | dict(type='Collect', keys=['img']), 30 | ]) 31 | ] 32 | data = dict( 33 | train=dict(pipeline=train_pipeline), 34 | val=dict(pipeline=test_pipeline), 35 | test=dict(pipeline=test_pipeline)) 36 | -------------------------------------------------------------------------------- /configs/_base_/datasets/cityscapes_769x769.py: 
-------------------------------------------------------------------------------- 1 | _base_ = './cityscapes.py' 2 | img_norm_cfg = dict( 3 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 4 | crop_size = (769, 769) 5 | train_pipeline = [ 6 | dict(type='LoadImageFromFile'), 7 | dict(type='LoadAnnotations'), 8 | dict(type='Resize', img_scale=(2049, 1025), ratio_range=(0.5, 2.0)), 9 | dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), 10 | dict(type='RandomFlip', prob=0.5), 11 | dict(type='PhotoMetricDistortion'), 12 | dict(type='Normalize', **img_norm_cfg), 13 | dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), 14 | dict(type='DefaultFormatBundle'), 15 | dict(type='Collect', keys=['img', 'gt_semantic_seg']), 16 | ] 17 | test_pipeline = [ 18 | dict(type='LoadImageFromFile'), 19 | dict( 20 | type='MultiScaleFlipAug', 21 | img_scale=(2049, 1025), 22 | # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], 23 | flip=False, 24 | transforms=[ 25 | dict(type='Resize', keep_ratio=True), 26 | dict(type='RandomFlip'), 27 | dict(type='Normalize', **img_norm_cfg), 28 | dict(type='ImageToTensor', keys=['img']), 29 | dict(type='Collect', keys=['img']), 30 | ]) 31 | ] 32 | data = dict( 33 | train=dict(pipeline=train_pipeline), 34 | val=dict(pipeline=test_pipeline), 35 | test=dict(pipeline=test_pipeline)) 36 | -------------------------------------------------------------------------------- /configs/_base_/datasets/cityscapes_832x832.py: -------------------------------------------------------------------------------- 1 | _base_ = './cityscapes.py' 2 | img_norm_cfg = dict( 3 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 4 | crop_size = (832, 832) 5 | train_pipeline = [ 6 | dict(type='LoadImageFromFile'), 7 | dict(type='LoadAnnotations'), 8 | dict(type='Resize', img_scale=(2048, 1024), ratio_range=(0.5, 2.0)), 9 | dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), 10 | dict(type='RandomFlip', prob=0.5), 11 | dict(type='PhotoMetricDistortion'), 12 | dict(type='Normalize', **img_norm_cfg), 13 | dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), 14 | dict(type='DefaultFormatBundle'), 15 | dict(type='Collect', keys=['img', 'gt_semantic_seg']), 16 | ] 17 | test_pipeline = [ 18 | dict(type='LoadImageFromFile'), 19 | dict( 20 | type='MultiScaleFlipAug', 21 | img_scale=(2048, 1024), 22 | # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], 23 | flip=False, 24 | transforms=[ 25 | dict(type='Resize', keep_ratio=True), 26 | dict(type='RandomFlip'), 27 | dict(type='Normalize', **img_norm_cfg), 28 | dict(type='ImageToTensor', keys=['img']), 29 | dict(type='Collect', keys=['img']), 30 | ]) 31 | ] 32 | data = dict( 33 | train=dict(pipeline=train_pipeline), 34 | val=dict(pipeline=test_pipeline), 35 | test=dict(pipeline=test_pipeline)) 36 | -------------------------------------------------------------------------------- /configs/_base_/datasets/cityscapesmetric.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'CityscapesMetricDataset' 3 | data_root = 'data/cityscapes/' 4 | img_norm_cfg = dict( 5 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 6 | crop_size = (512, 1024) 7 | train_pipeline = [ 8 | dict(type='LoadImageFromFile'), 9 | dict(type='LoadAnnotations'), 10 | dict(type='Resize', img_scale=(2048, 1024), ratio_range=(0.5, 2.0)), 11 | dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), 12 | 
dict(type='RandomFlip', prob=0.5), 13 | dict(type='PhotoMetricDistortion'), 14 | dict(type='Normalize', **img_norm_cfg), 15 | dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), 16 | dict(type='DefaultFormatBundle'), 17 | dict(type='Collect', keys=['img', 'gt_semantic_seg']), 18 | ] 19 | test_pipeline = [ 20 | dict(type='LoadImageFromFile'), 21 | dict( 22 | type='MultiScaleFlipAug', 23 | img_scale=(2048, 1024), 24 | # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], 25 | flip=False, 26 | transforms=[ 27 | dict(type='Resize', keep_ratio=True), 28 | dict(type='RandomFlip'), 29 | dict(type='Normalize', **img_norm_cfg), 30 | dict(type='ImageToTensor', keys=['img']), 31 | dict(type='Collect', keys=['img']), 32 | ]) 33 | ] 34 | data = dict( 35 | samples_per_gpu=2, 36 | workers_per_gpu=2, 37 | train=dict( 38 | type=dataset_type, 39 | data_root=data_root, 40 | img_dir='leftImg8bit/train', 41 | ann_dir='gtFine/train', 42 | pipeline=train_pipeline), 43 | val=dict( 44 | type=dataset_type, 45 | data_root=data_root, 46 | img_dir='leftImg8bit/val', 47 | ann_dir='gtFine/val', 48 | pipeline=test_pipeline), 49 | test=dict( 50 | type=dataset_type, 51 | data_root=data_root, 52 | img_dir='leftImg8bit/val', 53 | ann_dir='gtFine/val', 54 | pipeline=test_pipeline)) 55 | -------------------------------------------------------------------------------- /configs/_base_/datasets/cityscapesmetric_1024x1024.py: -------------------------------------------------------------------------------- 1 | _base_ = './cityscapesmetric.py' 2 | img_norm_cfg = dict( 3 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 4 | crop_size = (1024, 1024) 5 | train_pipeline = [ 6 | dict(type='LoadImageFromFile'), 7 | dict(type='LoadAnnotations'), 8 | dict(type='Resize', img_scale=(2048, 1024), ratio_range=(0.5, 2.0)), 9 | dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), 10 | dict(type='RandomFlip', prob=0.5), 11 | dict(type='PhotoMetricDistortion'), 12 | dict(type='Normalize', **img_norm_cfg), 13 | dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), 14 | dict(type='DefaultFormatBundle'), 15 | dict(type='Collect', keys=['img', 'gt_semantic_seg']), 16 | ] 17 | test_pipeline = [ 18 | dict(type='LoadImageFromFile'), 19 | dict( 20 | type='MultiScaleFlipAug', 21 | img_scale=(2048, 1024), 22 | # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], 23 | flip=False, 24 | transforms=[ 25 | dict(type='Resize', keep_ratio=True), 26 | dict(type='RandomFlip'), 27 | dict(type='Normalize', **img_norm_cfg), 28 | dict(type='ImageToTensor', keys=['img']), 29 | dict(type='Collect', keys=['img']), 30 | ]) 31 | ] 32 | data = dict( 33 | train=dict(pipeline=train_pipeline), 34 | val=dict(pipeline=test_pipeline), 35 | test=dict(pipeline=test_pipeline)) 36 | -------------------------------------------------------------------------------- /configs/_base_/datasets/coco-stuff10k.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'COCOStuffDataset' 3 | data_root = 'data/coco_stuff10k' 4 | img_norm_cfg = dict( 5 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 6 | crop_size = (512, 512) 7 | train_pipeline = [ 8 | dict(type='LoadImageFromFile'), 9 | dict(type='LoadAnnotations', reduce_zero_label=True), 10 | dict(type='Resize', img_scale=(2048, 512), ratio_range=(0.5, 2.0)), 11 | dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), 12 | dict(type='RandomFlip', prob=0.5), 13 | 
dict(type='PhotoMetricDistortion'), 14 | dict(type='Normalize', **img_norm_cfg), 15 | dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), 16 | dict(type='DefaultFormatBundle'), 17 | dict(type='Collect', keys=['img', 'gt_semantic_seg']), 18 | ] 19 | test_pipeline = [ 20 | dict(type='LoadImageFromFile'), 21 | dict( 22 | type='MultiScaleFlipAug', 23 | img_scale=(2048, 512), 24 | # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], 25 | flip=False, 26 | transforms=[ 27 | dict(type='Resize', keep_ratio=True), 28 | dict(type='RandomFlip'), 29 | dict(type='Normalize', **img_norm_cfg), 30 | dict(type='ImageToTensor', keys=['img']), 31 | dict(type='Collect', keys=['img']), 32 | ]) 33 | ] 34 | data = dict( 35 | samples_per_gpu=4, 36 | workers_per_gpu=4, 37 | train=dict( 38 | type=dataset_type, 39 | data_root=data_root, 40 | reduce_zero_label=True, 41 | img_dir='images/train2014', 42 | ann_dir='annotations/train2014', 43 | pipeline=train_pipeline), 44 | val=dict( 45 | type=dataset_type, 46 | data_root=data_root, 47 | reduce_zero_label=True, 48 | img_dir='images/test2014', 49 | ann_dir='annotations/test2014', 50 | pipeline=test_pipeline), 51 | test=dict( 52 | type=dataset_type, 53 | data_root=data_root, 54 | reduce_zero_label=True, 55 | img_dir='images/test2014', 56 | ann_dir='annotations/test2014', 57 | pipeline=test_pipeline)) 58 | -------------------------------------------------------------------------------- /configs/_base_/datasets/coco-stuff10k172.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'COCOStuff172Dataset' 3 | data_root = 'data/coco_stuff10k' 4 | img_norm_cfg = dict( 5 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 6 | crop_size = (512, 512) 7 | train_pipeline = [ 8 | dict(type='LoadImageFromFile'), 9 | dict(type='LoadAnnotationsCOCOStuff172', reduce_zero_label=True), 10 | dict(type='Resize', img_scale=(2048, 512), ratio_range=(0.5, 2.0)), 11 | dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), 12 | dict(type='RandomFlip', prob=0.5), 13 | dict(type='PhotoMetricDistortion'), 14 | dict(type='Normalize', **img_norm_cfg), 15 | dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=0), 16 | dict(type='DefaultFormatBundle'), 17 | dict(type='Collect', keys=['img', 'gt_semantic_seg']), 18 | ] 19 | test_pipeline = [ 20 | dict(type='LoadImageFromFile'), 21 | dict( 22 | type='MultiScaleFlipAug', 23 | img_scale=(2048, 512), 24 | # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], 25 | flip=False, 26 | transforms=[ 27 | dict(type='Resize', keep_ratio=True), 28 | dict(type='RandomFlip'), 29 | dict(type='Normalize', **img_norm_cfg), 30 | dict(type='ImageToTensor', keys=['img']), 31 | dict(type='Collect', keys=['img']), 32 | ]) 33 | ] 34 | data = dict( 35 | samples_per_gpu=4, 36 | workers_per_gpu=4, 37 | train=dict( 38 | type=dataset_type, 39 | data_root=data_root, 40 | reduce_zero_label=True, 41 | img_dir='images/train2014', 42 | ann_dir='annotations/train2014', 43 | pipeline=train_pipeline), 44 | val=dict( 45 | type=dataset_type, 46 | data_root=data_root, 47 | reduce_zero_label=True, 48 | img_dir='images/test2014', 49 | ann_dir='annotations/test2014', 50 | pipeline=test_pipeline), 51 | test=dict( 52 | type=dataset_type, 53 | data_root=data_root, 54 | reduce_zero_label=True, 55 | img_dir='images/test2014', 56 | ann_dir='annotations/test2014', 57 | pipeline=test_pipeline)) 58 | -------------------------------------------------------------------------------- 
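These dataset fragments are plain Python config files, so they can be materialized directly for inspection. Below is a minimal sketch of how mmseg 0.x builds a dataset from one of them; it is illustrative only, and assumes mmcv/mmseg are installed and the COCO-Stuff data is laid out under data_root as the config expects (the *172 variants additionally require the custom dataset and loader classes from mmseg_custom to be imported so they register themselves).

from mmcv import Config
from mmseg.datasets import build_dataset

cfg = Config.fromfile('configs/_base_/datasets/coco-stuff10k.py')
dataset = build_dataset(cfg.data.train)  # applies train_pipeline to each sample
sample = dataset[0]
# DefaultFormatBundle wraps outputs in DataContainers; .data unwraps them.
print(sample['img'].data.shape)              # torch.Size([3, 512, 512]) after Pad
print(sample['gt_semantic_seg'].data.shape)  # torch.Size([1, 512, 512])

The same cfg.data.samples_per_gpu and cfg.data.workers_per_gpu fields (4 and 4 here) are what mmseg's build_dataloader consumes when the training loop constructs its loaders.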
/configs/_base_/datasets/coco-stuff164k.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'COCOStuffDataset' 3 | data_root = 'data/coco_stuff164k' 4 | img_norm_cfg = dict( 5 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 6 | crop_size = (512, 512) 7 | train_pipeline = [ 8 | dict(type='LoadImageFromFile'), 9 | dict(type='LoadAnnotations'), 10 | dict(type='Resize', img_scale=(2048, 512), ratio_range=(0.5, 2.0)), 11 | dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), 12 | dict(type='RandomFlip', prob=0.5), 13 | dict(type='PhotoMetricDistortion'), 14 | dict(type='Normalize', **img_norm_cfg), 15 | dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), 16 | dict(type='DefaultFormatBundle'), 17 | dict(type='Collect', keys=['img', 'gt_semantic_seg']), 18 | ] 19 | test_pipeline = [ 20 | dict(type='LoadImageFromFile'), 21 | dict( 22 | type='MultiScaleFlipAug', 23 | img_scale=(2048, 512), 24 | # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], 25 | flip=False, 26 | transforms=[ 27 | dict(type='Resize', keep_ratio=True), 28 | dict(type='RandomFlip'), 29 | dict(type='Normalize', **img_norm_cfg), 30 | dict(type='ImageToTensor', keys=['img']), 31 | dict(type='Collect', keys=['img']), 32 | ]) 33 | ] 34 | data = dict( 35 | samples_per_gpu=4, 36 | workers_per_gpu=4, 37 | train=dict( 38 | type=dataset_type, 39 | data_root=data_root, 40 | img_dir='images/train2017', 41 | ann_dir='annotations/train2017', 42 | pipeline=train_pipeline), 43 | val=dict( 44 | type=dataset_type, 45 | data_root=data_root, 46 | img_dir='images/val2017', 47 | ann_dir='annotations/val2017', 48 | pipeline=test_pipeline), 49 | test=dict( 50 | type=dataset_type, 51 | data_root=data_root, 52 | img_dir='images/val2017', 53 | ann_dir='annotations/val2017', 54 | pipeline=test_pipeline)) 55 | -------------------------------------------------------------------------------- /configs/_base_/datasets/coco-stuff164k172.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'COCOStuff172Dataset' 3 | data_root = 'data/coco_stuff164k' 4 | img_norm_cfg = dict( 5 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 6 | crop_size = (512, 512) 7 | train_pipeline = [ 8 | dict(type='LoadImageFromFile'), 9 | dict(type='LoadAnnotationsCOCOStuff172'), 10 | dict(type='Resize', img_scale=(2048, 512), ratio_range=(0.5, 2.0)), 11 | dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), 12 | dict(type='RandomFlip', prob=0.5), 13 | dict(type='PhotoMetricDistortion'), 14 | dict(type='Normalize', **img_norm_cfg), 15 | dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=0), 16 | dict(type='DefaultFormatBundle'), 17 | dict(type='Collect', keys=['img', 'gt_semantic_seg']), 18 | ] 19 | test_pipeline = [ 20 | dict(type='LoadImageFromFile'), 21 | dict( 22 | type='MultiScaleFlipAug', 23 | img_scale=(2048, 512), 24 | # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], 25 | flip=False, 26 | transforms=[ 27 | dict(type='Resize', keep_ratio=True), 28 | dict(type='RandomFlip'), 29 | dict(type='Normalize', **img_norm_cfg), 30 | dict(type='ImageToTensor', keys=['img']), 31 | dict(type='Collect', keys=['img']), 32 | ]) 33 | ] 34 | data = dict( 35 | samples_per_gpu=4, 36 | workers_per_gpu=4, 37 | train=dict( 38 | type=dataset_type, 39 | data_root=data_root, 40 | img_dir='images/train2017', 41 | ann_dir='annotations/train2017', 42 | pipeline=train_pipeline), 
43 | val=dict( 44 | type=dataset_type, 45 | data_root=data_root, 46 | img_dir='images/val2017', 47 | ann_dir='annotations/val2017', 48 | pipeline=test_pipeline), 49 | test=dict( 50 | type=dataset_type, 51 | data_root=data_root, 52 | img_dir='images/val2017', 53 | ann_dir='annotations/val2017', 54 | pipeline=test_pipeline)) 55 | -------------------------------------------------------------------------------- /configs/_base_/datasets/drive.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'DRIVEDataset' 3 | data_root = 'data/DRIVE' 4 | img_norm_cfg = dict( 5 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 6 | img_scale = (584, 565) 7 | crop_size = (64, 64) 8 | train_pipeline = [ 9 | dict(type='LoadImageFromFile'), 10 | dict(type='LoadAnnotations'), 11 | dict(type='Resize', img_scale=img_scale, ratio_range=(0.5, 2.0)), 12 | dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), 13 | dict(type='RandomFlip', prob=0.5), 14 | dict(type='PhotoMetricDistortion'), 15 | dict(type='Normalize', **img_norm_cfg), 16 | dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), 17 | dict(type='DefaultFormatBundle'), 18 | dict(type='Collect', keys=['img', 'gt_semantic_seg']) 19 | ] 20 | test_pipeline = [ 21 | dict(type='LoadImageFromFile'), 22 | dict( 23 | type='MultiScaleFlipAug', 24 | img_scale=img_scale, 25 | # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75, 2.0], 26 | flip=False, 27 | transforms=[ 28 | dict(type='Resize', keep_ratio=True), 29 | dict(type='RandomFlip'), 30 | dict(type='Normalize', **img_norm_cfg), 31 | dict(type='ImageToTensor', keys=['img']), 32 | dict(type='Collect', keys=['img']) 33 | ]) 34 | ] 35 | 36 | data = dict( 37 | samples_per_gpu=4, 38 | workers_per_gpu=4, 39 | train=dict( 40 | type='RepeatDataset', 41 | times=40000, 42 | dataset=dict( 43 | type=dataset_type, 44 | data_root=data_root, 45 | img_dir='images/training', 46 | ann_dir='annotations/training', 47 | pipeline=train_pipeline)), 48 | val=dict( 49 | type=dataset_type, 50 | data_root=data_root, 51 | img_dir='images/validation', 52 | ann_dir='annotations/validation', 53 | pipeline=test_pipeline), 54 | test=dict( 55 | type=dataset_type, 56 | data_root=data_root, 57 | img_dir='images/validation', 58 | ann_dir='annotations/validation', 59 | pipeline=test_pipeline)) 60 | -------------------------------------------------------------------------------- /configs/_base_/datasets/hrf.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'HRFDataset' 3 | data_root = 'data/HRF' 4 | img_norm_cfg = dict( 5 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 6 | img_scale = (2336, 3504) 7 | crop_size = (256, 256) 8 | train_pipeline = [ 9 | dict(type='LoadImageFromFile'), 10 | dict(type='LoadAnnotations'), 11 | dict(type='Resize', img_scale=img_scale, ratio_range=(0.5, 2.0)), 12 | dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), 13 | dict(type='RandomFlip', prob=0.5), 14 | dict(type='PhotoMetricDistortion'), 15 | dict(type='Normalize', **img_norm_cfg), 16 | dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), 17 | dict(type='DefaultFormatBundle'), 18 | dict(type='Collect', keys=['img', 'gt_semantic_seg']) 19 | ] 20 | test_pipeline = [ 21 | dict(type='LoadImageFromFile'), 22 | dict( 23 | type='MultiScaleFlipAug', 24 | img_scale=img_scale, 25 | # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75, 2.0], 26 | 
flip=False, 27 | transforms=[ 28 | dict(type='Resize', keep_ratio=True), 29 | dict(type='RandomFlip'), 30 | dict(type='Normalize', **img_norm_cfg), 31 | dict(type='ImageToTensor', keys=['img']), 32 | dict(type='Collect', keys=['img']) 33 | ]) 34 | ] 35 | 36 | data = dict( 37 | samples_per_gpu=4, 38 | workers_per_gpu=4, 39 | train=dict( 40 | type='RepeatDataset', 41 | times=40000, 42 | dataset=dict( 43 | type=dataset_type, 44 | data_root=data_root, 45 | img_dir='images/training', 46 | ann_dir='annotations/training', 47 | pipeline=train_pipeline)), 48 | val=dict( 49 | type=dataset_type, 50 | data_root=data_root, 51 | img_dir='images/validation', 52 | ann_dir='annotations/validation', 53 | pipeline=test_pipeline), 54 | test=dict( 55 | type=dataset_type, 56 | data_root=data_root, 57 | img_dir='images/validation', 58 | ann_dir='annotations/validation', 59 | pipeline=test_pipeline)) 60 | -------------------------------------------------------------------------------- /configs/_base_/datasets/imagenets.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'ImageNetSDataset' 3 | subset = 919 4 | data_root = 'data/ImageNetS/ImageNetS919' 5 | img_norm_cfg = dict( 6 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 7 | crop_size = (224, 224) 8 | train_pipeline = [ 9 | dict(type='LoadImageNetSImageFromFile', downsample_large_image=True), 10 | dict(type='LoadImageNetSAnnotations', reduce_zero_label=False), 11 | dict(type='Resize', img_scale=(1024, 256), ratio_range=(0.5, 2.0)), 12 | dict( 13 | type='RandomCrop', 14 | crop_size=crop_size, 15 | cat_max_ratio=0.75, 16 | ignore_index=1000), 17 | dict(type='RandomFlip', prob=0.5), 18 | dict(type='PhotoMetricDistortion'), 19 | dict(type='Normalize', **img_norm_cfg), 20 | dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=1000), 21 | dict(type='DefaultFormatBundle'), 22 | dict(type='Collect', keys=['img', 'gt_semantic_seg']), 23 | ] 24 | test_pipeline = [ 25 | dict(type='LoadImageNetSImageFromFile', downsample_large_image=True), 26 | dict( 27 | type='MultiScaleFlipAug', 28 | img_scale=(1024, 256), 29 | flip=False, 30 | transforms=[ 31 | dict(type='Resize', keep_ratio=True), 32 | dict(type='RandomFlip'), 33 | dict(type='Normalize', **img_norm_cfg), 34 | dict(type='ImageToTensor', keys=['img']), 35 | dict(type='Collect', keys=['img']), 36 | ]) 37 | ] 38 | data = dict( 39 | samples_per_gpu=4, 40 | workers_per_gpu=4, 41 | train=dict( 42 | type=dataset_type, 43 | subset=subset, 44 | data_root=data_root, 45 | img_dir='train-semi', 46 | ann_dir='train-semi-segmentation', 47 | pipeline=train_pipeline), 48 | val=dict( 49 | type=dataset_type, 50 | subset=subset, 51 | data_root=data_root, 52 | img_dir='validation', 53 | ann_dir='validation-segmentation', 54 | pipeline=test_pipeline), 55 | test=dict( 56 | type=dataset_type, 57 | subset=subset, 58 | data_root=data_root, 59 | img_dir='validation', 60 | ann_dir='validation-segmentation', 61 | pipeline=test_pipeline)) 62 | -------------------------------------------------------------------------------- /configs/_base_/datasets/isaid.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'iSAIDDataset' 3 | data_root = 'data/iSAID' 4 | 5 | img_norm_cfg = dict( 6 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 7 | """ 8 | This crop_size setting is followed by the implementation of 9 | `PointFlow: Flowing Semantics Through 
Points for Aerial Image 10 | Segmentation `_. 11 | """ 12 | 13 | crop_size = (896, 896) 14 | 15 | train_pipeline = [ 16 | dict(type='LoadImageFromFile'), 17 | dict(type='LoadAnnotations'), 18 | dict(type='Resize', img_scale=(896, 896), ratio_range=(0.5, 2.0)), 19 | dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), 20 | dict(type='RandomFlip', prob=0.5), 21 | dict(type='PhotoMetricDistortion'), 22 | dict(type='Normalize', **img_norm_cfg), 23 | dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), 24 | dict(type='DefaultFormatBundle'), 25 | dict(type='Collect', keys=['img', 'gt_semantic_seg']), 26 | ] 27 | test_pipeline = [ 28 | dict(type='LoadImageFromFile'), 29 | dict( 30 | type='MultiScaleFlipAug', 31 | img_scale=(896, 896), 32 | # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], 33 | flip=False, 34 | transforms=[ 35 | dict(type='Resize', keep_ratio=True), 36 | dict(type='RandomFlip'), 37 | dict(type='Normalize', **img_norm_cfg), 38 | dict(type='ImageToTensor', keys=['img']), 39 | dict(type='Collect', keys=['img']), 40 | ]) 41 | ] 42 | data = dict( 43 | samples_per_gpu=4, 44 | workers_per_gpu=4, 45 | train=dict( 46 | type=dataset_type, 47 | data_root=data_root, 48 | img_dir='img_dir/train', 49 | ann_dir='ann_dir/train', 50 | pipeline=train_pipeline), 51 | val=dict( 52 | type=dataset_type, 53 | data_root=data_root, 54 | img_dir='img_dir/val', 55 | ann_dir='ann_dir/val', 56 | pipeline=test_pipeline), 57 | test=dict( 58 | type=dataset_type, 59 | data_root=data_root, 60 | img_dir='img_dir/val', 61 | ann_dir='ann_dir/val', 62 | pipeline=test_pipeline)) 63 | -------------------------------------------------------------------------------- /configs/_base_/datasets/loveda.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'LoveDADataset' 3 | data_root = 'data/loveDA' 4 | img_norm_cfg = dict( 5 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 6 | crop_size = (512, 512) 7 | train_pipeline = [ 8 | dict(type='LoadImageFromFile'), 9 | dict(type='LoadAnnotations', reduce_zero_label=True), 10 | dict(type='Resize', img_scale=(2048, 512), ratio_range=(0.5, 2.0)), 11 | dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), 12 | dict(type='RandomFlip', prob=0.5), 13 | dict(type='PhotoMetricDistortion'), 14 | dict(type='Normalize', **img_norm_cfg), 15 | dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), 16 | dict(type='DefaultFormatBundle'), 17 | dict(type='Collect', keys=['img', 'gt_semantic_seg']), 18 | ] 19 | test_pipeline = [ 20 | dict(type='LoadImageFromFile'), 21 | dict( 22 | type='MultiScaleFlipAug', 23 | img_scale=(1024, 1024), 24 | # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], 25 | flip=False, 26 | transforms=[ 27 | dict(type='Resize', keep_ratio=True), 28 | dict(type='RandomFlip'), 29 | dict(type='Normalize', **img_norm_cfg), 30 | dict(type='ImageToTensor', keys=['img']), 31 | dict(type='Collect', keys=['img']), 32 | ]) 33 | ] 34 | data = dict( 35 | samples_per_gpu=4, 36 | workers_per_gpu=4, 37 | train=dict( 38 | type=dataset_type, 39 | data_root=data_root, 40 | img_dir='img_dir/train', 41 | ann_dir='ann_dir/train', 42 | pipeline=train_pipeline), 43 | val=dict( 44 | type=dataset_type, 45 | data_root=data_root, 46 | img_dir='img_dir/val', 47 | ann_dir='ann_dir/val', 48 | pipeline=test_pipeline), 49 | test=dict( 50 | type=dataset_type, 51 | data_root=data_root, 52 | img_dir='img_dir/val', 53 | ann_dir='ann_dir/val', 54 | pipeline=test_pipeline)) 55 | 
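Every dataset fragment in this directory shares the same img_norm_cfg: the ImageNet channel means and standard deviations expressed in 0-255 RGB space. A hand-rolled sketch of the arithmetic that the Normalize step performs is shown below; this is an illustrative equivalent, not mmcv's actual implementation.

import numpy as np

# ImageNet mean/std in 0-255 RGB space, as used by every img_norm_cfg above.
mean = np.array([123.675, 116.28, 103.53], dtype=np.float32)
std = np.array([58.395, 57.12, 57.375], dtype=np.float32)

def normalize(img_bgr: np.ndarray) -> np.ndarray:
    """Hand-rolled equivalent of dict(type='Normalize', **img_norm_cfg).

    mmcv loads images as BGR; to_rgb=True flips channels before whitening.
    """
    img = img_bgr[..., ::-1].astype(np.float32)  # BGR -> RGB
    return (img - mean) / std

dummy = np.random.randint(0, 256, (512, 512, 3), dtype=np.uint8)
out = normalize(dummy)
print(out.mean(axis=(0, 1)))  # roughly centred near zero per channel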
-------------------------------------------------------------------------------- /configs/_base_/datasets/pascal_context.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'PascalContextDataset' 3 | data_root = 'data/VOCdevkit/VOC2010/' 4 | img_norm_cfg = dict( 5 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 6 | 7 | img_scale = (520, 520) 8 | crop_size = (480, 480) 9 | 10 | train_pipeline = [ 11 | dict(type='LoadImageFromFile'), 12 | dict(type='LoadAnnotations'), 13 | dict(type='Resize', img_scale=img_scale, ratio_range=(0.5, 2.0)), 14 | dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), 15 | dict(type='RandomFlip', prob=0.5), 16 | dict(type='PhotoMetricDistortion'), 17 | dict(type='Normalize', **img_norm_cfg), 18 | dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), 19 | dict(type='DefaultFormatBundle'), 20 | dict(type='Collect', keys=['img', 'gt_semantic_seg']), 21 | ] 22 | test_pipeline = [ 23 | dict(type='LoadImageFromFile'), 24 | dict( 25 | type='MultiScaleFlipAug', 26 | img_scale=img_scale, 27 | # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], 28 | flip=False, 29 | transforms=[ 30 | dict(type='Resize', keep_ratio=True), 31 | dict(type='RandomFlip'), 32 | dict(type='Normalize', **img_norm_cfg), 33 | dict(type='ImageToTensor', keys=['img']), 34 | dict(type='Collect', keys=['img']), 35 | ]) 36 | ] 37 | data = dict( 38 | samples_per_gpu=4, 39 | workers_per_gpu=4, 40 | train=dict( 41 | type=dataset_type, 42 | data_root=data_root, 43 | img_dir='JPEGImages', 44 | ann_dir='SegmentationClassContext', 45 | split='ImageSets/SegmentationContext/train.txt', 46 | pipeline=train_pipeline), 47 | val=dict( 48 | type=dataset_type, 49 | data_root=data_root, 50 | img_dir='JPEGImages', 51 | ann_dir='SegmentationClassContext', 52 | split='ImageSets/SegmentationContext/val.txt', 53 | pipeline=test_pipeline), 54 | test=dict( 55 | type=dataset_type, 56 | data_root=data_root, 57 | img_dir='JPEGImages', 58 | ann_dir='SegmentationClassContext', 59 | split='ImageSets/SegmentationContext/val.txt', 60 | pipeline=test_pipeline)) 61 | -------------------------------------------------------------------------------- /configs/_base_/datasets/pascal_context_59.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'PascalContextDataset59' 3 | data_root = 'data/VOCdevkit/VOC2010/' 4 | img_norm_cfg = dict( 5 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 6 | 7 | img_scale = (520, 520) 8 | crop_size = (480, 480) 9 | 10 | train_pipeline = [ 11 | dict(type='LoadImageFromFile'), 12 | dict(type='LoadAnnotations', reduce_zero_label=True), 13 | dict(type='Resize', img_scale=img_scale, ratio_range=(0.5, 2.0)), 14 | dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), 15 | dict(type='RandomFlip', prob=0.5), 16 | dict(type='PhotoMetricDistortion'), 17 | dict(type='Normalize', **img_norm_cfg), 18 | dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), 19 | dict(type='DefaultFormatBundle'), 20 | dict(type='Collect', keys=['img', 'gt_semantic_seg']), 21 | ] 22 | test_pipeline = [ 23 | dict(type='LoadImageFromFile'), 24 | dict( 25 | type='MultiScaleFlipAug', 26 | img_scale=img_scale, 27 | # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], 28 | flip=False, 29 | transforms=[ 30 | dict(type='Resize', keep_ratio=True), 31 | dict(type='RandomFlip'), 32 | dict(type='Normalize', **img_norm_cfg), 33 
| dict(type='ImageToTensor', keys=['img']), 34 | dict(type='Collect', keys=['img']), 35 | ]) 36 | ] 37 | data = dict( 38 | samples_per_gpu=4, 39 | workers_per_gpu=4, 40 | train=dict( 41 | type=dataset_type, 42 | data_root=data_root, 43 | img_dir='JPEGImages', 44 | ann_dir='SegmentationClassContext', 45 | split='ImageSets/SegmentationContext/train.txt', 46 | pipeline=train_pipeline), 47 | val=dict( 48 | type=dataset_type, 49 | data_root=data_root, 50 | img_dir='JPEGImages', 51 | ann_dir='SegmentationClassContext', 52 | split='ImageSets/SegmentationContext/val.txt', 53 | pipeline=test_pipeline), 54 | test=dict( 55 | type=dataset_type, 56 | data_root=data_root, 57 | img_dir='JPEGImages', 58 | ann_dir='SegmentationClassContext', 59 | split='ImageSets/SegmentationContext/val.txt', 60 | pipeline=test_pipeline)) 61 | -------------------------------------------------------------------------------- /configs/_base_/datasets/pascal_voc12.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'PascalVOCDataset' 3 | data_root = 'data/VOCdevkit/VOC2012' 4 | img_norm_cfg = dict( 5 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 6 | crop_size = (512, 512) 7 | train_pipeline = [ 8 | dict(type='LoadImageFromFile'), 9 | dict(type='LoadAnnotations'), 10 | dict(type='Resize', img_scale=(2048, 512), ratio_range=(0.5, 2.0)), 11 | dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), 12 | dict(type='RandomFlip', prob=0.5), 13 | dict(type='PhotoMetricDistortion'), 14 | dict(type='Normalize', **img_norm_cfg), 15 | dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), 16 | dict(type='DefaultFormatBundle'), 17 | dict(type='Collect', keys=['img', 'gt_semantic_seg']), 18 | ] 19 | test_pipeline = [ 20 | dict(type='LoadImageFromFile'), 21 | dict( 22 | type='MultiScaleFlipAug', 23 | img_scale=(2048, 512), 24 | # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], 25 | flip=False, 26 | transforms=[ 27 | dict(type='Resize', keep_ratio=True), 28 | dict(type='RandomFlip'), 29 | dict(type='Normalize', **img_norm_cfg), 30 | dict(type='ImageToTensor', keys=['img']), 31 | dict(type='Collect', keys=['img']), 32 | ]) 33 | ] 34 | data = dict( 35 | samples_per_gpu=4, 36 | workers_per_gpu=4, 37 | train=dict( 38 | type=dataset_type, 39 | data_root=data_root, 40 | img_dir='JPEGImages', 41 | ann_dir='SegmentationClass', 42 | split='ImageSets/Segmentation/train.txt', 43 | pipeline=train_pipeline), 44 | val=dict( 45 | type=dataset_type, 46 | data_root=data_root, 47 | img_dir='JPEGImages', 48 | ann_dir='SegmentationClass', 49 | split='ImageSets/Segmentation/val.txt', 50 | pipeline=test_pipeline), 51 | test=dict( 52 | type=dataset_type, 53 | data_root=data_root, 54 | img_dir='JPEGImages', 55 | ann_dir='SegmentationClass', 56 | split='ImageSets/Segmentation/val.txt', 57 | pipeline=test_pipeline)) 58 | -------------------------------------------------------------------------------- /configs/_base_/datasets/pascal_voc12_aug.py: -------------------------------------------------------------------------------- 1 | _base_ = './pascal_voc12.py' 2 | # dataset settings 3 | data = dict( 4 | train=dict( 5 | ann_dir=['SegmentationClass', 'SegmentationClassAug'], 6 | split=[ 7 | 'ImageSets/Segmentation/train.txt', 8 | 'ImageSets/Segmentation/aug.txt' 9 | ])) 10 | -------------------------------------------------------------------------------- /configs/_base_/datasets/potsdam.py: 
-------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'PotsdamDataset' 3 | data_root = 'data/potsdam' 4 | img_norm_cfg = dict( 5 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 6 | crop_size = (512, 512) 7 | train_pipeline = [ 8 | dict(type='LoadImageFromFile'), 9 | dict(type='LoadAnnotations', reduce_zero_label=True), 10 | dict(type='Resize', img_scale=(512, 512), ratio_range=(0.5, 2.0)), 11 | dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), 12 | dict(type='RandomFlip', prob=0.5), 13 | dict(type='PhotoMetricDistortion'), 14 | dict(type='Normalize', **img_norm_cfg), 15 | dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), 16 | dict(type='DefaultFormatBundle'), 17 | dict(type='Collect', keys=['img', 'gt_semantic_seg']), 18 | ] 19 | test_pipeline = [ 20 | dict(type='LoadImageFromFile'), 21 | dict( 22 | type='MultiScaleFlipAug', 23 | img_scale=(512, 512), 24 | # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], 25 | flip=False, 26 | transforms=[ 27 | dict(type='Resize', keep_ratio=True), 28 | dict(type='RandomFlip'), 29 | dict(type='Normalize', **img_norm_cfg), 30 | dict(type='ImageToTensor', keys=['img']), 31 | dict(type='Collect', keys=['img']), 32 | ]) 33 | ] 34 | data = dict( 35 | samples_per_gpu=4, 36 | workers_per_gpu=4, 37 | train=dict( 38 | type=dataset_type, 39 | data_root=data_root, 40 | img_dir='img_dir/train', 41 | ann_dir='ann_dir/train', 42 | pipeline=train_pipeline), 43 | val=dict( 44 | type=dataset_type, 45 | data_root=data_root, 46 | img_dir='img_dir/val', 47 | ann_dir='ann_dir/val', 48 | pipeline=test_pipeline), 49 | test=dict( 50 | type=dataset_type, 51 | data_root=data_root, 52 | img_dir='img_dir/val', 53 | ann_dir='ann_dir/val', 54 | pipeline=test_pipeline)) 55 | -------------------------------------------------------------------------------- /configs/_base_/datasets/stare.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'STAREDataset' 3 | data_root = 'data/STARE' 4 | img_norm_cfg = dict( 5 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 6 | img_scale = (605, 700) 7 | crop_size = (128, 128) 8 | train_pipeline = [ 9 | dict(type='LoadImageFromFile'), 10 | dict(type='LoadAnnotations'), 11 | dict(type='Resize', img_scale=img_scale, ratio_range=(0.5, 2.0)), 12 | dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), 13 | dict(type='RandomFlip', prob=0.5), 14 | dict(type='PhotoMetricDistortion'), 15 | dict(type='Normalize', **img_norm_cfg), 16 | dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), 17 | dict(type='DefaultFormatBundle'), 18 | dict(type='Collect', keys=['img', 'gt_semantic_seg']) 19 | ] 20 | test_pipeline = [ 21 | dict(type='LoadImageFromFile'), 22 | dict( 23 | type='MultiScaleFlipAug', 24 | img_scale=img_scale, 25 | # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75, 2.0], 26 | flip=False, 27 | transforms=[ 28 | dict(type='Resize', keep_ratio=True), 29 | dict(type='RandomFlip'), 30 | dict(type='Normalize', **img_norm_cfg), 31 | dict(type='ImageToTensor', keys=['img']), 32 | dict(type='Collect', keys=['img']) 33 | ]) 34 | ] 35 | 36 | data = dict( 37 | samples_per_gpu=4, 38 | workers_per_gpu=4, 39 | train=dict( 40 | type='RepeatDataset', 41 | times=40000, 42 | dataset=dict( 43 | type=dataset_type, 44 | data_root=data_root, 45 | img_dir='images/training', 46 | ann_dir='annotations/training', 47 | pipeline=train_pipeline)), 48 | 
val=dict( 49 | type=dataset_type, 50 | data_root=data_root, 51 | img_dir='images/validation', 52 | ann_dir='annotations/validation', 53 | pipeline=test_pipeline), 54 | test=dict( 55 | type=dataset_type, 56 | data_root=data_root, 57 | img_dir='images/validation', 58 | ann_dir='annotations/validation', 59 | pipeline=test_pipeline)) 60 | -------------------------------------------------------------------------------- /configs/_base_/datasets/vaihingen.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'ISPRSDataset' 3 | data_root = 'data/vaihingen' 4 | img_norm_cfg = dict( 5 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 6 | crop_size = (512, 512) 7 | train_pipeline = [ 8 | dict(type='LoadImageFromFile'), 9 | dict(type='LoadAnnotations', reduce_zero_label=True), 10 | dict(type='Resize', img_scale=(512, 512), ratio_range=(0.5, 2.0)), 11 | dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), 12 | dict(type='RandomFlip', prob=0.5), 13 | dict(type='PhotoMetricDistortion'), 14 | dict(type='Normalize', **img_norm_cfg), 15 | dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), 16 | dict(type='DefaultFormatBundle'), 17 | dict(type='Collect', keys=['img', 'gt_semantic_seg']), 18 | ] 19 | test_pipeline = [ 20 | dict(type='LoadImageFromFile'), 21 | dict( 22 | type='MultiScaleFlipAug', 23 | img_scale=(512, 512), 24 | # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], 25 | flip=False, 26 | transforms=[ 27 | dict(type='Resize', keep_ratio=True), 28 | dict(type='RandomFlip'), 29 | dict(type='Normalize', **img_norm_cfg), 30 | dict(type='ImageToTensor', keys=['img']), 31 | dict(type='Collect', keys=['img']), 32 | ]) 33 | ] 34 | data = dict( 35 | samples_per_gpu=4, 36 | workers_per_gpu=4, 37 | train=dict( 38 | type=dataset_type, 39 | data_root=data_root, 40 | img_dir='img_dir/train', 41 | ann_dir='ann_dir/train', 42 | pipeline=train_pipeline), 43 | val=dict( 44 | type=dataset_type, 45 | data_root=data_root, 46 | img_dir='img_dir/val', 47 | ann_dir='ann_dir/val', 48 | pipeline=test_pipeline), 49 | test=dict( 50 | type=dataset_type, 51 | data_root=data_root, 52 | img_dir='img_dir/val', 53 | ann_dir='ann_dir/val', 54 | pipeline=test_pipeline)) 55 | -------------------------------------------------------------------------------- /configs/_base_/default_runtime.py: -------------------------------------------------------------------------------- 1 | # yapf:disable 2 | log_config = dict( 3 | interval=50, 4 | hooks=[ 5 | dict(type='TextLoggerHook', by_epoch=False), 6 | # dict(type='TensorboardLoggerHook') 7 | # dict(type='PaviLoggerHook') # for internal services 8 | ]) 9 | # yapf:enable 10 | dist_params = dict(backend='nccl') 11 | log_level = 'INFO' 12 | load_from = None 13 | resume_from = None 14 | workflow = [('train', 1)] 15 | cudnn_benchmark = True 16 | -------------------------------------------------------------------------------- /configs/_base_/models/ann_r50-d8.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | model = dict( 4 | type='EncoderDecoder', 5 | pretrained='open-mmlab://resnet50_v1c', 6 | backbone=dict( 7 | type='ResNetV1c', 8 | depth=50, 9 | num_stages=4, 10 | out_indices=(0, 1, 2, 3), 11 | dilations=(1, 1, 2, 4), 12 | strides=(1, 2, 1, 1), 13 | norm_cfg=norm_cfg, 14 | norm_eval=False, 15 | style='pytorch', 16 | contract_dilation=True), 17 | decode_head=dict( 18 | 
type='ANNHead', 19 | in_channels=[1024, 2048], 20 | in_index=[2, 3], 21 | channels=512, 22 | project_channels=256, 23 | query_scales=(1, ), 24 | key_pool_scales=(1, 3, 6, 8), 25 | dropout_ratio=0.1, 26 | num_classes=19, 27 | norm_cfg=norm_cfg, 28 | align_corners=False, 29 | loss_decode=dict( 30 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 31 | auxiliary_head=dict( 32 | type='FCNHead', 33 | in_channels=1024, 34 | in_index=2, 35 | channels=256, 36 | num_convs=1, 37 | concat_input=False, 38 | dropout_ratio=0.1, 39 | num_classes=19, 40 | norm_cfg=norm_cfg, 41 | align_corners=False, 42 | loss_decode=dict( 43 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 44 | # model training and testing settings 45 | train_cfg=dict(), 46 | test_cfg=dict(mode='whole')) 47 | -------------------------------------------------------------------------------- /configs/_base_/models/apcnet_r50-d8.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | model = dict( 4 | type='EncoderDecoder', 5 | pretrained='open-mmlab://resnet50_v1c', 6 | backbone=dict( 7 | type='ResNetV1c', 8 | depth=50, 9 | num_stages=4, 10 | out_indices=(0, 1, 2, 3), 11 | dilations=(1, 1, 2, 4), 12 | strides=(1, 2, 1, 1), 13 | norm_cfg=norm_cfg, 14 | norm_eval=False, 15 | style='pytorch', 16 | contract_dilation=True), 17 | decode_head=dict( 18 | type='APCHead', 19 | in_channels=2048, 20 | in_index=3, 21 | channels=512, 22 | pool_scales=(1, 2, 3, 6), 23 | dropout_ratio=0.1, 24 | num_classes=19, 25 | norm_cfg=dict(type='SyncBN', requires_grad=True), 26 | align_corners=False, 27 | loss_decode=dict( 28 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 29 | auxiliary_head=dict( 30 | type='FCNHead', 31 | in_channels=1024, 32 | in_index=2, 33 | channels=256, 34 | num_convs=1, 35 | concat_input=False, 36 | dropout_ratio=0.1, 37 | num_classes=19, 38 | norm_cfg=norm_cfg, 39 | align_corners=False, 40 | loss_decode=dict( 41 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 42 | # model training and testing settings 43 | train_cfg=dict(), 44 | test_cfg=dict(mode='whole')) 45 | -------------------------------------------------------------------------------- /configs/_base_/models/ccnet_r50-d8.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | model = dict( 4 | type='EncoderDecoder', 5 | pretrained='open-mmlab://resnet50_v1c', 6 | backbone=dict( 7 | type='ResNetV1c', 8 | depth=50, 9 | num_stages=4, 10 | out_indices=(0, 1, 2, 3), 11 | dilations=(1, 1, 2, 4), 12 | strides=(1, 2, 1, 1), 13 | norm_cfg=norm_cfg, 14 | norm_eval=False, 15 | style='pytorch', 16 | contract_dilation=True), 17 | decode_head=dict( 18 | type='CCHead', 19 | in_channels=2048, 20 | in_index=3, 21 | channels=512, 22 | recurrence=2, 23 | dropout_ratio=0.1, 24 | num_classes=19, 25 | norm_cfg=norm_cfg, 26 | align_corners=False, 27 | loss_decode=dict( 28 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 29 | auxiliary_head=dict( 30 | type='FCNHead', 31 | in_channels=1024, 32 | in_index=2, 33 | channels=256, 34 | num_convs=1, 35 | concat_input=False, 36 | dropout_ratio=0.1, 37 | num_classes=19, 38 | norm_cfg=norm_cfg, 39 | align_corners=False, 40 | loss_decode=dict( 41 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 42 | # model training and testing settings 43 | train_cfg=dict(), 44 
| test_cfg=dict(mode='whole')) 45 | -------------------------------------------------------------------------------- /configs/_base_/models/cgnet.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', eps=1e-03, requires_grad=True) 3 | model = dict( 4 | type='EncoderDecoder', 5 | backbone=dict( 6 | type='CGNet', 7 | norm_cfg=norm_cfg, 8 | in_channels=3, 9 | num_channels=(32, 64, 128), 10 | num_blocks=(3, 21), 11 | dilations=(2, 4), 12 | reductions=(8, 16)), 13 | decode_head=dict( 14 | type='FCNHead', 15 | in_channels=256, 16 | in_index=2, 17 | channels=256, 18 | num_convs=0, 19 | concat_input=False, 20 | dropout_ratio=0, 21 | num_classes=19, 22 | norm_cfg=norm_cfg, 23 | loss_decode=dict( 24 | type='CrossEntropyLoss', 25 | use_sigmoid=False, 26 | loss_weight=1.0, 27 | class_weight=[ 28 | 2.5959933, 6.7415504, 3.5354059, 9.8663225, 9.690899, 9.369352, 29 | 10.289121, 9.953208, 4.3097677, 9.490387, 7.674431, 9.396905, 30 | 10.347791, 6.3927646, 10.226669, 10.241062, 10.280587, 31 | 10.396974, 10.055647 32 | ])), 33 | # model training and testing settings 34 | train_cfg=dict(sampler=None), 35 | test_cfg=dict(mode='whole')) 36 | -------------------------------------------------------------------------------- /configs/_base_/models/danet_r50-d8.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | model = dict( 4 | type='EncoderDecoder', 5 | pretrained='open-mmlab://resnet50_v1c', 6 | backbone=dict( 7 | type='ResNetV1c', 8 | depth=50, 9 | num_stages=4, 10 | out_indices=(0, 1, 2, 3), 11 | dilations=(1, 1, 2, 4), 12 | strides=(1, 2, 1, 1), 13 | norm_cfg=norm_cfg, 14 | norm_eval=False, 15 | style='pytorch', 16 | contract_dilation=True), 17 | decode_head=dict( 18 | type='DAHead', 19 | in_channels=2048, 20 | in_index=3, 21 | channels=512, 22 | pam_channels=64, 23 | dropout_ratio=0.1, 24 | num_classes=19, 25 | norm_cfg=norm_cfg, 26 | align_corners=False, 27 | loss_decode=dict( 28 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 29 | auxiliary_head=dict( 30 | type='FCNHead', 31 | in_channels=1024, 32 | in_index=2, 33 | channels=256, 34 | num_convs=1, 35 | concat_input=False, 36 | dropout_ratio=0.1, 37 | num_classes=19, 38 | norm_cfg=norm_cfg, 39 | align_corners=False, 40 | loss_decode=dict( 41 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 42 | # model training and testing settings 43 | train_cfg=dict(), 44 | test_cfg=dict(mode='whole')) 45 | -------------------------------------------------------------------------------- /configs/_base_/models/deeplabv3_r50-d8.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | model = dict( 4 | type='EncoderDecoder', 5 | pretrained='open-mmlab://resnet50_v1c', 6 | backbone=dict( 7 | type='ResNetV1c', 8 | depth=50, 9 | num_stages=4, 10 | out_indices=(0, 1, 2, 3), 11 | dilations=(1, 1, 2, 4), 12 | strides=(1, 2, 1, 1), 13 | norm_cfg=norm_cfg, 14 | norm_eval=False, 15 | style='pytorch', 16 | contract_dilation=True), 17 | decode_head=dict( 18 | type='ASPPHead', 19 | in_channels=2048, 20 | in_index=3, 21 | channels=512, 22 | dilations=(1, 12, 24, 36), 23 | dropout_ratio=0.1, 24 | num_classes=19, 25 | norm_cfg=norm_cfg, 26 | align_corners=False, 27 | loss_decode=dict( 28 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 
29 | auxiliary_head=dict( 30 | type='FCNHead', 31 | in_channels=1024, 32 | in_index=2, 33 | channels=256, 34 | num_convs=1, 35 | concat_input=False, 36 | dropout_ratio=0.1, 37 | num_classes=19, 38 | norm_cfg=norm_cfg, 39 | align_corners=False, 40 | loss_decode=dict( 41 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 42 | # model training and testing settings 43 | train_cfg=dict(), 44 | test_cfg=dict(mode='whole')) 45 | -------------------------------------------------------------------------------- /configs/_base_/models/deeplabv3_unet_s5-d16.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | model = dict( 4 | type='EncoderDecoder', 5 | pretrained=None, 6 | backbone=dict( 7 | type='UNet', 8 | in_channels=3, 9 | base_channels=64, 10 | num_stages=5, 11 | strides=(1, 1, 1, 1, 1), 12 | enc_num_convs=(2, 2, 2, 2, 2), 13 | dec_num_convs=(2, 2, 2, 2), 14 | downsamples=(True, True, True, True), 15 | enc_dilations=(1, 1, 1, 1, 1), 16 | dec_dilations=(1, 1, 1, 1), 17 | with_cp=False, 18 | conv_cfg=None, 19 | norm_cfg=norm_cfg, 20 | act_cfg=dict(type='ReLU'), 21 | upsample_cfg=dict(type='InterpConv'), 22 | norm_eval=False), 23 | decode_head=dict( 24 | type='ASPPHead', 25 | in_channels=64, 26 | in_index=4, 27 | channels=16, 28 | dilations=(1, 12, 24, 36), 29 | dropout_ratio=0.1, 30 | num_classes=2, 31 | norm_cfg=norm_cfg, 32 | align_corners=False, 33 | loss_decode=dict( 34 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 35 | auxiliary_head=dict( 36 | type='FCNHead', 37 | in_channels=128, 38 | in_index=3, 39 | channels=64, 40 | num_convs=1, 41 | concat_input=False, 42 | dropout_ratio=0.1, 43 | num_classes=2, 44 | norm_cfg=norm_cfg, 45 | align_corners=False, 46 | loss_decode=dict( 47 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 48 | # model training and testing settings 49 | train_cfg=dict(), 50 | test_cfg=dict(mode='slide', crop_size=(256, 256), stride=(170, 170))) 51 | -------------------------------------------------------------------------------- /configs/_base_/models/deeplabv3plus_m-v2-d8.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | model = dict( 4 | type='EncoderDecoder', 5 | pretrained='mmcls://mobilenet_v2', 6 | backbone=dict( 7 | type='MobileNetV2', 8 | widen_factor=1., 9 | strides=(1, 2, 2, 1, 1, 1, 1), 10 | dilations=(1, 1, 1, 2, 2, 4, 4), 11 | out_indices=(1, 2, 4, 6), 12 | norm_cfg=norm_cfg), 13 | decode_head=dict( 14 | type='DepthwiseSeparableASPPHead', 15 | in_channels=320, 16 | in_index=3, 17 | channels=512, 18 | dilations=(1, 12, 24, 36), 19 | c1_in_channels=24, 20 | c1_channels=48, 21 | dropout_ratio=0.1, 22 | num_classes=150, 23 | norm_cfg=norm_cfg, 24 | align_corners=False, 25 | loss_decode=dict( 26 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 27 | auxiliary_head=dict( 28 | type='FCNHead', 29 | in_channels=96,  # stage picked by in_index=2 outputs 96 channels for this MobileNetV2 (out_indices=(1, 2, 4, 6)); 1024 would not match 30 | in_index=2, 31 | channels=256, 32 | num_convs=1, 33 | concat_input=False, 34 | dropout_ratio=0.1, 35 | num_classes=150, 36 | norm_cfg=norm_cfg, 37 | align_corners=False, 38 | loss_decode=dict( 39 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 40 | # model training and testing settings 41 | train_cfg=dict(), 42 | test_cfg=dict(mode='whole')) 43 | --------------------------------------------------------------------------------
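Note on composition: none of these `_base_` fragments is trained on its own. A top-level experiment config names one model file, one dataset file, the shared runtime, and one schedule in a `_base_` list, and mmcv's config loader merges them recursively, with keys set in the child replacing the inherited values. A minimal sketch under standard mmcv/mmseg-0.x semantics — the path and the model/dataset pairing below are illustrative, not files shipped in this repo:

```python
# configs/example/deeplabv3plus_r50-d8_512x512_80k_voc12.py  (hypothetical)
# Compose one model, one dataset, the shared runtime, and one schedule.
_base_ = [
    '../_base_/models/deeplabv3plus_r50-d8.py',
    '../_base_/datasets/pascal_voc12.py',
    '../_base_/default_runtime.py',
    '../_base_/schedules/schedule_80k.py',
]
# Overrides merge key by key into the inherited dicts: the DeepLabV3+
# definition stays intact except both heads are retargeted from the 19
# classes hard-coded in the model fragment to VOC's 21 (20 + background).
model = dict(
    decode_head=dict(num_classes=21),
    auxiliary_head=dict(num_classes=21))
# pascal_voc12.py sets samples_per_gpu=4; override just that one key.
data = dict(samples_per_gpu=2)
```

Loading such a file with `mmcv.Config.fromfile(...)` resolves the whole inheritance chain, so `cfg.model.backbone.depth` still reads 50 from the model fragment while `cfg.model.decode_head.num_classes` comes back as 21.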
/configs/_base_/models/deeplabv3plus_r50-d8.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | model = dict( 4 | type='EncoderDecoder', 5 | pretrained='open-mmlab://resnet50_v1c', 6 | backbone=dict( 7 | type='ResNetV1c', 8 | depth=50, 9 | num_stages=4, 10 | out_indices=(0, 1, 2, 3), 11 | dilations=(1, 1, 2, 4), 12 | strides=(1, 2, 1, 1), 13 | norm_cfg=norm_cfg, 14 | norm_eval=False, 15 | style='pytorch', 16 | contract_dilation=True), 17 | decode_head=dict( 18 | type='DepthwiseSeparableASPPHead', 19 | in_channels=2048, 20 | in_index=3, 21 | channels=512, 22 | dilations=(1, 12, 24, 36), 23 | c1_in_channels=256, 24 | c1_channels=48, 25 | dropout_ratio=0.1, 26 | num_classes=19, 27 | norm_cfg=norm_cfg, 28 | align_corners=False, 29 | loss_decode=dict( 30 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 31 | auxiliary_head=dict( 32 | type='FCNHead', 33 | in_channels=1024, 34 | in_index=2, 35 | channels=256, 36 | num_convs=1, 37 | concat_input=False, 38 | dropout_ratio=0.1, 39 | num_classes=19, 40 | norm_cfg=norm_cfg, 41 | align_corners=False, 42 | loss_decode=dict( 43 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 44 | # model training and testing settings 45 | train_cfg=dict(), 46 | test_cfg=dict(mode='whole')) 47 | -------------------------------------------------------------------------------- /configs/_base_/models/dmnet_r50-d8.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | model = dict( 4 | type='EncoderDecoder', 5 | pretrained='open-mmlab://resnet50_v1c', 6 | backbone=dict( 7 | type='ResNetV1c', 8 | depth=50, 9 | num_stages=4, 10 | out_indices=(0, 1, 2, 3), 11 | dilations=(1, 1, 2, 4), 12 | strides=(1, 2, 1, 1), 13 | norm_cfg=norm_cfg, 14 | norm_eval=False, 15 | style='pytorch', 16 | contract_dilation=True), 17 | decode_head=dict( 18 | type='DMHead', 19 | in_channels=2048, 20 | in_index=3, 21 | channels=512, 22 | filter_sizes=(1, 3, 5, 7), 23 | dropout_ratio=0.1, 24 | num_classes=19, 25 | norm_cfg=dict(type='SyncBN', requires_grad=True), 26 | align_corners=False, 27 | loss_decode=dict( 28 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 29 | auxiliary_head=dict( 30 | type='FCNHead', 31 | in_channels=1024, 32 | in_index=2, 33 | channels=256, 34 | num_convs=1, 35 | concat_input=False, 36 | dropout_ratio=0.1, 37 | num_classes=19, 38 | norm_cfg=norm_cfg, 39 | align_corners=False, 40 | loss_decode=dict( 41 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 42 | # model training and testing settings 43 | train_cfg=dict(), 44 | test_cfg=dict(mode='whole')) 45 | -------------------------------------------------------------------------------- /configs/_base_/models/dnl_r50-d8.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | model = dict( 4 | type='EncoderDecoder', 5 | pretrained='open-mmlab://resnet50_v1c', 6 | backbone=dict( 7 | type='ResNetV1c', 8 | depth=50, 9 | num_stages=4, 10 | out_indices=(0, 1, 2, 3), 11 | dilations=(1, 1, 2, 4), 12 | strides=(1, 2, 1, 1), 13 | norm_cfg=norm_cfg, 14 | norm_eval=False, 15 | style='pytorch', 16 | contract_dilation=True), 17 | decode_head=dict( 18 | type='DNLHead', 19 | in_channels=2048, 20 | in_index=3, 21 | channels=512, 22 | dropout_ratio=0.1, 23 | 
reduction=2, 24 | use_scale=True, 25 | mode='embedded_gaussian', 26 | num_classes=19, 27 | norm_cfg=norm_cfg, 28 | align_corners=False, 29 | loss_decode=dict( 30 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 31 | auxiliary_head=dict( 32 | type='FCNHead', 33 | in_channels=1024, 34 | in_index=2, 35 | channels=256, 36 | num_convs=1, 37 | concat_input=False, 38 | dropout_ratio=0.1, 39 | num_classes=19, 40 | norm_cfg=norm_cfg, 41 | align_corners=False, 42 | loss_decode=dict( 43 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 44 | # model training and testing settings 45 | train_cfg=dict(), 46 | test_cfg=dict(mode='whole')) 47 | -------------------------------------------------------------------------------- /configs/_base_/models/dpt_vit-b16.py: -------------------------------------------------------------------------------- 1 | norm_cfg = dict(type='SyncBN', requires_grad=True) 2 | model = dict( 3 | type='EncoderDecoder', 4 | pretrained='pretrain/vit-b16_p16_224-80ecf9dd.pth', # noqa 5 | backbone=dict( 6 | type='VisionTransformer', 7 | img_size=224, 8 | embed_dims=768, 9 | num_layers=12, 10 | num_heads=12, 11 | out_indices=(2, 5, 8, 11), 12 | final_norm=False, 13 | with_cls_token=True, 14 | output_cls_token=True), 15 | decode_head=dict( 16 | type='DPTHead', 17 | in_channels=(768, 768, 768, 768), 18 | channels=256, 19 | embed_dims=768, 20 | post_process_channels=[96, 192, 384, 768], 21 | num_classes=150, 22 | readout_type='project', 23 | input_transform='multiple_select', 24 | in_index=(0, 1, 2, 3), 25 | norm_cfg=norm_cfg, 26 | loss_decode=dict( 27 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 28 | auxiliary_head=None, 29 | # model training and testing settings 30 | train_cfg=dict(), 31 | test_cfg=dict(mode='whole')) # yapf: disable 32 | -------------------------------------------------------------------------------- /configs/_base_/models/emanet_r50-d8.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | model = dict( 4 | type='EncoderDecoder', 5 | pretrained='open-mmlab://resnet50_v1c', 6 | backbone=dict( 7 | type='ResNetV1c', 8 | depth=50, 9 | num_stages=4, 10 | out_indices=(0, 1, 2, 3), 11 | dilations=(1, 1, 2, 4), 12 | strides=(1, 2, 1, 1), 13 | norm_cfg=norm_cfg, 14 | norm_eval=False, 15 | style='pytorch', 16 | contract_dilation=True), 17 | decode_head=dict( 18 | type='EMAHead', 19 | in_channels=2048, 20 | in_index=3, 21 | channels=256, 22 | ema_channels=512, 23 | num_bases=64, 24 | num_stages=3, 25 | momentum=0.1, 26 | dropout_ratio=0.1, 27 | num_classes=19, 28 | norm_cfg=norm_cfg, 29 | align_corners=False, 30 | loss_decode=dict( 31 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 32 | auxiliary_head=dict( 33 | type='FCNHead', 34 | in_channels=1024, 35 | in_index=2, 36 | channels=256, 37 | num_convs=1, 38 | concat_input=False, 39 | dropout_ratio=0.1, 40 | num_classes=19, 41 | norm_cfg=norm_cfg, 42 | align_corners=False, 43 | loss_decode=dict( 44 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 45 | # model training and testing settings 46 | train_cfg=dict(), 47 | test_cfg=dict(mode='whole')) 48 | -------------------------------------------------------------------------------- /configs/_base_/models/encnet_r50-d8.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | model = dict( 4 | 
type='EncoderDecoder', 5 | pretrained='open-mmlab://resnet50_v1c', 6 | backbone=dict( 7 | type='ResNetV1c', 8 | depth=50, 9 | num_stages=4, 10 | out_indices=(0, 1, 2, 3), 11 | dilations=(1, 1, 2, 4), 12 | strides=(1, 2, 1, 1), 13 | norm_cfg=norm_cfg, 14 | norm_eval=False, 15 | style='pytorch', 16 | contract_dilation=True), 17 | decode_head=dict( 18 | type='EncHead', 19 | in_channels=[512, 1024, 2048], 20 | in_index=(1, 2, 3), 21 | channels=512, 22 | num_codes=32, 23 | use_se_loss=True, 24 | add_lateral=False, 25 | dropout_ratio=0.1, 26 | num_classes=19, 27 | norm_cfg=norm_cfg, 28 | align_corners=False, 29 | loss_decode=dict( 30 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), 31 | loss_se_decode=dict( 32 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=0.2)), 33 | auxiliary_head=dict( 34 | type='FCNHead', 35 | in_channels=1024, 36 | in_index=2, 37 | channels=256, 38 | num_convs=1, 39 | concat_input=False, 40 | dropout_ratio=0.1, 41 | num_classes=19, 42 | norm_cfg=norm_cfg, 43 | align_corners=False, 44 | loss_decode=dict( 45 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 46 | # model training and testing settings 47 | train_cfg=dict(), 48 | test_cfg=dict(mode='whole')) 49 | -------------------------------------------------------------------------------- /configs/_base_/models/erfnet_fcn.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | model = dict( 4 | type='EncoderDecoder', 5 | pretrained=None, 6 | backbone=dict( 7 | type='ERFNet', 8 | in_channels=3, 9 | enc_downsample_channels=(16, 64, 128), 10 | enc_stage_non_bottlenecks=(5, 8), 11 | enc_non_bottleneck_dilations=(2, 4, 8, 16), 12 | enc_non_bottleneck_channels=(64, 128), 13 | dec_upsample_channels=(64, 16), 14 | dec_stages_non_bottleneck=(2, 2), 15 | dec_non_bottleneck_channels=(64, 16), 16 | dropout_ratio=0.1, 17 | init_cfg=None), 18 | decode_head=dict( 19 | type='FCNHead', 20 | in_channels=16, 21 | channels=128, 22 | num_convs=1, 23 | concat_input=False, 24 | dropout_ratio=0.1, 25 | num_classes=19, 26 | norm_cfg=norm_cfg, 27 | align_corners=False, 28 | loss_decode=dict( 29 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 30 | # model training and testing settings 31 | train_cfg=dict(), 32 | test_cfg=dict(mode='whole')) 33 | -------------------------------------------------------------------------------- /configs/_base_/models/fast_scnn.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True, momentum=0.01) 3 | model = dict( 4 | type='EncoderDecoder', 5 | backbone=dict( 6 | type='FastSCNN', 7 | downsample_dw_channels=(32, 48), 8 | global_in_channels=64, 9 | global_block_channels=(64, 96, 128), 10 | global_block_strides=(2, 2, 1), 11 | global_out_channels=128, 12 | higher_in_channels=64, 13 | lower_in_channels=128, 14 | fusion_out_channels=128, 15 | out_indices=(0, 1, 2), 16 | norm_cfg=norm_cfg, 17 | align_corners=False), 18 | decode_head=dict( 19 | type='DepthwiseSeparableFCNHead', 20 | in_channels=128, 21 | channels=128, 22 | concat_input=False, 23 | num_classes=19, 24 | in_index=-1, 25 | norm_cfg=norm_cfg, 26 | align_corners=False, 27 | loss_decode=dict( 28 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1)), 29 | auxiliary_head=[ 30 | dict( 31 | type='FCNHead', 32 | in_channels=128, 33 | channels=32, 34 | num_convs=1, 35 | num_classes=19, 36 | 
in_index=-2, 37 | norm_cfg=norm_cfg, 38 | concat_input=False, 39 | align_corners=False, 40 | loss_decode=dict( 41 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=0.4)), 42 | dict( 43 | type='FCNHead', 44 | in_channels=64, 45 | channels=32, 46 | num_convs=1, 47 | num_classes=19, 48 | in_index=-3, 49 | norm_cfg=norm_cfg, 50 | concat_input=False, 51 | align_corners=False, 52 | loss_decode=dict( 53 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=0.4)), 54 | ], 55 | # model training and testing settings 56 | train_cfg=dict(), 57 | test_cfg=dict(mode='whole')) 58 | -------------------------------------------------------------------------------- /configs/_base_/models/fastfcn_r50-d32_jpu_psp.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | model = dict( 4 | type='EncoderDecoder', 5 | pretrained='open-mmlab://resnet50_v1c', 6 | backbone=dict( 7 | type='ResNetV1c', 8 | depth=50, 9 | num_stages=4, 10 | dilations=(1, 1, 2, 4), 11 | strides=(1, 2, 2, 2), 12 | out_indices=(1, 2, 3), 13 | norm_cfg=norm_cfg, 14 | norm_eval=False, 15 | style='pytorch', 16 | contract_dilation=True), 17 | neck=dict( 18 | type='JPU', 19 | in_channels=(512, 1024, 2048), 20 | mid_channels=512, 21 | start_level=0, 22 | end_level=-1, 23 | dilations=(1, 2, 4, 8), 24 | align_corners=False, 25 | norm_cfg=norm_cfg), 26 | decode_head=dict( 27 | type='PSPHead', 28 | in_channels=2048, 29 | in_index=2, 30 | channels=512, 31 | pool_scales=(1, 2, 3, 6), 32 | dropout_ratio=0.1, 33 | num_classes=19, 34 | norm_cfg=norm_cfg, 35 | align_corners=False, 36 | loss_decode=dict( 37 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 38 | auxiliary_head=dict( 39 | type='FCNHead', 40 | in_channels=1024, 41 | in_index=1, 42 | channels=256, 43 | num_convs=1, 44 | concat_input=False, 45 | dropout_ratio=0.1, 46 | num_classes=19, 47 | norm_cfg=norm_cfg, 48 | align_corners=False, 49 | loss_decode=dict( 50 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 51 | # model training and testing settings 52 | train_cfg=dict(), 53 | test_cfg=dict(mode='whole')) 54 | -------------------------------------------------------------------------------- /configs/_base_/models/fcn_hr18.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | model = dict( 4 | type='EncoderDecoder', 5 | pretrained='open-mmlab://msra/hrnetv2_w18', 6 | backbone=dict( 7 | type='HRNet', 8 | norm_cfg=norm_cfg, 9 | norm_eval=False, 10 | extra=dict( 11 | stage1=dict( 12 | num_modules=1, 13 | num_branches=1, 14 | block='BOTTLENECK', 15 | num_blocks=(4, ), 16 | num_channels=(64, )), 17 | stage2=dict( 18 | num_modules=1, 19 | num_branches=2, 20 | block='BASIC', 21 | num_blocks=(4, 4), 22 | num_channels=(18, 36)), 23 | stage3=dict( 24 | num_modules=4, 25 | num_branches=3, 26 | block='BASIC', 27 | num_blocks=(4, 4, 4), 28 | num_channels=(18, 36, 72)), 29 | stage4=dict( 30 | num_modules=3, 31 | num_branches=4, 32 | block='BASIC', 33 | num_blocks=(4, 4, 4, 4), 34 | num_channels=(18, 36, 72, 144)))), 35 | decode_head=dict( 36 | type='FCNHead', 37 | in_channels=[18, 36, 72, 144], 38 | in_index=(0, 1, 2, 3), 39 | channels=sum([18, 36, 72, 144]), 40 | input_transform='resize_concat', 41 | kernel_size=1, 42 | num_convs=1, 43 | concat_input=False, 44 | dropout_ratio=-1, 45 | num_classes=19, 46 | norm_cfg=norm_cfg, 47 | align_corners=False, 48 | 
loss_decode=dict( 49 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 50 | # model training and testing settings 51 | train_cfg=dict(), 52 | test_cfg=dict(mode='whole')) 53 | -------------------------------------------------------------------------------- /configs/_base_/models/fcn_r50-d8.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | model = dict( 4 | type='EncoderDecoder', 5 | pretrained='open-mmlab://resnet50_v1c', 6 | backbone=dict( 7 | type='ResNetV1c', 8 | depth=50, 9 | num_stages=4, 10 | out_indices=(0, 1, 2, 3), 11 | dilations=(1, 1, 2, 4), 12 | strides=(1, 2, 1, 1), 13 | norm_cfg=norm_cfg, 14 | norm_eval=False, 15 | style='pytorch', 16 | contract_dilation=True), 17 | decode_head=dict( 18 | type='FCNHead', 19 | in_channels=2048, 20 | in_index=3, 21 | channels=512, 22 | num_convs=2, 23 | concat_input=True, 24 | dropout_ratio=0.1, 25 | num_classes=19, 26 | norm_cfg=norm_cfg, 27 | align_corners=False, 28 | loss_decode=dict( 29 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 30 | auxiliary_head=dict( 31 | type='FCNHead', 32 | in_channels=1024, 33 | in_index=2, 34 | channels=256, 35 | num_convs=1, 36 | concat_input=False, 37 | dropout_ratio=0.1, 38 | num_classes=19, 39 | norm_cfg=norm_cfg, 40 | align_corners=False, 41 | loss_decode=dict( 42 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 43 | # model training and testing settings 44 | train_cfg=dict(), 45 | test_cfg=dict(mode='whole')) 46 | -------------------------------------------------------------------------------- /configs/_base_/models/fcn_unet_s5-d16.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | model = dict( 4 | type='EncoderDecoder', 5 | pretrained=None, 6 | backbone=dict( 7 | type='UNet', 8 | in_channels=3, 9 | base_channels=64, 10 | num_stages=5, 11 | strides=(1, 1, 1, 1, 1), 12 | enc_num_convs=(2, 2, 2, 2, 2), 13 | dec_num_convs=(2, 2, 2, 2), 14 | downsamples=(True, True, True, True), 15 | enc_dilations=(1, 1, 1, 1, 1), 16 | dec_dilations=(1, 1, 1, 1), 17 | with_cp=False, 18 | conv_cfg=None, 19 | norm_cfg=norm_cfg, 20 | act_cfg=dict(type='ReLU'), 21 | upsample_cfg=dict(type='InterpConv'), 22 | norm_eval=False), 23 | decode_head=dict( 24 | type='FCNHead', 25 | in_channels=64, 26 | in_index=4, 27 | channels=64, 28 | num_convs=1, 29 | concat_input=False, 30 | dropout_ratio=0.1, 31 | num_classes=2, 32 | norm_cfg=norm_cfg, 33 | align_corners=False, 34 | loss_decode=dict( 35 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 36 | auxiliary_head=dict( 37 | type='FCNHead', 38 | in_channels=128, 39 | in_index=3, 40 | channels=64, 41 | num_convs=1, 42 | concat_input=False, 43 | dropout_ratio=0.1, 44 | num_classes=2, 45 | norm_cfg=norm_cfg, 46 | align_corners=False, 47 | loss_decode=dict( 48 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 49 | # model training and testing settings 50 | train_cfg=dict(), 51 | test_cfg=dict(mode='slide', crop_size=(256, 256), stride=(170, 170))) 52 | -------------------------------------------------------------------------------- /configs/_base_/models/fpn_poolformer_s12.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | checkpoint_file = 
'https://download.openmmlab.com/mmclassification/v0/poolformer/poolformer-s12_3rdparty_32xb128_in1k_20220414-f8d83051.pth' # noqa 4 | custom_imports = dict(imports='mmcls.models', allow_failed_imports=False) 5 | model = dict( 6 | type='EncoderDecoder', 7 | backbone=dict( 8 | type='mmcls.PoolFormer', 9 | arch='s12', 10 | init_cfg=dict( 11 | type='Pretrained', checkpoint=checkpoint_file, prefix='backbone.'), 12 | in_patch_size=7, 13 | in_stride=4, 14 | in_pad=2, 15 | down_patch_size=3, 16 | down_stride=2, 17 | down_pad=1, 18 | drop_rate=0., 19 | drop_path_rate=0., 20 | out_indices=(0, 2, 4, 6), 21 | frozen_stages=0, 22 | ), 23 | neck=dict( 24 | type='FPN', 25 | in_channels=[256, 512, 1024, 2048], 26 | out_channels=256, 27 | num_outs=4), 28 | decode_head=dict( 29 | type='FPNHead', 30 | in_channels=[256, 256, 256, 256], 31 | in_index=[0, 1, 2, 3], 32 | feature_strides=[4, 8, 16, 32], 33 | channels=128, 34 | dropout_ratio=0.1, 35 | num_classes=19, 36 | norm_cfg=norm_cfg, 37 | align_corners=False, 38 | loss_decode=dict( 39 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 40 | # model training and testing settings 41 | train_cfg=dict(), 42 | test_cfg=dict(mode='whole')) 43 | -------------------------------------------------------------------------------- /configs/_base_/models/fpn_r50.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | model = dict( 4 | type='EncoderDecoder', 5 | pretrained='open-mmlab://resnet50_v1c', 6 | backbone=dict( 7 | type='ResNetV1c', 8 | depth=50, 9 | num_stages=4, 10 | out_indices=(0, 1, 2, 3), 11 | dilations=(1, 1, 1, 1), 12 | strides=(1, 2, 2, 2), 13 | norm_cfg=norm_cfg, 14 | norm_eval=False, 15 | style='pytorch', 16 | contract_dilation=True), 17 | neck=dict( 18 | type='FPN', 19 | in_channels=[256, 512, 1024, 2048], 20 | out_channels=256, 21 | num_outs=4), 22 | decode_head=dict( 23 | type='FPNHead', 24 | in_channels=[256, 256, 256, 256], 25 | in_index=[0, 1, 2, 3], 26 | feature_strides=[4, 8, 16, 32], 27 | channels=128, 28 | dropout_ratio=0.1, 29 | num_classes=19, 30 | norm_cfg=norm_cfg, 31 | align_corners=False, 32 | loss_decode=dict( 33 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 34 | # model training and testing settings 35 | train_cfg=dict(), 36 | test_cfg=dict(mode='whole')) 37 | -------------------------------------------------------------------------------- /configs/_base_/models/gcnet_r50-d8.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | model = dict( 4 | type='EncoderDecoder', 5 | pretrained='open-mmlab://resnet50_v1c', 6 | backbone=dict( 7 | type='ResNetV1c', 8 | depth=50, 9 | num_stages=4, 10 | out_indices=(0, 1, 2, 3), 11 | dilations=(1, 1, 2, 4), 12 | strides=(1, 2, 1, 1), 13 | norm_cfg=norm_cfg, 14 | norm_eval=False, 15 | style='pytorch', 16 | contract_dilation=True), 17 | decode_head=dict( 18 | type='GCHead', 19 | in_channels=2048, 20 | in_index=3, 21 | channels=512, 22 | ratio=1 / 4., 23 | pooling_type='att', 24 | fusion_types=('channel_add', ), 25 | dropout_ratio=0.1, 26 | num_classes=19, 27 | norm_cfg=norm_cfg, 28 | align_corners=False, 29 | loss_decode=dict( 30 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 31 | auxiliary_head=dict( 32 | type='FCNHead', 33 | in_channels=1024, 34 | in_index=2, 35 | channels=256, 36 | num_convs=1, 37 | concat_input=False, 38 | 
dropout_ratio=0.1, 39 | num_classes=19, 40 | norm_cfg=norm_cfg, 41 | align_corners=False, 42 | loss_decode=dict( 43 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 44 | # model training and testing settings 45 | train_cfg=dict(), 46 | test_cfg=dict(mode='whole')) 47 | -------------------------------------------------------------------------------- /configs/_base_/models/isanet_r50-d8.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | model = dict( 4 | type='EncoderDecoder', 5 | pretrained='open-mmlab://resnet50_v1c', 6 | backbone=dict( 7 | type='ResNetV1c', 8 | depth=50, 9 | num_stages=4, 10 | out_indices=(0, 1, 2, 3), 11 | dilations=(1, 1, 2, 4), 12 | strides=(1, 2, 1, 1), 13 | norm_cfg=norm_cfg, 14 | norm_eval=False, 15 | style='pytorch', 16 | contract_dilation=True), 17 | decode_head=dict( 18 | type='ISAHead', 19 | in_channels=2048, 20 | in_index=3, 21 | channels=512, 22 | isa_channels=256, 23 | down_factor=(8, 8), 24 | dropout_ratio=0.1, 25 | num_classes=19, 26 | norm_cfg=norm_cfg, 27 | align_corners=False, 28 | loss_decode=dict( 29 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 30 | auxiliary_head=dict( 31 | type='FCNHead', 32 | in_channels=1024, 33 | in_index=2, 34 | channels=256, 35 | num_convs=1, 36 | concat_input=False, 37 | dropout_ratio=0.1, 38 | num_classes=19, 39 | norm_cfg=norm_cfg, 40 | align_corners=False, 41 | loss_decode=dict( 42 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 43 | # model training and testing settings 44 | train_cfg=dict(), 45 | test_cfg=dict(mode='whole')) 46 | -------------------------------------------------------------------------------- /configs/_base_/models/lraspp_m-v3-d8.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', eps=0.001, requires_grad=True) 3 | model = dict( 4 | type='EncoderDecoder', 5 | backbone=dict( 6 | type='MobileNetV3', 7 | arch='large', 8 | out_indices=(1, 3, 16), 9 | norm_cfg=norm_cfg), 10 | decode_head=dict( 11 | type='LRASPPHead', 12 | in_channels=(16, 24, 960), 13 | in_index=(0, 1, 2), 14 | channels=128, 15 | input_transform='multiple_select', 16 | dropout_ratio=0.1, 17 | num_classes=19, 18 | norm_cfg=norm_cfg, 19 | act_cfg=dict(type='ReLU'), 20 | align_corners=False, 21 | loss_decode=dict( 22 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 23 | # model training and testing settings 24 | train_cfg=dict(), 25 | test_cfg=dict(mode='whole')) 26 | -------------------------------------------------------------------------------- /configs/_base_/models/nonlocal_r50-d8.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | model = dict( 4 | type='EncoderDecoder', 5 | pretrained='open-mmlab://resnet50_v1c', 6 | backbone=dict( 7 | type='ResNetV1c', 8 | depth=50, 9 | num_stages=4, 10 | out_indices=(0, 1, 2, 3), 11 | dilations=(1, 1, 2, 4), 12 | strides=(1, 2, 1, 1), 13 | norm_cfg=norm_cfg, 14 | norm_eval=False, 15 | style='pytorch', 16 | contract_dilation=True), 17 | decode_head=dict( 18 | type='NLHead', 19 | in_channels=2048, 20 | in_index=3, 21 | channels=512, 22 | dropout_ratio=0.1, 23 | reduction=2, 24 | use_scale=True, 25 | mode='embedded_gaussian', 26 | num_classes=19, 27 | norm_cfg=norm_cfg, 28 | align_corners=False, 29 | loss_decode=dict( 30 | 
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 31 | auxiliary_head=dict( 32 | type='FCNHead', 33 | in_channels=1024, 34 | in_index=2, 35 | channels=256, 36 | num_convs=1, 37 | concat_input=False, 38 | dropout_ratio=0.1, 39 | num_classes=19, 40 | norm_cfg=norm_cfg, 41 | align_corners=False, 42 | loss_decode=dict( 43 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 44 | # model training and testing settings 45 | train_cfg=dict(), 46 | test_cfg=dict(mode='whole')) 47 | -------------------------------------------------------------------------------- /configs/_base_/models/ocrnet_r50-d8.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | model = dict( 4 | type='CascadeEncoderDecoder', 5 | num_stages=2, 6 | pretrained='open-mmlab://resnet50_v1c', 7 | backbone=dict( 8 | type='ResNetV1c', 9 | depth=50, 10 | num_stages=4, 11 | out_indices=(0, 1, 2, 3), 12 | dilations=(1, 1, 2, 4), 13 | strides=(1, 2, 1, 1), 14 | norm_cfg=norm_cfg, 15 | norm_eval=False, 16 | style='pytorch', 17 | contract_dilation=True), 18 | decode_head=[ 19 | dict( 20 | type='FCNHead', 21 | in_channels=1024, 22 | in_index=2, 23 | channels=256, 24 | num_convs=1, 25 | concat_input=False, 26 | dropout_ratio=0.1, 27 | num_classes=19, 28 | norm_cfg=norm_cfg, 29 | align_corners=False, 30 | loss_decode=dict( 31 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 32 | dict( 33 | type='OCRHead', 34 | in_channels=2048, 35 | in_index=3, 36 | channels=512, 37 | ocr_channels=256, 38 | dropout_ratio=0.1, 39 | num_classes=19, 40 | norm_cfg=norm_cfg, 41 | align_corners=False, 42 | loss_decode=dict( 43 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)) 44 | ], 45 | # model training and testing settings 46 | train_cfg=dict(), 47 | test_cfg=dict(mode='whole')) 48 | -------------------------------------------------------------------------------- /configs/_base_/models/pointrend_r50.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | model = dict( 4 | type='CascadeEncoderDecoder', 5 | num_stages=2, 6 | pretrained='open-mmlab://resnet50_v1c', 7 | backbone=dict( 8 | type='ResNetV1c', 9 | depth=50, 10 | num_stages=4, 11 | out_indices=(0, 1, 2, 3), 12 | dilations=(1, 1, 1, 1), 13 | strides=(1, 2, 2, 2), 14 | norm_cfg=norm_cfg, 15 | norm_eval=False, 16 | style='pytorch', 17 | contract_dilation=True), 18 | neck=dict( 19 | type='FPN', 20 | in_channels=[256, 512, 1024, 2048], 21 | out_channels=256, 22 | num_outs=4), 23 | decode_head=[ 24 | dict( 25 | type='FPNHead', 26 | in_channels=[256, 256, 256, 256], 27 | in_index=[0, 1, 2, 3], 28 | feature_strides=[4, 8, 16, 32], 29 | channels=128, 30 | dropout_ratio=-1, 31 | num_classes=19, 32 | norm_cfg=norm_cfg, 33 | align_corners=False, 34 | loss_decode=dict( 35 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 36 | dict( 37 | type='PointHead', 38 | in_channels=[256], 39 | in_index=[0], 40 | channels=256, 41 | num_fcs=3, 42 | coarse_pred_each_layer=True, 43 | dropout_ratio=-1, 44 | num_classes=19, 45 | align_corners=False, 46 | loss_decode=dict( 47 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)) 48 | ], 49 | # model training and testing settings 50 | train_cfg=dict( 51 | num_points=2048, oversample_ratio=3, importance_sample_ratio=0.75), 52 | test_cfg=dict( 53 | mode='whole', 54 | subdivision_steps=2, 55 | 
subdivision_num_points=8196, 56 | scale_factor=2)) 57 | -------------------------------------------------------------------------------- /configs/_base_/models/psanet_r50-d8.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | model = dict( 4 | type='EncoderDecoder', 5 | pretrained='open-mmlab://resnet50_v1c', 6 | backbone=dict( 7 | type='ResNetV1c', 8 | depth=50, 9 | num_stages=4, 10 | out_indices=(0, 1, 2, 3), 11 | dilations=(1, 1, 2, 4), 12 | strides=(1, 2, 1, 1), 13 | norm_cfg=norm_cfg, 14 | norm_eval=False, 15 | style='pytorch', 16 | contract_dilation=True), 17 | decode_head=dict( 18 | type='PSAHead', 19 | in_channels=2048, 20 | in_index=3, 21 | channels=512, 22 | mask_size=(97, 97), 23 | psa_type='bi-direction', 24 | compact=False, 25 | shrink_factor=2, 26 | normalization_factor=1.0, 27 | psa_softmax=True, 28 | dropout_ratio=0.1, 29 | num_classes=19, 30 | norm_cfg=norm_cfg, 31 | align_corners=False, 32 | loss_decode=dict( 33 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 34 | auxiliary_head=dict( 35 | type='FCNHead', 36 | in_channels=1024, 37 | in_index=2, 38 | channels=256, 39 | num_convs=1, 40 | concat_input=False, 41 | dropout_ratio=0.1, 42 | num_classes=19, 43 | norm_cfg=norm_cfg, 44 | align_corners=False, 45 | loss_decode=dict( 46 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 47 | # model training and testing settings 48 | train_cfg=dict(), 49 | test_cfg=dict(mode='whole')) 50 | -------------------------------------------------------------------------------- /configs/_base_/models/pspnet_r50-d8.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | model = dict( 4 | type='EncoderDecoder', 5 | pretrained='open-mmlab://resnet50_v1c', 6 | backbone=dict( 7 | type='ResNetV1c', 8 | depth=50, 9 | num_stages=4, 10 | out_indices=(0, 1, 2, 3), 11 | dilations=(1, 1, 2, 4), 12 | strides=(1, 2, 1, 1), 13 | norm_cfg=norm_cfg, 14 | norm_eval=False, 15 | style='pytorch', 16 | contract_dilation=True), 17 | decode_head=dict( 18 | type='PSPHead', 19 | in_channels=2048, 20 | in_index=3, 21 | channels=512, 22 | pool_scales=(1, 2, 3, 6), 23 | dropout_ratio=0.1, 24 | num_classes=19, 25 | norm_cfg=norm_cfg, 26 | align_corners=False, 27 | loss_decode=dict( 28 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 29 | auxiliary_head=dict( 30 | type='FCNHead', 31 | in_channels=1024, 32 | in_index=2, 33 | channels=256, 34 | num_convs=1, 35 | concat_input=False, 36 | dropout_ratio=0.1, 37 | num_classes=19, 38 | norm_cfg=norm_cfg, 39 | align_corners=False, 40 | loss_decode=dict( 41 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 42 | # model training and testing settings 43 | train_cfg=dict(), 44 | test_cfg=dict(mode='whole')) 45 | -------------------------------------------------------------------------------- /configs/_base_/models/pspnet_unet_s5-d16.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | model = dict( 4 | type='EncoderDecoder', 5 | pretrained=None, 6 | backbone=dict( 7 | type='UNet', 8 | in_channels=3, 9 | base_channels=64, 10 | num_stages=5, 11 | strides=(1, 1, 1, 1, 1), 12 | enc_num_convs=(2, 2, 2, 2, 2), 13 | dec_num_convs=(2, 2, 2, 2), 14 | downsamples=(True, True, True, True), 15 | 
enc_dilations=(1, 1, 1, 1, 1), 16 | dec_dilations=(1, 1, 1, 1), 17 | with_cp=False, 18 | conv_cfg=None, 19 | norm_cfg=norm_cfg, 20 | act_cfg=dict(type='ReLU'), 21 | upsample_cfg=dict(type='InterpConv'), 22 | norm_eval=False), 23 | decode_head=dict( 24 | type='PSPHead', 25 | in_channels=64, 26 | in_index=4, 27 | channels=16, 28 | pool_scales=(1, 2, 3, 6), 29 | dropout_ratio=0.1, 30 | num_classes=2, 31 | norm_cfg=norm_cfg, 32 | align_corners=False, 33 | loss_decode=dict( 34 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 35 | auxiliary_head=dict( 36 | type='FCNHead', 37 | in_channels=128, 38 | in_index=3, 39 | channels=64, 40 | num_convs=1, 41 | concat_input=False, 42 | dropout_ratio=0.1, 43 | num_classes=2, 44 | norm_cfg=norm_cfg, 45 | align_corners=False, 46 | loss_decode=dict( 47 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 48 | # model training and testing settings 49 | train_cfg=dict(), 50 | test_cfg=dict(mode='slide', crop_size=(256, 256), stride=(170, 170))) 51 | -------------------------------------------------------------------------------- /configs/_base_/models/segformer_mit-b0.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | model = dict( 4 | type='EncoderDecoder', 5 | pretrained=None, 6 | backbone=dict( 7 | type='MixVisionTransformer', 8 | in_channels=3, 9 | embed_dims=32, 10 | num_stages=4, 11 | num_layers=[2, 2, 2, 2], 12 | num_heads=[1, 2, 5, 8], 13 | patch_sizes=[7, 3, 3, 3], 14 | sr_ratios=[8, 4, 2, 1], 15 | out_indices=(0, 1, 2, 3), 16 | mlp_ratio=4, 17 | qkv_bias=True, 18 | drop_rate=0.0, 19 | attn_drop_rate=0.0, 20 | drop_path_rate=0.1), 21 | decode_head=dict( 22 | type='SegformerHead', 23 | in_channels=[32, 64, 160, 256], 24 | in_index=[0, 1, 2, 3], 25 | channels=256, 26 | dropout_ratio=0.1, 27 | num_classes=19, 28 | norm_cfg=norm_cfg, 29 | align_corners=False, 30 | loss_decode=dict( 31 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 32 | # model training and testing settings 33 | train_cfg=dict(), 34 | test_cfg=dict(mode='whole')) 35 | -------------------------------------------------------------------------------- /configs/_base_/models/segformer_mit-b2_segformer_head_unet_fc.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | 4 | checkpoint = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/segformer/mit_b2_20220624-66e8bf70.pth' # noqa 5 | model = dict( 6 | type='EncoderDecoderFreeze', 7 | freeze_parameters=['backbone', 'decode_head'], 8 | pretrained=checkpoint, 9 | backbone=dict( 10 | type='MixVisionTransformer', 11 | in_channels=3, 12 | embed_dims=64, 13 | num_stages=4, 14 | num_layers=[3, 4, 6, 3], 15 | num_heads=[1, 2, 5, 8], 16 | patch_sizes=[7, 3, 3, 3], 17 | sr_ratios=[8, 4, 2, 1], 18 | out_indices=(0, 1, 2, 3), 19 | mlp_ratio=4, 20 | qkv_bias=True, 21 | drop_rate=0.0, 22 | attn_drop_rate=0.0, 23 | drop_path_rate=0.1), 24 | decode_head=dict( 25 | type='SegformerHeadUnetFCHead', 26 | # unet params 27 | pretrained='pretrained/segformer_mit-b2_512x512_160k_ade20k_decode_head.pth', 28 | dim=256, 29 | out_dim=256, 30 | unet_channels=272, 31 | dim_mults=[1,2,4], 32 | cat_embedding_dim=16, 33 | # decode head params 34 | in_channels=[64, 128, 320, 512], 35 | in_index=[0, 1, 2, 3], 36 | channels=256, 37 | dropout_ratio=0.1, 38 | num_classes=150, 39 | norm_cfg=norm_cfg, 40 | 
align_corners=False, 41 | # ignore_index=0, # ignore background 42 | loss_decode=dict( 43 | type='CrossEntropyLoss', 44 | use_sigmoid=False, 45 | loss_weight=1.0) 46 | ), 47 | # model training and testing settings 48 | train_cfg=dict(), 49 | test_cfg=dict(mode='whole')) 50 | -------------------------------------------------------------------------------- /configs/_base_/models/segmenter_vit-b16_mask.py: -------------------------------------------------------------------------------- 1 | checkpoint = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/segmenter/vit_base_p16_384_20220308-96dfe169.pth' # noqa 2 | # model settings 3 | backbone_norm_cfg = dict(type='LN', eps=1e-6, requires_grad=True) 4 | model = dict( 5 | type='EncoderDecoder', 6 | pretrained=checkpoint, 7 | backbone=dict( 8 | type='VisionTransformer', 9 | img_size=(512, 512), 10 | patch_size=16, 11 | in_channels=3, 12 | embed_dims=768, 13 | num_layers=12, 14 | num_heads=12, 15 | drop_path_rate=0.1, 16 | attn_drop_rate=0.0, 17 | drop_rate=0.0, 18 | final_norm=True, 19 | norm_cfg=backbone_norm_cfg, 20 | with_cls_token=True, 21 | interpolate_mode='bicubic', 22 | ), 23 | decode_head=dict( 24 | type='SegmenterMaskTransformerHead', 25 | in_channels=768, 26 | channels=768, 27 | num_classes=150, 28 | num_layers=2, 29 | num_heads=12, 30 | embed_dims=768, 31 | dropout_ratio=0.0, 32 | loss_decode=dict( 33 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), 34 | ), 35 | test_cfg=dict(mode='slide', crop_size=(512, 512), stride=(480, 480)), 36 | ) 37 | -------------------------------------------------------------------------------- /configs/_base_/models/twins_pcpvt-s_fpn.py: -------------------------------------------------------------------------------- 1 | checkpoint = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/twins/pcpvt_small_20220308-e638c41c.pth' # noqa 2 | 3 | # model settings 4 | backbone_norm_cfg = dict(type='LN') 5 | norm_cfg = dict(type='SyncBN', requires_grad=True) 6 | model = dict( 7 | type='EncoderDecoder', 8 | backbone=dict( 9 | type='PCPVT', 10 | init_cfg=dict(type='Pretrained', checkpoint=checkpoint), 11 | in_channels=3, 12 | embed_dims=[64, 128, 320, 512], 13 | num_heads=[1, 2, 5, 8], 14 | patch_sizes=[4, 2, 2, 2], 15 | strides=[4, 2, 2, 2], 16 | mlp_ratios=[8, 8, 4, 4], 17 | out_indices=(0, 1, 2, 3), 18 | qkv_bias=True, 19 | norm_cfg=backbone_norm_cfg, 20 | depths=[3, 4, 6, 3], 21 | sr_ratios=[8, 4, 2, 1], 22 | norm_after_stage=False, 23 | drop_rate=0.0, 24 | attn_drop_rate=0., 25 | drop_path_rate=0.2), 26 | neck=dict( 27 | type='FPN', 28 | in_channels=[64, 128, 320, 512], 29 | out_channels=256, 30 | num_outs=4), 31 | decode_head=dict( 32 | type='FPNHead', 33 | in_channels=[256, 256, 256, 256], 34 | in_index=[0, 1, 2, 3], 35 | feature_strides=[4, 8, 16, 32], 36 | channels=128, 37 | dropout_ratio=0.1, 38 | num_classes=150, 39 | norm_cfg=norm_cfg, 40 | align_corners=False, 41 | loss_decode=dict( 42 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 43 | # model training and testing settings 44 | train_cfg=dict(), 45 | test_cfg=dict(mode='whole')) 46 | -------------------------------------------------------------------------------- /configs/_base_/models/twins_pcpvt-s_upernet.py: -------------------------------------------------------------------------------- 1 | checkpoint = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/twins/pcpvt_small_20220308-e638c41c.pth' # noqa 2 | 3 | # model settings 4 | backbone_norm_cfg = dict(type='LN') 5 | norm_cfg = 
dict(type='SyncBN', requires_grad=True) 6 | model = dict( 7 | type='EncoderDecoder', 8 | backbone=dict( 9 | type='PCPVT', 10 | init_cfg=dict(type='Pretrained', checkpoint=checkpoint), 11 | in_channels=3, 12 | embed_dims=[64, 128, 320, 512], 13 | num_heads=[1, 2, 5, 8], 14 | patch_sizes=[4, 2, 2, 2], 15 | strides=[4, 2, 2, 2], 16 | mlp_ratios=[8, 8, 4, 4], 17 | out_indices=(0, 1, 2, 3), 18 | qkv_bias=True, 19 | norm_cfg=backbone_norm_cfg, 20 | depths=[3, 4, 6, 3], 21 | sr_ratios=[8, 4, 2, 1], 22 | norm_after_stage=False, 23 | drop_rate=0.0, 24 | attn_drop_rate=0., 25 | drop_path_rate=0.2), 26 | decode_head=dict( 27 | type='UPerHead', 28 | in_channels=[64, 128, 320, 512], 29 | in_index=[0, 1, 2, 3], 30 | pool_scales=(1, 2, 3, 6), 31 | channels=512, 32 | dropout_ratio=0.1, 33 | num_classes=150, 34 | norm_cfg=norm_cfg, 35 | align_corners=False, 36 | loss_decode=dict( 37 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 38 | auxiliary_head=dict( 39 | type='FCNHead', 40 | in_channels=320, 41 | in_index=2, 42 | channels=256, 43 | num_convs=1, 44 | concat_input=False, 45 | dropout_ratio=0.1, 46 | num_classes=150, 47 | norm_cfg=norm_cfg, 48 | align_corners=False, 49 | loss_decode=dict( 50 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 51 | # model training and testing settings 52 | train_cfg=dict(), 53 | test_cfg=dict(mode='whole')) 54 | -------------------------------------------------------------------------------- /configs/_base_/models/upernet_beit.py: -------------------------------------------------------------------------------- 1 | norm_cfg = dict(type='SyncBN', requires_grad=True) 2 | model = dict( 3 | type='EncoderDecoder', 4 | pretrained=None, 5 | backbone=dict( 6 | type='BEiT', 7 | img_size=(640, 640), 8 | patch_size=16, 9 | in_channels=3, 10 | embed_dims=768, 11 | num_layers=12, 12 | num_heads=12, 13 | mlp_ratio=4, 14 | out_indices=(3, 5, 7, 11), 15 | qv_bias=True, 16 | attn_drop_rate=0.0, 17 | drop_path_rate=0.1, 18 | norm_cfg=dict(type='LN', eps=1e-6), 19 | act_cfg=dict(type='GELU'), 20 | norm_eval=False, 21 | init_values=0.1), 22 | neck=dict(type='Feature2Pyramid', embed_dim=768, rescales=[4, 2, 1, 0.5]), 23 | decode_head=dict( 24 | type='UPerHead', 25 | in_channels=[768, 768, 768, 768], 26 | in_index=[0, 1, 2, 3], 27 | pool_scales=(1, 2, 3, 6), 28 | channels=768, 29 | dropout_ratio=0.1, 30 | num_classes=150, 31 | norm_cfg=norm_cfg, 32 | align_corners=False, 33 | loss_decode=dict( 34 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 35 | auxiliary_head=dict( 36 | type='FCNHead', 37 | in_channels=768, 38 | in_index=2, 39 | channels=256, 40 | num_convs=1, 41 | concat_input=False, 42 | dropout_ratio=0.1, 43 | num_classes=150, 44 | norm_cfg=norm_cfg, 45 | align_corners=False, 46 | loss_decode=dict( 47 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 48 | # model training and testing settings 49 | train_cfg=dict(), 50 | test_cfg=dict(mode='whole')) 51 | -------------------------------------------------------------------------------- /configs/_base_/models/upernet_beit_adapter.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # BEIT: BERT Pre-Training of Image Transformers (https://arxiv.org/abs/2106.08254) 3 | # Github source: https://github.com/microsoft/unilm/tree/master/beit 4 | # Copyright (c) 2021 Microsoft 5 | # Licensed under The MIT License [see LICENSE for details] 6 | # By Hangbo Bao 7 | # Based on timm, mmseg, 
setr, xcit and swin code bases 8 | # https://github.com/rwightman/pytorch-image-models/tree/master/timm 9 | # https://github.com/fudan-zvg/SETR 10 | # https://github.com/facebookresearch/xcit/ 11 | # https://github.com/microsoft/Swin-Transformer 12 | # --------------------------------------------------------' 13 | norm_cfg = dict(type='SyncBN', requires_grad=True) 14 | model = dict( 15 | type='EncoderDecoder', 16 | pretrained=None, 17 | backbone=dict( 18 | type='XCiT', 19 | patch_size=16, 20 | embed_dim=384, 21 | depth=12, 22 | num_heads=8, 23 | mlp_ratio=4, 24 | qkv_bias=True, 25 | use_abs_pos_emb=True, 26 | use_rel_pos_bias=False, 27 | ), 28 | decode_head=dict( 29 | type='UPerHead', 30 | in_channels=[384, 384, 384, 384], 31 | in_index=[0, 1, 2, 3], 32 | pool_scales=(1, 2, 3, 6), 33 | channels=512, 34 | dropout_ratio=0.1, 35 | num_classes=19, 36 | norm_cfg=norm_cfg, 37 | align_corners=False, 38 | loss_decode=dict( 39 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 40 | auxiliary_head=dict( 41 | type='FCNHead', 42 | in_channels=384, 43 | in_index=2, 44 | channels=256, 45 | num_convs=1, 46 | concat_input=False, 47 | dropout_ratio=0.1, 48 | num_classes=19, 49 | norm_cfg=norm_cfg, 50 | align_corners=False, 51 | loss_decode=dict( 52 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 53 | # model training and testing settings 54 | train_cfg=dict(), 55 | test_cfg=dict(mode='whole')) -------------------------------------------------------------------------------- /configs/_base_/models/upernet_convnext.py: -------------------------------------------------------------------------------- 1 | norm_cfg = dict(type='SyncBN', requires_grad=True) 2 | custom_imports = dict(imports='mmcls.models', allow_failed_imports=False) 3 | checkpoint_file = 'https://download.openmmlab.com/mmclassification/v0/convnext/downstream/convnext-base_3rdparty_32xb128-noema_in1k_20220301-2a0ee547.pth' # noqa 4 | model = dict( 5 | type='EncoderDecoder', 6 | pretrained=None, 7 | backbone=dict( 8 | type='mmcls.ConvNeXt', 9 | arch='base', 10 | out_indices=[0, 1, 2, 3], 11 | drop_path_rate=0.4, 12 | layer_scale_init_value=1.0, 13 | gap_before_final_norm=False, 14 | init_cfg=dict( 15 | type='Pretrained', checkpoint=checkpoint_file, 16 | prefix='backbone.')), 17 | decode_head=dict( 18 | type='UPerHead', 19 | in_channels=[128, 256, 512, 1024], 20 | in_index=[0, 1, 2, 3], 21 | pool_scales=(1, 2, 3, 6), 22 | channels=512, 23 | dropout_ratio=0.1, 24 | num_classes=19, 25 | norm_cfg=norm_cfg, 26 | align_corners=False, 27 | loss_decode=dict( 28 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 29 | auxiliary_head=dict( 30 | type='FCNHead', 31 | in_channels=384, 32 | in_index=2, 33 | channels=256, 34 | num_convs=1, 35 | concat_input=False, 36 | dropout_ratio=0.1, 37 | num_classes=19, 38 | norm_cfg=norm_cfg, 39 | align_corners=False, 40 | loss_decode=dict( 41 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 42 | # model training and testing settings 43 | train_cfg=dict(), 44 | test_cfg=dict(mode='whole')) 45 | -------------------------------------------------------------------------------- /configs/_base_/models/upernet_mae.py: -------------------------------------------------------------------------------- 1 | norm_cfg = dict(type='SyncBN', requires_grad=True) 2 | model = dict( 3 | type='EncoderDecoder', 4 | pretrained=None, 5 | backbone=dict( 6 | type='MAE', 7 | img_size=(640, 640), 8 | patch_size=16, 9 | in_channels=3, 10 | embed_dims=768, 11 | num_layers=12, 12 | num_heads=12, 
13 | mlp_ratio=4, 14 | out_indices=(3, 5, 7, 11), 15 | attn_drop_rate=0.0, 16 | drop_path_rate=0.1, 17 | norm_cfg=dict(type='LN', eps=1e-6), 18 | act_cfg=dict(type='GELU'), 19 | norm_eval=False, 20 | init_values=0.1), 21 | neck=dict(type='Feature2Pyramid', embed_dim=768, rescales=[4, 2, 1, 0.5]), 22 | decode_head=dict( 23 | type='UPerHead', 24 | in_channels=[384, 384, 384, 384], 25 | in_index=[0, 1, 2, 3], 26 | pool_scales=(1, 2, 3, 6), 27 | channels=512, 28 | dropout_ratio=0.1, 29 | num_classes=19, 30 | norm_cfg=norm_cfg, 31 | align_corners=False, 32 | loss_decode=dict( 33 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 34 | auxiliary_head=dict( 35 | type='FCNHead', 36 | in_channels=384, 37 | in_index=2, 38 | channels=256, 39 | num_convs=1, 40 | concat_input=False, 41 | dropout_ratio=0.1, 42 | num_classes=19, 43 | norm_cfg=norm_cfg, 44 | align_corners=False, 45 | loss_decode=dict( 46 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 47 | # model training and testing settings 48 | train_cfg=dict(), 49 | test_cfg=dict(mode='whole')) 50 | -------------------------------------------------------------------------------- /configs/_base_/models/upernet_r50.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | model = dict( 4 | type='EncoderDecoder', 5 | pretrained='open-mmlab://resnet50_v1c', 6 | backbone=dict( 7 | type='ResNetV1c', 8 | depth=50, 9 | num_stages=4, 10 | out_indices=(0, 1, 2, 3), 11 | dilations=(1, 1, 1, 1), 12 | strides=(1, 2, 2, 2), 13 | norm_cfg=norm_cfg, 14 | norm_eval=False, 15 | style='pytorch', 16 | contract_dilation=True), 17 | decode_head=dict( 18 | type='UPerHead', 19 | in_channels=[256, 512, 1024, 2048], 20 | in_index=[0, 1, 2, 3], 21 | pool_scales=(1, 2, 3, 6), 22 | channels=512, 23 | dropout_ratio=0.1, 24 | num_classes=19, 25 | norm_cfg=norm_cfg, 26 | align_corners=False, 27 | loss_decode=dict( 28 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 29 | auxiliary_head=dict( 30 | type='FCNHead', 31 | in_channels=1024, 32 | in_index=2, 33 | channels=256, 34 | num_convs=1, 35 | concat_input=False, 36 | dropout_ratio=0.1, 37 | num_classes=19, 38 | norm_cfg=norm_cfg, 39 | align_corners=False, 40 | loss_decode=dict( 41 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 42 | # model training and testing settings 43 | train_cfg=dict(), 44 | test_cfg=dict(mode='whole')) 45 | -------------------------------------------------------------------------------- /configs/_base_/models/upernet_swin.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | backbone_norm_cfg = dict(type='LN', requires_grad=True) 4 | model = dict( 5 | type='EncoderDecoder', 6 | pretrained=None, 7 | backbone=dict( 8 | type='SwinTransformer', 9 | pretrain_img_size=224, 10 | embed_dims=96, 11 | patch_size=4, 12 | window_size=7, 13 | mlp_ratio=4, 14 | depths=[2, 2, 6, 2], 15 | num_heads=[3, 6, 12, 24], 16 | strides=(4, 2, 2, 2), 17 | out_indices=(0, 1, 2, 3), 18 | qkv_bias=True, 19 | qk_scale=None, 20 | patch_norm=True, 21 | drop_rate=0., 22 | attn_drop_rate=0., 23 | drop_path_rate=0.3, 24 | use_abs_pos_embed=False, 25 | act_cfg=dict(type='GELU'), 26 | norm_cfg=backbone_norm_cfg), 27 | decode_head=dict( 28 | type='UPerHead', 29 | in_channels=[96, 192, 384, 768], 30 | in_index=[0, 1, 2, 3], 31 | pool_scales=(1, 2, 3, 6), 32 | 
channels=512, 33 | dropout_ratio=0.1, 34 | num_classes=19, 35 | norm_cfg=norm_cfg, 36 | align_corners=False, 37 | loss_decode=dict( 38 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 39 | auxiliary_head=dict( 40 | type='FCNHead', 41 | in_channels=384, 42 | in_index=2, 43 | channels=256, 44 | num_convs=1, 45 | concat_input=False, 46 | dropout_ratio=0.1, 47 | num_classes=19, 48 | norm_cfg=norm_cfg, 49 | align_corners=False, 50 | loss_decode=dict( 51 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 52 | # model training and testing settings 53 | train_cfg=dict(), 54 | test_cfg=dict(mode='whole')) 55 | -------------------------------------------------------------------------------- /configs/_base_/models/upernet_vit-b16_ln_mln.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | model = dict( 4 | type='EncoderDecoder', 5 | pretrained='pretrain/jx_vit_base_p16_224-80ecf9dd.pth', 6 | backbone=dict( 7 | type='VisionTransformer', 8 | img_size=(512, 512), 9 | patch_size=16, 10 | in_channels=3, 11 | embed_dims=768, 12 | num_layers=12, 13 | num_heads=12, 14 | mlp_ratio=4, 15 | out_indices=(2, 5, 8, 11), 16 | qkv_bias=True, 17 | drop_rate=0.0, 18 | attn_drop_rate=0.0, 19 | drop_path_rate=0.0, 20 | with_cls_token=True, 21 | norm_cfg=dict(type='LN', eps=1e-6), 22 | act_cfg=dict(type='GELU'), 23 | norm_eval=False, 24 | interpolate_mode='bicubic'), 25 | neck=dict( 26 | type='MultiLevelNeck', 27 | in_channels=[768, 768, 768, 768], 28 | out_channels=768, 29 | scales=[4, 2, 1, 0.5]), 30 | decode_head=dict( 31 | type='UPerHead', 32 | in_channels=[768, 768, 768, 768], 33 | in_index=[0, 1, 2, 3], 34 | pool_scales=(1, 2, 3, 6), 35 | channels=512, 36 | dropout_ratio=0.1, 37 | num_classes=19, 38 | norm_cfg=norm_cfg, 39 | align_corners=False, 40 | loss_decode=dict( 41 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 42 | auxiliary_head=dict( 43 | type='FCNHead', 44 | in_channels=768, 45 | in_index=3, 46 | channels=256, 47 | num_convs=1, 48 | concat_input=False, 49 | dropout_ratio=0.1, 50 | num_classes=19, 51 | norm_cfg=norm_cfg, 52 | align_corners=False, 53 | loss_decode=dict( 54 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 55 | # model training and testing settings 56 | train_cfg=dict(), 57 | test_cfg=dict(mode='whole')) # yapf: disable 58 | -------------------------------------------------------------------------------- /configs/_base_/schedules/schedule_160k.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005) 3 | optimizer_config = dict() 4 | # learning policy 5 | lr_config = dict(policy='poly', power=0.9, min_lr=1e-4, by_epoch=False) 6 | # runtime settings 7 | runner = dict(type='IterBasedRunner', max_iters=160000) 8 | checkpoint_config = dict(by_epoch=False, interval=16000) 9 | evaluation = dict(interval=16000, metric='mIoU', pre_eval=True, save_best='mIoU') 10 | -------------------------------------------------------------------------------- /configs/_base_/schedules/schedule_20k.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005) 3 | optimizer_config = dict() 4 | # learning policy 5 | lr_config = dict(policy='poly', power=0.9, min_lr=1e-4, by_epoch=False) 6 | # runtime settings 7 | 
runner = dict(type='IterBasedRunner', max_iters=20000) 8 | checkpoint_config = dict(by_epoch=False, interval=2000) 9 | evaluation = dict(interval=2000, metric='mIoU', pre_eval=True, save_best='mIoU') 10 | -------------------------------------------------------------------------------- /configs/_base_/schedules/schedule_320k.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005) 3 | optimizer_config = dict() 4 | # learning policy 5 | lr_config = dict(policy='poly', power=0.9, min_lr=1e-4, by_epoch=False) 6 | # runtime settings 7 | runner = dict(type='IterBasedRunner', max_iters=320000) 8 | checkpoint_config = dict(by_epoch=False, interval=32000) 9 | evaluation = dict(interval=32000, metric='mIoU', save_best='mIoU') 10 | -------------------------------------------------------------------------------- /configs/_base_/schedules/schedule_400k.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005) 3 | optimizer_config = dict() 4 | # learning policy 5 | lr_config = dict(policy='poly', power=0.9, min_lr=1e-4, by_epoch=False) 6 | # runtime settings 7 | runner = dict(type='IterBasedRunner', max_iters=400000) 8 | checkpoint_config = dict(by_epoch=False, interval=16000) 9 | evaluation = dict(interval=16000, metric='mIoU', pre_eval=True, save_best='mIoU') 10 | -------------------------------------------------------------------------------- /configs/_base_/schedules/schedule_40k.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005) 3 | optimizer_config = dict() 4 | # learning policy 5 | lr_config = dict(policy='poly', power=0.9, min_lr=1e-4, by_epoch=False) 6 | # runtime settings 7 | runner = dict(type='IterBasedRunner', max_iters=40000) 8 | checkpoint_config = dict(by_epoch=False, interval=4000) 9 | evaluation = dict(interval=4000, metric='mIoU', pre_eval=True, save_best='mIoU') 10 | -------------------------------------------------------------------------------- /configs/_base_/schedules/schedule_80k.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005) 3 | optimizer_config = dict() 4 | # learning policy 5 | lr_config = dict(policy='poly', power=0.9, min_lr=1e-4, by_epoch=False) 6 | # runtime settings 7 | runner = dict(type='IterBasedRunner', max_iters=80000) 8 | checkpoint_config = dict(by_epoch=False, interval=8000) 9 | evaluation = dict(interval=8000, metric='mIoU', pre_eval=True, save_best='mIoU') 10 | -------------------------------------------------------------------------------- /mmseg/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import warnings 3 | 4 | import mmcv 5 | from packaging.version import parse 6 | 7 | from .version import __version__, version_info 8 | 9 | MMCV_MIN = '1.3.13' 10 | MMCV_MAX = '1.8.0' 11 | 12 | 13 | def digit_version(version_str: str, length: int = 4): 14 | """Convert a version string into a tuple of integers. 15 | 16 | This method is usually used for comparing two versions. For pre-release 17 | versions: alpha < beta < rc. 18 | 19 | Args: 20 | version_str (str): The version string. 
21 | length (int): The maximum number of version levels. Default: 4. 22 | 23 | Returns: 24 | tuple[int]: The version info in digits (integers). 25 | """ 26 | version = parse(version_str) 27 | assert version.release, f'failed to parse version {version_str}' 28 | release = list(version.release) 29 | release = release[:length] 30 | if len(release) < length: 31 | release = release + [0] * (length - len(release)) 32 | if version.is_prerelease: 33 | mapping = {'a': -3, 'b': -2, 'rc': -1} 34 | val = -4 35 | # version.pre can be None 36 | if version.pre: 37 | if version.pre[0] not in mapping: 38 | warnings.warn(f'unknown prerelease version {version.pre[0]}, ' 39 | 'version checking may go wrong') 40 | else: 41 | val = mapping[version.pre[0]] 42 | release.extend([val, version.pre[-1]]) 43 | else: 44 | release.extend([val, 0]) 45 | 46 | elif version.is_postrelease: 47 | release.extend([1, version.post]) 48 | else: 49 | release.extend([0, 0]) 50 | return tuple(release) 51 | 52 | 53 | mmcv_min_version = digit_version(MMCV_MIN) 54 | mmcv_max_version = digit_version(MMCV_MAX) 55 | mmcv_version = digit_version(mmcv.__version__) 56 | 57 | 58 | assert (mmcv_min_version <= mmcv_version < mmcv_max_version), \ 59 | f'MMCV=={mmcv.__version__} is used but incompatible. ' \ 60 | f'Please install mmcv>={mmcv_min_version}, <{mmcv_max_version}.' 61 | 62 | __all__ = ['__version__', 'version_info', 'digit_version'] 63 | -------------------------------------------------------------------------------- /mmseg/apis/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .inference import inference_segmentor, init_segmentor, show_result_pyplot 3 | from .test import multi_gpu_test, single_gpu_test 4 | from .train import (get_root_logger, init_random_seed, set_random_seed, 5 | train_segmentor) 6 | 7 | __all__ = [ 8 | 'get_root_logger', 'set_random_seed', 'train_segmentor', 'init_segmentor', 9 | 'inference_segmentor', 'multi_gpu_test', 'single_gpu_test', 10 | 'show_result_pyplot', 'init_random_seed' 11 | ] 12 | -------------------------------------------------------------------------------- /mmseg/core/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .builder import (OPTIMIZER_BUILDERS, build_optimizer, 3 | build_optimizer_constructor) 4 | from .evaluation import * # noqa: F401, F403 5 | from .hook import * # noqa: F401, F403 6 | from .optimizers import * # noqa: F401, F403 7 | from .seg import * # noqa: F401, F403 8 | from .utils import * # noqa: F401, F403 9 | 10 | __all__ = [ 11 | 'OPTIMIZER_BUILDERS', 'build_optimizer', 'build_optimizer_constructor' 12 | ] 13 | -------------------------------------------------------------------------------- /mmseg/core/builder.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
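# Usage sketch: a minimal, hedged example of the optimizer helpers defined
# below, assuming an installed mmseg/mmcv. The toy model and config values
# are illustrative assumptions, not values used by this repository.
if __name__ == '__main__':
    import torch.nn as nn

    from mmseg.core import build_optimizer

    toy_model = nn.Sequential(nn.Conv2d(3, 8, 3), nn.ReLU())
    # 'constructor' defaults to mmcv's DefaultOptimizerConstructor; the
    # remaining keys are forwarded to the torch optimizer class.
    optimizer = build_optimizer(
        toy_model,
        dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005))
    print(type(optimizer).__name__)  # SGD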
2 | import copy 3 | 4 | from mmcv.runner.optimizer import OPTIMIZER_BUILDERS as MMCV_OPTIMIZER_BUILDERS 5 | from mmcv.utils import Registry, build_from_cfg 6 | 7 | OPTIMIZER_BUILDERS = Registry( 8 | 'optimizer builder', parent=MMCV_OPTIMIZER_BUILDERS) 9 | 10 | 11 | def build_optimizer_constructor(cfg): 12 | constructor_type = cfg.get('type') 13 | if constructor_type in OPTIMIZER_BUILDERS: 14 | return build_from_cfg(cfg, OPTIMIZER_BUILDERS) 15 | elif constructor_type in MMCV_OPTIMIZER_BUILDERS: 16 | return build_from_cfg(cfg, MMCV_OPTIMIZER_BUILDERS) 17 | else: 18 | raise KeyError(f'{constructor_type} is not registered ' 19 | 'in the optimizer builder registry.') 20 | 21 | 22 | def build_optimizer(model, cfg): 23 | optimizer_cfg = copy.deepcopy(cfg) 24 | constructor_type = optimizer_cfg.pop('constructor', 25 | 'DefaultOptimizerConstructor') 26 | paramwise_cfg = optimizer_cfg.pop('paramwise_cfg', None) 27 | optim_constructor = build_optimizer_constructor( 28 | dict( 29 | type=constructor_type, 30 | optimizer_cfg=optimizer_cfg, 31 | paramwise_cfg=paramwise_cfg)) 32 | optimizer = optim_constructor(model) 33 | return optimizer 34 | -------------------------------------------------------------------------------- /mmseg/core/evaluation/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .class_names import get_classes, get_palette 3 | from .eval_hooks import DistEvalHook, EvalHook 4 | from .metrics import (eval_metrics, intersect_and_union, mean_dice, 5 | mean_fscore, mean_iou, pre_eval_to_metrics) 6 | 7 | __all__ = [ 8 | 'EvalHook', 'DistEvalHook', 'mean_dice', 'mean_iou', 'mean_fscore', 9 | 'eval_metrics', 'get_classes', 'get_palette', 'pre_eval_to_metrics', 10 | 'intersect_and_union' 11 | ] 12 | -------------------------------------------------------------------------------- /mmseg/core/hook/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .wandblogger_hook import MMSegWandbHook 3 | 4 | __all__ = ['MMSegWandbHook'] 5 | -------------------------------------------------------------------------------- /mmseg/core/optimizers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .layer_decay_optimizer_constructor import ( 3 | LayerDecayOptimizerConstructor, LearningRateDecayOptimizerConstructor) 4 | 5 | __all__ = [ 6 | 'LearningRateDecayOptimizerConstructor', 'LayerDecayOptimizerConstructor' 7 | ] 8 | -------------------------------------------------------------------------------- /mmseg/core/seg/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .builder import build_pixel_sampler 3 | from .sampler import BasePixelSampler, OHEMPixelSampler 4 | 5 | __all__ = ['build_pixel_sampler', 'BasePixelSampler', 'OHEMPixelSampler'] 6 | -------------------------------------------------------------------------------- /mmseg/core/seg/builder.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
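# Usage sketch: decode heads call the builder below with a sampler config
# plus extra default args (normally context=self). A hedged example assuming
# an installed mmseg; thresh is an illustrative value, and context=None is
# only acceptable here because the sampler is never asked to sample.
if __name__ == '__main__':
    from mmseg.core import build_pixel_sampler

    sampler = build_pixel_sampler(
        dict(type='OHEMPixelSampler', thresh=0.7, min_kept=100000),
        context=None)
    print(type(sampler).__name__)  # OHEMPixelSampler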
2 | from mmcv.utils import Registry, build_from_cfg 3 | 4 | PIXEL_SAMPLERS = Registry('pixel sampler') 5 | 6 | 7 | def build_pixel_sampler(cfg, **default_args): 8 | """Build pixel sampler for segmentation map.""" 9 | return build_from_cfg(cfg, PIXEL_SAMPLERS, default_args) 10 | -------------------------------------------------------------------------------- /mmseg/core/seg/sampler/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .base_pixel_sampler import BasePixelSampler 3 | from .ohem_pixel_sampler import OHEMPixelSampler 4 | 5 | __all__ = ['BasePixelSampler', 'OHEMPixelSampler'] 6 | -------------------------------------------------------------------------------- /mmseg/core/seg/sampler/base_pixel_sampler.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from abc import ABCMeta, abstractmethod 3 | 4 | 5 | class BasePixelSampler(metaclass=ABCMeta): 6 | """Base class of pixel sampler.""" 7 | 8 | def __init__(self, **kwargs): 9 | pass 10 | 11 | @abstractmethod 12 | def sample(self, seg_logit, seg_label): 13 | """Placeholder for sample function.""" 14 | -------------------------------------------------------------------------------- /mmseg/core/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .dist_util import check_dist_init, sync_random_seed 3 | from .misc import add_prefix 4 | 5 | __all__ = ['add_prefix', 'check_dist_init', 'sync_random_seed'] 6 | -------------------------------------------------------------------------------- /mmseg/core/utils/dist_util.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import numpy as np 3 | import torch 4 | import torch.distributed as dist 5 | from mmcv.runner import get_dist_info 6 | 7 | 8 | def check_dist_init(): 9 | return dist.is_available() and dist.is_initialized() 10 | 11 | 12 | def sync_random_seed(seed=None, device='cuda'): 13 | """Make sure different ranks share the same seed. All workers must call 14 | this function, otherwise it will deadlock. This method is generally used in 15 | `DistributedSampler`, because the seed should be identical across all 16 | processes in the distributed group. 17 | 18 | In distributed sampling, different ranks should sample non-overlapped 19 | data in the dataset. Therefore, this function is used to make sure that 20 | each rank shuffles the data indices in the same order based 21 | on the same seed. Then different ranks could use different indices 22 | to select non-overlapped data from the same data list. 23 | 24 | Args: 25 | seed (int, Optional): The seed. Default to None. 26 | device (str): The device where the seed will be put on. 27 | Default to 'cuda'. 28 | Returns: 29 | int: Seed to be used. 
30 | """ 31 | 32 | if seed is None: 33 | seed = np.random.randint(2**31) 34 | assert isinstance(seed, int) 35 | 36 | rank, world_size = get_dist_info() 37 | 38 | if world_size == 1: 39 | return seed 40 | 41 | if rank == 0: 42 | random_num = torch.tensor(seed, dtype=torch.int32, device=device) 43 | else: 44 | random_num = torch.tensor(0, dtype=torch.int32, device=device) 45 | dist.broadcast(random_num, src=0) 46 | return random_num.item() 47 | -------------------------------------------------------------------------------- /mmseg/core/utils/misc.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | def add_prefix(inputs, prefix): 3 | """Add prefix for dict. 4 | 5 | Args: 6 | inputs (dict): The input dict with str keys. 7 | prefix (str): The prefix to add. 8 | 9 | Returns: 10 | 11 | dict: The dict with keys updated with ``prefix``. 12 | """ 13 | 14 | outputs = dict() 15 | for name, value in inputs.items(): 16 | outputs[f'{prefix}.{name}'] = value 17 | 18 | return outputs 19 | -------------------------------------------------------------------------------- /mmseg/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .ade import ADE20KDataset 3 | from .builder import DATASETS, PIPELINES, build_dataloader, build_dataset 4 | from .chase_db1 import ChaseDB1Dataset 5 | from .cityscapes import CityscapesDataset 6 | from .coco_stuff import COCOStuffDataset 7 | from .custom import CustomDataset 8 | from .dark_zurich import DarkZurichDataset 9 | from .dataset_wrappers import (ConcatDataset, MultiImageMixDataset, 10 | RepeatDataset) 11 | from .drive import DRIVEDataset 12 | from .face import FaceOccludedDataset 13 | from .hrf import HRFDataset 14 | from .imagenets import (ImageNetSDataset, LoadImageNetSAnnotations, 15 | LoadImageNetSImageFromFile) 16 | from .isaid import iSAIDDataset 17 | from .isprs import ISPRSDataset 18 | from .loveda import LoveDADataset 19 | from .night_driving import NightDrivingDataset 20 | from .pascal_context import PascalContextDataset, PascalContextDataset59 21 | from .potsdam import PotsdamDataset 22 | from .stare import STAREDataset 23 | from .voc import PascalVOCDataset 24 | 25 | __all__ = [ 26 | 'CustomDataset', 'build_dataloader', 'ConcatDataset', 'RepeatDataset', 27 | 'DATASETS', 'build_dataset', 'PIPELINES', 'CityscapesDataset', 28 | 'PascalVOCDataset', 'ADE20KDataset', 'PascalContextDataset', 29 | 'PascalContextDataset59', 'ChaseDB1Dataset', 'DRIVEDataset', 'HRFDataset', 30 | 'STAREDataset', 'DarkZurichDataset', 'NightDrivingDataset', 31 | 'COCOStuffDataset', 'LoveDADataset', 'MultiImageMixDataset', 32 | 'iSAIDDataset', 'ISPRSDataset', 'PotsdamDataset', 'FaceOccludedDataset', 33 | 'ImageNetSDataset', 'LoadImageNetSAnnotations', 34 | 'LoadImageNetSImageFromFile' 35 | ] 36 | -------------------------------------------------------------------------------- /mmseg/datasets/chase_db1.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | 3 | from .builder import DATASETS 4 | from .custom import CustomDataset 5 | 6 | 7 | @DATASETS.register_module() 8 | class ChaseDB1Dataset(CustomDataset): 9 | """Chase_db1 dataset. 10 | 11 | In segmentation map annotation for Chase_db1, 0 stands for background, 12 | which is included in 2 categories. ``reduce_zero_label`` is fixed to False. 
13 | The ``img_suffix`` is fixed to '.png' and ``seg_map_suffix`` is fixed to 14 | '_1stHO.png'. 15 | """ 16 | 17 | CLASSES = ('background', 'vessel') 18 | 19 | PALETTE = [[120, 120, 120], [6, 230, 230]] 20 | 21 | def __init__(self, **kwargs): 22 | super(ChaseDB1Dataset, self).__init__( 23 | img_suffix='.png', 24 | seg_map_suffix='_1stHO.png', 25 | reduce_zero_label=False, 26 | **kwargs) 27 | assert self.file_client.exists(self.img_dir) 28 | -------------------------------------------------------------------------------- /mmseg/datasets/dark_zurich.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .builder import DATASETS 3 | from .cityscapes import CityscapesDataset 4 | 5 | 6 | @DATASETS.register_module() 7 | class DarkZurichDataset(CityscapesDataset): 8 | """DarkZurichDataset dataset.""" 9 | 10 | def __init__(self, **kwargs): 11 | super().__init__( 12 | img_suffix='_rgb_anon.png', 13 | seg_map_suffix='_gt_labelTrainIds.png', 14 | **kwargs) 15 | -------------------------------------------------------------------------------- /mmseg/datasets/drive.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | 3 | from .builder import DATASETS 4 | from .custom import CustomDataset 5 | 6 | 7 | @DATASETS.register_module() 8 | class DRIVEDataset(CustomDataset): 9 | """DRIVE dataset. 10 | 11 | In segmentation map annotation for DRIVE, 0 stands for background, which is 12 | included in 2 categories. ``reduce_zero_label`` is fixed to False. The 13 | ``img_suffix`` is fixed to '.png' and ``seg_map_suffix`` is fixed to 14 | '_manual1.png'. 15 | """ 16 | 17 | CLASSES = ('background', 'vessel') 18 | 19 | PALETTE = [[120, 120, 120], [6, 230, 230]] 20 | 21 | def __init__(self, **kwargs): 22 | super(DRIVEDataset, self).__init__( 23 | img_suffix='.png', 24 | seg_map_suffix='_manual1.png', 25 | reduce_zero_label=False, 26 | **kwargs) 27 | assert self.file_client.exists(self.img_dir) 28 | -------------------------------------------------------------------------------- /mmseg/datasets/face.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import os.path as osp 3 | 4 | from .builder import DATASETS 5 | from .custom import CustomDataset 6 | 7 | 8 | @DATASETS.register_module() 9 | class FaceOccludedDataset(CustomDataset): 10 | """Face Occluded dataset. 11 | 12 | Args: 13 | split (str): Split txt file for Pascal VOC. 14 | """ 15 | 16 | CLASSES = ('background', 'face') 17 | 18 | PALETTE = [[0, 0, 0], [128, 0, 0]] 19 | 20 | def __init__(self, split, **kwargs): 21 | super(FaceOccludedDataset, self).__init__( 22 | img_suffix='.jpg', seg_map_suffix='.png', split=split, **kwargs) 23 | assert osp.exists(self.img_dir) and self.split is not None 24 | -------------------------------------------------------------------------------- /mmseg/datasets/hrf.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | 3 | from .builder import DATASETS 4 | from .custom import CustomDataset 5 | 6 | 7 | @DATASETS.register_module() 8 | class HRFDataset(CustomDataset): 9 | """HRF dataset. 10 | 11 | In segmentation map annotation for HRF, 0 stands for background, which is 12 | included in 2 categories. ``reduce_zero_label`` is fixed to False. 
The 13 | ``img_suffix`` is fixed to '.png' and ``seg_map_suffix`` is fixed to 14 | '.png'. 15 | """ 16 | 17 | CLASSES = ('background', 'vessel') 18 | 19 | PALETTE = [[120, 120, 120], [6, 230, 230]] 20 | 21 | def __init__(self, **kwargs): 22 | super(HRFDataset, self).__init__( 23 | img_suffix='.png', 24 | seg_map_suffix='.png', 25 | reduce_zero_label=False, 26 | **kwargs) 27 | assert self.file_client.exists(self.img_dir) 28 | -------------------------------------------------------------------------------- /mmseg/datasets/isprs.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .builder import DATASETS 3 | from .custom import CustomDataset 4 | 5 | 6 | @DATASETS.register_module() 7 | class ISPRSDataset(CustomDataset): 8 | """ISPRS dataset. 9 | 10 | In segmentation map annotation for ISPRS, 0 is the ignore index. 11 | ``reduce_zero_label`` should be set to True. The ``img_suffix`` and 12 | ``seg_map_suffix`` are both fixed to '.png'. 13 | """ 14 | CLASSES = ('impervious_surface', 'building', 'low_vegetation', 'tree', 15 | 'car', 'clutter') 16 | 17 | PALETTE = [[255, 255, 255], [0, 0, 255], [0, 255, 255], [0, 255, 0], 18 | [255, 255, 0], [255, 0, 0]] 19 | 20 | def __init__(self, **kwargs): 21 | super(ISPRSDataset, self).__init__( 22 | img_suffix='.png', 23 | seg_map_suffix='.png', 24 | reduce_zero_label=True, 25 | **kwargs) 26 | -------------------------------------------------------------------------------- /mmseg/datasets/night_driving.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .builder import DATASETS 3 | from .cityscapes import CityscapesDataset 4 | 5 | 6 | @DATASETS.register_module() 7 | class NightDrivingDataset(CityscapesDataset): 8 | """NightDrivingDataset dataset.""" 9 | 10 | def __init__(self, **kwargs): 11 | super().__init__( 12 | img_suffix='_leftImg8bit.png', 13 | seg_map_suffix='_gtCoarse_labelTrainIds.png', 14 | **kwargs) 15 | -------------------------------------------------------------------------------- /mmseg/datasets/pipelines/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .compose import Compose 3 | from .formatting import (Collect, ImageToTensor, ToDataContainer, ToTensor, 4 | Transpose, to_tensor) 5 | from .loading import LoadAnnotations, LoadImageFromFile 6 | from .test_time_aug import MultiScaleFlipAug 7 | from .transforms import (CLAHE, AdjustGamma, Normalize, Pad, 8 | PhotoMetricDistortion, RandomCrop, RandomCutOut, 9 | RandomFlip, RandomMosaic, RandomRotate, Rerange, 10 | Resize, RGB2Gray, SegRescale) 11 | 12 | __all__ = [ 13 | 'Compose', 'to_tensor', 'ToTensor', 'ImageToTensor', 'ToDataContainer', 14 | 'Transpose', 'Collect', 'LoadAnnotations', 'LoadImageFromFile', 15 | 'MultiScaleFlipAug', 'Resize', 'RandomFlip', 'Pad', 'RandomCrop', 16 | 'Normalize', 'SegRescale', 'PhotoMetricDistortion', 'RandomRotate', 17 | 'AdjustGamma', 'CLAHE', 'Rerange', 'RGB2Gray', 'RandomCutOut', 18 | 'RandomMosaic' 19 | ] 20 | -------------------------------------------------------------------------------- /mmseg/datasets/pipelines/compose.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
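# Usage sketch: a minimal, hedged example of the Compose class defined
# below, assuming an installed mmseg. Config dicts are built through the
# PIPELINES registry, while bare callables are kept as-is, so the two
# styles can be mixed in one pipeline.
if __name__ == '__main__':
    from mmseg.datasets.pipelines import Compose

    pipeline = Compose([
        dict(type='LoadImageFromFile'),
        dict(type='LoadAnnotations'),
        lambda results: results,  # a no-op callable, accepted unchanged
    ])
    print(pipeline)  # the repr lists one transform per line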
2 | import collections 3 | 4 | from mmcv.utils import build_from_cfg 5 | 6 | from ..builder import PIPELINES 7 | 8 | 9 | @PIPELINES.register_module() 10 | class Compose(object): 11 | """Compose multiple transforms sequentially. 12 | 13 | Args: 14 | transforms (Sequence[dict | callable]): Sequence of transform object or 15 | config dict to be composed. 16 | """ 17 | 18 | def __init__(self, transforms): 19 | assert isinstance(transforms, collections.abc.Sequence) 20 | self.transforms = [] 21 | for transform in transforms: 22 | if isinstance(transform, dict): 23 | transform = build_from_cfg(transform, PIPELINES) 24 | self.transforms.append(transform) 25 | elif callable(transform): 26 | self.transforms.append(transform) 27 | else: 28 | raise TypeError('transform must be callable or a dict') 29 | 30 | def __call__(self, data): 31 | """Call function to apply transforms sequentially. 32 | 33 | Args: 34 | data (dict): A result dict contains the data to transform. 35 | 36 | Returns: 37 | dict: Transformed data. 38 | """ 39 | 40 | for t in self.transforms: 41 | data = t(data) 42 | if data is None: 43 | return None 44 | return data 45 | 46 | def __repr__(self): 47 | format_string = self.__class__.__name__ + '(' 48 | for t in self.transforms: 49 | format_string += '\n' 50 | format_string += f' {t}' 51 | format_string += '\n)' 52 | return format_string 53 | -------------------------------------------------------------------------------- /mmseg/datasets/pipelines/formating.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | # flake8: noqa 3 | import warnings 4 | 5 | from .formatting import * 6 | 7 | warnings.warn('DeprecationWarning: mmseg.datasets.pipelines.formating will be ' 8 | 'deprecated in 2021, please replace it with ' 9 | 'mmseg.datasets.pipelines.formatting.') 10 | -------------------------------------------------------------------------------- /mmseg/datasets/potsdam.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .builder import DATASETS 3 | from .custom import CustomDataset 4 | 5 | 6 | @DATASETS.register_module() 7 | class PotsdamDataset(CustomDataset): 8 | """ISPRS Potsdam dataset. 9 | 10 | In segmentation map annotation for Potsdam dataset, 0 is the ignore index. 11 | ``reduce_zero_label`` should be set to True. The ``img_suffix`` and 12 | ``seg_map_suffix`` are both fixed to '.png'. 13 | """ 14 | CLASSES = ('impervious_surface', 'building', 'low_vegetation', 'tree', 15 | 'car', 'clutter') 16 | 17 | PALETTE = [[255, 255, 255], [0, 0, 255], [0, 255, 255], [0, 255, 0], 18 | [255, 255, 0], [255, 0, 0]] 19 | 20 | def __init__(self, **kwargs): 21 | super(PotsdamDataset, self).__init__( 22 | img_suffix='.png', 23 | seg_map_suffix='.png', 24 | reduce_zero_label=True, 25 | **kwargs) 26 | -------------------------------------------------------------------------------- /mmseg/datasets/samplers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .distributed_sampler import DistributedSampler 3 | 4 | __all__ = ['DistributedSampler'] 5 | -------------------------------------------------------------------------------- /mmseg/datasets/stare.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
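# Usage sketch: building this dataset from a config dict, assuming an
# installed mmseg and the STARE data on disk. The data_root/img_dir/ann_dir
# values are placeholders, not paths shipped with this repository; the
# constructor asserts that img_dir exists, so this only runs with real data.
if __name__ == '__main__':
    from mmseg.datasets import build_dataset

    dataset = build_dataset(
        dict(
            type='STAREDataset',
            data_root='data/STARE',  # placeholder path
            img_dir='images/training',
            ann_dir='annotations/training',
            pipeline=[dict(type='LoadImageFromFile')]))
    print(len(dataset))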
2 | import os.path as osp 3 | 4 | from .builder import DATASETS 5 | from .custom import CustomDataset 6 | 7 | 8 | @DATASETS.register_module() 9 | class STAREDataset(CustomDataset): 10 | """STARE dataset. 11 | 12 | In segmentation map annotation for STARE, 0 stands for background, which is 13 | included in 2 categories. ``reduce_zero_label`` is fixed to False. The 14 | ``img_suffix`` is fixed to '.png' and ``seg_map_suffix`` is fixed to 15 | '.ah.png'. 16 | """ 17 | 18 | CLASSES = ('background', 'vessel') 19 | 20 | PALETTE = [[120, 120, 120], [6, 230, 230]] 21 | 22 | def __init__(self, **kwargs): 23 | super(STAREDataset, self).__init__( 24 | img_suffix='.png', 25 | seg_map_suffix='.ah.png', 26 | reduce_zero_label=False, 27 | **kwargs) 28 | assert osp.exists(self.img_dir) 29 | -------------------------------------------------------------------------------- /mmseg/datasets/voc.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import os.path as osp 3 | 4 | from .builder import DATASETS 5 | from .custom import CustomDataset 6 | 7 | 8 | @DATASETS.register_module() 9 | class PascalVOCDataset(CustomDataset): 10 | """Pascal VOC dataset. 11 | 12 | Args: 13 | split (str): Split txt file for Pascal VOC. 14 | """ 15 | 16 | CLASSES = ('background', 'aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 17 | 'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', 18 | 'horse', 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 19 | 'train', 'tvmonitor') 20 | 21 | PALETTE = [[0, 0, 0], [128, 0, 0], [0, 128, 0], [128, 128, 0], [0, 0, 128], 22 | [128, 0, 128], [0, 128, 128], [128, 128, 128], [64, 0, 0], 23 | [192, 0, 0], [64, 128, 0], [192, 128, 0], [64, 0, 128], 24 | [192, 0, 128], [64, 128, 128], [192, 128, 128], [0, 64, 0], 25 | [128, 64, 0], [0, 192, 0], [128, 192, 0], [0, 64, 128]] 26 | 27 | def __init__(self, split, **kwargs): 28 | super(PascalVOCDataset, self).__init__( 29 | img_suffix='.jpg', seg_map_suffix='.png', split=split, **kwargs) 30 | assert osp.exists(self.img_dir) and self.split is not None 31 | -------------------------------------------------------------------------------- /mmseg/models/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .backbones import * # noqa: F401,F403 3 | from .builder import (BACKBONES, HEADS, LOSSES, SEGMENTORS, build_backbone, 4 | build_head, build_loss, build_segmentor) 5 | from .decode_heads import * # noqa: F401,F403 6 | from .losses import * # noqa: F401,F403 7 | from .necks import * # noqa: F401,F403 8 | from .segmentors import * # noqa: F401,F403 9 | 10 | __all__ = [ 11 | 'BACKBONES', 'HEADS', 'LOSSES', 'SEGMENTORS', 'build_backbone', 12 | 'build_head', 'build_loss', 'build_segmentor' 13 | ] 14 | -------------------------------------------------------------------------------- /mmseg/models/backbones/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
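# Usage sketch: every class re-exported below is registered in the BACKBONES
# registry, so model configs refer to them by class-name strings. A hedged
# example with illustrative arguments (not defaults taken from this repo):
if __name__ == '__main__':
    import torch

    from mmseg.models import build_backbone

    backbone = build_backbone(
        dict(type='ResNetV1c', depth=50, num_stages=4,
             out_indices=(0, 1, 2, 3)))
    backbone.eval()
    with torch.no_grad():
        feats = backbone(torch.rand(1, 3, 64, 64))
    # Four feature maps at strides 4/8/16/32 with 256..2048 channels.
    print([tuple(f.shape) for f in feats])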
2 | from .beit import BEiT 3 | from .bisenetv1 import BiSeNetV1 4 | from .bisenetv2 import BiSeNetV2 5 | from .cgnet import CGNet 6 | from .erfnet import ERFNet 7 | from .fast_scnn import FastSCNN 8 | from .hrnet import HRNet 9 | from .icnet import ICNet 10 | from .mae import MAE 11 | from .mit import MixVisionTransformer 12 | from .mobilenet_v2 import MobileNetV2 13 | from .mobilenet_v3 import MobileNetV3 14 | from .resnest import ResNeSt 15 | from .resnet import ResNet, ResNetV1c, ResNetV1d 16 | from .resnext import ResNeXt 17 | from .stdc import STDCContextPathNet, STDCNet 18 | from .swin import SwinTransformer 19 | from .timm_backbone import TIMMBackbone 20 | from .twins import PCPVT, SVT 21 | from .unet import UNet 22 | from .vit import VisionTransformer 23 | 24 | __all__ = [ 25 | 'ResNet', 'ResNetV1c', 'ResNetV1d', 'ResNeXt', 'HRNet', 'FastSCNN', 26 | 'ResNeSt', 'MobileNetV2', 'UNet', 'CGNet', 'MobileNetV3', 27 | 'VisionTransformer', 'SwinTransformer', 'MixVisionTransformer', 28 | 'BiSeNetV1', 'BiSeNetV2', 'ICNet', 'TIMMBackbone', 'ERFNet', 'PCPVT', 29 | 'SVT', 'STDCNet', 'STDCContextPathNet', 'BEiT', 'MAE' 30 | ] 31 | -------------------------------------------------------------------------------- /mmseg/models/backbones/timm_backbone.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | try: 3 | import timm 4 | except ImportError: 5 | timm = None 6 | 7 | from mmcv.cnn.bricks.registry import NORM_LAYERS 8 | from mmcv.runner import BaseModule 9 | 10 | from ..builder import BACKBONES 11 | 12 | 13 | @BACKBONES.register_module() 14 | class TIMMBackbone(BaseModule): 15 | """Wrapper to use backbones from timm library. More details can be found in 16 | `timm <https://github.com/rwightman/pytorch-image-models>`_ . 17 | 18 | Args: 19 | model_name (str): Name of timm model to instantiate. 20 | pretrained (bool): Load pretrained weights if True. 21 | checkpoint_path (str): Path of checkpoint to load after 22 | model is initialized. 23 | in_channels (int): Number of input image channels. Default: 3. 24 | init_cfg (dict, optional): Initialization config dict. 25 | **kwargs: Other timm & model specific arguments. 26 | """ 27 | 28 | def __init__( 29 | self, 30 | model_name, 31 | features_only=True, 32 | pretrained=True, 33 | checkpoint_path='', 34 | in_channels=3, 35 | init_cfg=None, 36 | **kwargs, 37 | ): 38 | if timm is None: 39 | raise RuntimeError('timm is not installed') 40 | super(TIMMBackbone, self).__init__(init_cfg) 41 | if 'norm_layer' in kwargs: 42 | kwargs['norm_layer'] = NORM_LAYERS.get(kwargs['norm_layer']) 43 | self.timm_model = timm.create_model( 44 | model_name=model_name, 45 | features_only=features_only, 46 | pretrained=pretrained, 47 | in_chans=in_channels, 48 | checkpoint_path=checkpoint_path, 49 | **kwargs, 50 | ) 51 | 52 | # Make unused parameters None 53 | self.timm_model.global_pool = None 54 | self.timm_model.fc = None 55 | self.timm_model.classifier = None 56 | 57 | # Hack to use pretrained weights from timm 58 | if pretrained or checkpoint_path: 59 | self._is_init = True 60 | 61 | def forward(self, x): 62 | features = self.timm_model(x) 63 | return features 64 | -------------------------------------------------------------------------------- /mmseg/models/builder.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
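# Usage sketch: train_cfg/test_cfg normally live inside the model dict, so
# only the cfg is passed to build_segmentor below. Hedged: the path refers
# to one of the _base_ model files in this repo and assumes the script is
# run from the repository root; building only instantiates modules, and no
# pretrained weights are loaded at this point.
if __name__ == '__main__':
    from mmcv import Config

    from mmseg.models import build_segmentor

    cfg = Config.fromfile('configs/_base_/models/fcn_r50-d8.py')
    segmentor = build_segmentor(cfg.model)
    print(type(segmentor).__name__)  # EncoderDecoder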
2 | import warnings 3 | 4 | from mmcv.cnn import MODELS as MMCV_MODELS 5 | from mmcv.cnn.bricks.registry import ATTENTION as MMCV_ATTENTION 6 | from mmcv.utils import Registry 7 | 8 | MODELS = Registry('models', parent=MMCV_MODELS) 9 | ATTENTION = Registry('attention', parent=MMCV_ATTENTION) 10 | 11 | BACKBONES = MODELS 12 | NECKS = MODELS 13 | HEADS = MODELS 14 | LOSSES = MODELS 15 | SEGMENTORS = MODELS 16 | 17 | 18 | def build_backbone(cfg): 19 | """Build backbone.""" 20 | return BACKBONES.build(cfg) 21 | 22 | 23 | def build_neck(cfg): 24 | """Build neck.""" 25 | return NECKS.build(cfg) 26 | 27 | 28 | def build_head(cfg): 29 | """Build head.""" 30 | return HEADS.build(cfg) 31 | 32 | 33 | def build_loss(cfg): 34 | """Build loss.""" 35 | return LOSSES.build(cfg) 36 | 37 | 38 | def build_segmentor(cfg, train_cfg=None, test_cfg=None): 39 | """Build segmentor.""" 40 | if train_cfg is not None or test_cfg is not None: 41 | warnings.warn( 42 | 'train_cfg and test_cfg is deprecated, ' 43 | 'please specify them in model', UserWarning) 44 | assert cfg.get('train_cfg') is None or train_cfg is None, \ 45 | 'train_cfg specified in both outer field and model field ' 46 | assert cfg.get('test_cfg') is None or test_cfg is None, \ 47 | 'test_cfg specified in both outer field and model field ' 48 | return SEGMENTORS.build( 49 | cfg, default_args=dict(train_cfg=train_cfg, test_cfg=test_cfg)) 50 | -------------------------------------------------------------------------------- /mmseg/models/decode_heads/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .ann_head import ANNHead 3 | from .apc_head import APCHead 4 | from .aspp_head import ASPPHead 5 | from .cc_head import CCHead 6 | from .da_head import DAHead 7 | from .dm_head import DMHead 8 | from .dnl_head import DNLHead 9 | from .dpt_head import DPTHead 10 | from .ema_head import EMAHead 11 | from .enc_head import EncHead 12 | from .fcn_head import FCNHead 13 | from .fpn_head import FPNHead 14 | from .gc_head import GCHead 15 | from .isa_head import ISAHead 16 | from .knet_head import IterativeDecodeHead, KernelUpdateHead, KernelUpdator 17 | from .lraspp_head import LRASPPHead 18 | from .nl_head import NLHead 19 | from .ocr_head import OCRHead 20 | from .point_head import PointHead 21 | from .psa_head import PSAHead 22 | from .psp_head import PSPHead 23 | from .segformer_head import SegformerHead 24 | from .segmenter_mask_head import SegmenterMaskTransformerHead 25 | from .sep_aspp_head import DepthwiseSeparableASPPHead 26 | from .sep_fcn_head import DepthwiseSeparableFCNHead 27 | from .setr_mla_head import SETRMLAHead 28 | from .setr_up_head import SETRUPHead 29 | from .stdc_head import STDCHead 30 | from .uper_head import UPerHead 31 | 32 | __all__ = [ 33 | 'FCNHead', 'PSPHead', 'ASPPHead', 'PSAHead', 'NLHead', 'GCHead', 'CCHead', 34 | 'UPerHead', 'DepthwiseSeparableASPPHead', 'ANNHead', 'DAHead', 'OCRHead', 35 | 'EncHead', 'DepthwiseSeparableFCNHead', 'FPNHead', 'EMAHead', 'DNLHead', 36 | 'PointHead', 'APCHead', 'DMHead', 'LRASPPHead', 'SETRUPHead', 37 | 'SETRMLAHead', 'DPTHead', 'SETRMLAHead', 'SegmenterMaskTransformerHead', 38 | 'SegformerHead', 'ISAHead', 'STDCHead', 'IterativeDecodeHead', 39 | 'KernelUpdateHead', 'KernelUpdator' 40 | ] 41 | -------------------------------------------------------------------------------- /mmseg/models/decode_heads/cc_head.py: -------------------------------------------------------------------------------- 1 | # 
Copyright (c) OpenMMLab. All rights reserved. 2 | import torch 3 | 4 | from ..builder import HEADS 5 | from .fcn_head import FCNHead 6 | 7 | try: 8 | from mmcv.ops import CrissCrossAttention 9 | except ModuleNotFoundError: 10 | CrissCrossAttention = None 11 | 12 | 13 | @HEADS.register_module() 14 | class CCHead(FCNHead): 15 | """CCNet: Criss-Cross Attention for Semantic Segmentation. 16 | 17 | This head is the implementation of `CCNet 18 | `_. 19 | 20 | Args: 21 | recurrence (int): Number of recurrence of Criss Cross Attention 22 | module. Default: 2. 23 | """ 24 | 25 | def __init__(self, recurrence=2, **kwargs): 26 | if CrissCrossAttention is None: 27 | raise RuntimeError('Please install mmcv-full for ' 28 | 'CrissCrossAttention ops') 29 | super(CCHead, self).__init__(num_convs=2, **kwargs) 30 | self.recurrence = recurrence 31 | self.cca = CrissCrossAttention(self.channels) 32 | 33 | def forward(self, inputs): 34 | """Forward function.""" 35 | x = self._transform_inputs(inputs) 36 | output = self.convs[0](x) 37 | for _ in range(self.recurrence): 38 | output = self.cca(output) 39 | output = self.convs[1](output) 40 | if self.concat_input: 41 | output = self.conv_cat(torch.cat([x, output], dim=1)) 42 | output = self.cls_seg(output) 43 | return output 44 | -------------------------------------------------------------------------------- /mmseg/models/decode_heads/gc_head.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import torch 3 | from mmcv.cnn import ContextBlock 4 | 5 | from ..builder import HEADS 6 | from .fcn_head import FCNHead 7 | 8 | 9 | @HEADS.register_module() 10 | class GCHead(FCNHead): 11 | """GCNet: Non-local Networks Meet Squeeze-Excitation Networks and Beyond. 12 | 13 | This head is the implementation of `GCNet 14 | `_. 15 | 16 | Args: 17 | ratio (float): Multiplier of channels ratio. Default: 1/4. 18 | pooling_type (str): The pooling type of context aggregation. 19 | Options are 'att', 'avg'. Default: 'avg'. 20 | fusion_types (tuple[str]): The fusion type for feature fusion. 21 | Options are 'channel_add', 'channel_mul'. Default: ('channel_add',) 22 | """ 23 | 24 | def __init__(self, 25 | ratio=1 / 4., 26 | pooling_type='att', 27 | fusion_types=('channel_add', ), 28 | **kwargs): 29 | super(GCHead, self).__init__(num_convs=2, **kwargs) 30 | self.ratio = ratio 31 | self.pooling_type = pooling_type 32 | self.fusion_types = fusion_types 33 | self.gc_block = ContextBlock( 34 | in_channels=self.channels, 35 | ratio=self.ratio, 36 | pooling_type=self.pooling_type, 37 | fusion_types=self.fusion_types) 38 | 39 | def forward(self, inputs): 40 | """Forward function.""" 41 | x = self._transform_inputs(inputs) 42 | output = self.convs[0](x) 43 | output = self.gc_block(output) 44 | output = self.convs[1](output) 45 | if self.concat_input: 46 | output = self.conv_cat(torch.cat([x, output], dim=1)) 47 | output = self.cls_seg(output) 48 | return output 49 | -------------------------------------------------------------------------------- /mmseg/models/decode_heads/nl_head.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import torch 3 | from mmcv.cnn import NonLocal2d 4 | 5 | from ..builder import HEADS 6 | from .fcn_head import FCNHead 7 | 8 | 9 | @HEADS.register_module() 10 | class NLHead(FCNHead): 11 | """Non-local Neural Networks. 12 | 13 | This head is the implementation of `NLNet 14 | `_. 
15 | 16 | Args: 17 | reduction (int): Reduction factor of projection transform. Default: 2. 18 | use_scale (bool): Whether to scale pairwise_weight by 19 | sqrt(1/inter_channels). Default: True. 20 | mode (str): The nonlocal mode. Options are 'embedded_gaussian', 21 | 'dot_product'. Default: 'embedded_gaussian.'. 22 | """ 23 | 24 | def __init__(self, 25 | reduction=2, 26 | use_scale=True, 27 | mode='embedded_gaussian', 28 | **kwargs): 29 | super(NLHead, self).__init__(num_convs=2, **kwargs) 30 | self.reduction = reduction 31 | self.use_scale = use_scale 32 | self.mode = mode 33 | self.nl_block = NonLocal2d( 34 | in_channels=self.channels, 35 | reduction=self.reduction, 36 | use_scale=self.use_scale, 37 | conv_cfg=self.conv_cfg, 38 | norm_cfg=self.norm_cfg, 39 | mode=self.mode) 40 | 41 | def forward(self, inputs): 42 | """Forward function.""" 43 | x = self._transform_inputs(inputs) 44 | output = self.convs[0](x) 45 | output = self.nl_block(output) 46 | output = self.convs[1](output) 47 | if self.concat_input: 48 | output = self.conv_cat(torch.cat([x, output], dim=1)) 49 | output = self.cls_seg(output) 50 | return output 51 | -------------------------------------------------------------------------------- /mmseg/models/losses/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .accuracy import Accuracy, accuracy 3 | from .cross_entropy_loss import (CrossEntropyLoss, binary_cross_entropy, 4 | cross_entropy, mask_cross_entropy) 5 | from .dice_loss import DiceLoss 6 | from .focal_loss import FocalLoss 7 | from .lovasz_loss import LovaszLoss 8 | from .tversky_loss import TverskyLoss 9 | from .utils import reduce_loss, weight_reduce_loss, weighted_loss 10 | 11 | __all__ = [ 12 | 'accuracy', 'Accuracy', 'cross_entropy', 'binary_cross_entropy', 13 | 'mask_cross_entropy', 'CrossEntropyLoss', 'reduce_loss', 14 | 'weight_reduce_loss', 'weighted_loss', 'LovaszLoss', 'DiceLoss', 15 | 'FocalLoss', 'TverskyLoss' 16 | ] 17 | -------------------------------------------------------------------------------- /mmseg/models/necks/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .featurepyramid import Feature2Pyramid 3 | from .fpn import FPN 4 | from .ic_neck import ICNeck 5 | from .jpu import JPU 6 | from .mla_neck import MLANeck 7 | from .multilevel_neck import MultiLevelNeck 8 | 9 | __all__ = [ 10 | 'FPN', 'MultiLevelNeck', 'MLANeck', 'ICNeck', 'JPU', 'Feature2Pyramid' 11 | ] 12 | -------------------------------------------------------------------------------- /mmseg/models/segmentors/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .base import BaseSegmentor 3 | from .cascade_encoder_decoder import CascadeEncoderDecoder 4 | from .encoder_decoder import EncoderDecoder 5 | 6 | __all__ = ['BaseSegmentor', 'EncoderDecoder', 'CascadeEncoderDecoder'] 7 | -------------------------------------------------------------------------------- /mmseg/models/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
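# Usage sketch: a round trip through the shape-convert helpers re-exported
# below, with illustrative shapes. nchw_to_nlc flattens a feature map into
# the token sequence transformer blocks expect; nlc_to_nchw restores it.
if __name__ == '__main__':
    import torch

    from mmseg.models.utils import nchw_to_nlc, nlc_to_nchw

    x = torch.rand(2, 64, 16, 16)      # (N, C, H, W)
    tokens = nchw_to_nlc(x)            # (2, 256, 64), i.e. (N, H*W, C)
    y = nlc_to_nchw(tokens, (16, 16))  # back to (2, 64, 16, 16)
    assert torch.equal(x, y)
    print(tuple(tokens.shape))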
2 | from .embed import PatchEmbed 3 | from .inverted_residual import InvertedResidual, InvertedResidualV3 4 | from .make_divisible import make_divisible 5 | from .res_layer import ResLayer 6 | from .se_layer import SELayer 7 | from .self_attention_block import SelfAttentionBlock 8 | from .shape_convert import (nchw2nlc2nchw, nchw_to_nlc, nlc2nchw2nlc, 9 | nlc_to_nchw) 10 | from .up_conv_block import UpConvBlock 11 | 12 | __all__ = [ 13 | 'ResLayer', 'SelfAttentionBlock', 'make_divisible', 'InvertedResidual', 14 | 'UpConvBlock', 'InvertedResidualV3', 'SELayer', 'PatchEmbed', 15 | 'nchw_to_nlc', 'nlc_to_nchw', 'nchw2nlc2nchw', 'nlc2nchw2nlc' 16 | ] 17 | -------------------------------------------------------------------------------- /mmseg/models/utils/make_divisible.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | def make_divisible(value, divisor, min_value=None, min_ratio=0.9): 3 | """Make divisible function. 4 | 5 | This function rounds the channel number to the nearest value that can be 6 | divisible by the divisor. It is taken from the original tf repo. It ensures 7 | that all layers have a channel number that is divisible by divisor. It can 8 | be seen here: https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py # noqa 9 | 10 | Args: 11 | value (int): The original channel number. 12 | divisor (int): The divisor to fully divide the channel number. 13 | min_value (int): The minimum value of the output channel. 14 | Default: None, means that the minimum value equal to the divisor. 15 | min_ratio (float): The minimum ratio of the rounded channel number to 16 | the original channel number. Default: 0.9. 17 | 18 | Returns: 19 | int: The modified output channel number. 20 | """ 21 | 22 | if min_value is None: 23 | min_value = divisor 24 | new_value = max(min_value, int(value + divisor / 2) // divisor * divisor) 25 | # Make sure that round down does not go down by more than (1-min_ratio). 26 | if new_value < min_ratio * value: 27 | new_value += divisor 28 | return new_value 29 | -------------------------------------------------------------------------------- /mmseg/ops/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .encoding import Encoding 3 | from .wrappers import Upsample, resize 4 | 5 | __all__ = ['Upsample', 'resize', 'Encoding'] 6 | -------------------------------------------------------------------------------- /mmseg/ops/wrappers.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
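# Usage sketch: `resize` below is a thin wrapper over F.interpolate that
# warns when align_corners=True is used with sizes that do not satisfy the
# `x+1 -> nx+1` relation. Hedged example with illustrative shapes: 64 -> 127
# matches 2 * (64 - 1) + 1, so no warning fires for this pair.
if __name__ == '__main__':
    import torch

    from mmseg.ops import resize

    logits = torch.rand(1, 19, 64, 64)
    up = resize(logits, size=(127, 127), mode='bilinear', align_corners=True)
    print(tuple(up.shape))  # (1, 19, 127, 127)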
2 | import warnings
3 | 
4 | import torch.nn as nn
5 | import torch.nn.functional as F
6 | 
7 | 
8 | def resize(input,
9 |            size=None,
10 |            scale_factor=None,
11 |            mode='nearest',
12 |            align_corners=None,
13 |            warning=True):
14 |     if warning:
15 |         if size is not None and align_corners:
16 |             input_h, input_w = tuple(int(x) for x in input.shape[2:])
17 |             output_h, output_w = tuple(int(x) for x in size)
18 |             if output_h > input_h or output_w > input_w:
19 |                 if ((output_h > 1 and output_w > 1 and input_h > 1
20 |                      and input_w > 1) and (output_h - 1) % (input_h - 1)
21 |                         and (output_w - 1) % (input_w - 1)):
22 |                     warnings.warn(
23 |                         f'When align_corners={align_corners}, '
24 |                         'the output would be more aligned if '
25 |                         f'input size {(input_h, input_w)} is `x+1` and '
26 |                         f'out size {(output_h, output_w)} is `nx+1`')
27 |     return F.interpolate(input, size, scale_factor, mode, align_corners)
28 | 
29 | 
30 | class Upsample(nn.Module):
31 | 
32 |     def __init__(self,
33 |                  size=None,
34 |                  scale_factor=None,
35 |                  mode='nearest',
36 |                  align_corners=None):
37 |         super(Upsample, self).__init__()
38 |         self.size = size
39 |         if isinstance(scale_factor, tuple):
40 |             self.scale_factor = tuple(float(factor) for factor in scale_factor)
41 |         else:
42 |             self.scale_factor = float(scale_factor) if scale_factor else None
43 |         self.mode = mode
44 |         self.align_corners = align_corners
45 | 
46 |     def forward(self, x):
47 |         if not self.size:
48 |             size = [int(t * self.scale_factor) for t in x.shape[-2:]]
49 |         else:
50 |             size = self.size
51 |         return resize(x, size, None, self.mode, self.align_corners)
52 | 
-------------------------------------------------------------------------------- /mmseg/utils/__init__.py: --------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from .collect_env import collect_env
3 | from .logger import get_root_logger
4 | from .misc import find_latest_checkpoint
5 | from .set_env import setup_multi_processes
6 | from .util_distribution import build_ddp, build_dp, get_device
7 | 
8 | __all__ = [
9 |     'get_root_logger', 'collect_env', 'find_latest_checkpoint',
10 |     'setup_multi_processes', 'build_ddp', 'build_dp', 'get_device'
11 | ]
12 | 
-------------------------------------------------------------------------------- /mmseg/utils/collect_env.py: --------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from mmcv.utils import collect_env as collect_base_env
3 | from mmcv.utils import get_git_hash
4 | 
5 | import mmseg
6 | 
7 | 
8 | def collect_env():
9 |     """Collect the information of the running environments."""
10 |     env_info = collect_base_env()
11 |     env_info['MMSegmentation'] = f'{mmseg.__version__}+{get_git_hash()[:7]}'
12 | 
13 |     return env_info
14 | 
15 | 
16 | if __name__ == '__main__':
17 |     for name, val in collect_env().items():
18 |         print('{}: {}'.format(name, val))
19 | 
-------------------------------------------------------------------------------- /mmseg/utils/logger.py: --------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | import logging
3 | 
4 | from mmcv.utils import get_logger
5 | 
6 | 
7 | def get_root_logger(log_file=None, log_level=logging.INFO):
8 |     """Get the root logger.
9 | 
10 |     The logger will be initialized if it has not been initialized. By default a
11 |     StreamHandler will be added. If `log_file` is specified, a FileHandler will
12 |     also be added.
The name of the root logger is the top-level package name,
13 |     e.g., "mmseg".
14 | 
15 |     Args:
16 |         log_file (str | None): The log filename. If specified, a FileHandler
17 |             will be added to the root logger.
18 |         log_level (int): The root logger level. Note that only the process of
19 |             rank 0 is affected, while other processes will set the level to
20 |             "Error" and thus be silent most of the time.
21 | 
22 |     Returns:
23 |         logging.Logger: The root logger.
24 |     """
25 | 
26 |     logger = get_logger(name='mmseg', log_file=log_file, log_level=log_level)
27 | 
28 |     return logger
29 | 
-------------------------------------------------------------------------------- /mmseg/utils/misc.py: --------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | import glob
3 | import os.path as osp
4 | import warnings
5 | 
6 | 
7 | def find_latest_checkpoint(path, suffix='pth'):
8 |     """Find the latest checkpoint in the given path.
9 | 
10 |     It is used when automatically resuming training; modified from
11 |     https://github.com/open-mmlab/mmdetection/blob/dev-v2.20.0/mmdet/utils/misc.py
12 | 
13 |     Args:
14 |         path (str): The path to find checkpoints.
15 |         suffix (str): File extension for the checkpoint. Defaults to pth.
16 | 
17 |     Returns:
18 |         latest_path (str | None): File path of the latest checkpoint.
19 |     """
20 |     if not osp.exists(path):
21 |         warnings.warn("The path of the checkpoints doesn't exist.")
22 |         return None
23 |     if osp.exists(osp.join(path, f'latest.{suffix}')):
24 |         return osp.join(path, f'latest.{suffix}')
25 | 
26 |     checkpoints = glob.glob(osp.join(path, f'*.{suffix}'))
27 |     if len(checkpoints) == 0:
28 |         warnings.warn('There are no checkpoints in the path.')
29 |         return None
30 |     latest = -1
31 |     latest_path = ''
32 |     for checkpoint in checkpoints:
33 |         if len(checkpoint) < len(latest_path):
34 |             continue
35 |         # `count` is the iteration or epoch number: checkpoints are saved as
36 |         # 'iter_xx.pth' or 'epoch_xx.pth', where xx is that number.
37 |         count = int(osp.basename(checkpoint).split('_')[-1].split('.')[0])
38 |         if count > latest:
39 |             latest = count
40 |             latest_path = checkpoint
41 |     return latest_path
42 | 
-------------------------------------------------------------------------------- /mmseg/version.py: --------------------------------------------------------------------------------
1 | # Copyright (c) Open-MMLab. All rights reserved.
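# Illustrative behaviour of `parse_version_info` below (assumed inputs):
#   parse_version_info('0.30.0')   -> (0, 30, 0)
#   parse_version_info('1.0.0rc1') -> (1, 0, 0, 'rc1')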
2 | 
3 | __version__ = '0.30.0'
4 | 
5 | 
6 | def parse_version_info(version_str):
7 |     version_info = []
8 |     for x in version_str.split('.'):
9 |         if x.isdigit():
10 |             version_info.append(int(x))
11 |         elif x.find('rc') != -1:
12 |             patch_version = x.split('rc')
13 |             version_info.append(int(patch_version[0]))
14 |             version_info.append(f'rc{patch_version[1]}')
15 |     return tuple(version_info)
16 | 
17 | 
18 | version_info = parse_version_info(__version__)
19 | 
-------------------------------------------------------------------------------- /mmseg_custom/__init__.py: --------------------------------------------------------------------------------
1 | from .models import *
2 | from .datasets import *
3 | from .apis import *
4 | from .core import *
-------------------------------------------------------------------------------- /mmseg_custom/apis/__init__.py: --------------------------------------------------------------------------------
1 | from .test_multi_steps import single_gpu_test_multi_steps, multi_gpu_test_multi_steps
2 | from .train_multi_steps import train_segmentor_multi_steps, init_random_seed, set_random_seed
3 | 
4 | __all__ = ['single_gpu_test_multi_steps', 'multi_gpu_test_multi_steps',
5 |            'train_segmentor_multi_steps', 'init_random_seed', 'set_random_seed']
-------------------------------------------------------------------------------- /mmseg_custom/core/__init__.py: --------------------------------------------------------------------------------
1 | from .anchor import *
2 | from .box import *
3 | from .evaluation import *
4 | from .hook import *
5 | from .mask import *
6 | from .utils import *
-------------------------------------------------------------------------------- /mmseg_custom/core/anchor/__init__.py: --------------------------------------------------------------------------------
1 | # Copyright (c) Shanghai AI Lab. All rights reserved.
2 | from .point_generator import MlvlPointGenerator  # noqa: F401,F403
3 | 
-------------------------------------------------------------------------------- /mmseg_custom/core/anchor/builder.py: --------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | import warnings
3 | 
4 | from mmcv.utils import Registry, build_from_cfg
5 | 
6 | PRIOR_GENERATORS = Registry('Generator for anchors and points')
7 | 
8 | ANCHOR_GENERATORS = PRIOR_GENERATORS
9 | 
10 | 
11 | def build_prior_generator(cfg, default_args=None):
12 |     return build_from_cfg(cfg, PRIOR_GENERATORS, default_args)
13 | 
14 | 
15 | def build_anchor_generator(cfg, default_args=None):
16 |     warnings.warn(
17 |         '``build_anchor_generator`` will be deprecated soon, please use '
18 |         '``build_prior_generator`` instead')
19 |     return build_prior_generator(cfg, default_args=default_args)
20 | 
-------------------------------------------------------------------------------- /mmseg_custom/core/box/__init__.py: --------------------------------------------------------------------------------
1 | # Copyright (c) Shanghai AI Lab. All rights reserved.
2 | from .builder import *  # noqa: F401,F403
3 | from .samplers import MaskPseudoSampler  # noqa: F401,F403
4 | 
-------------------------------------------------------------------------------- /mmseg_custom/core/box/builder.py: --------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
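# Usage sketch (illustrative config; `MaskPseudoSampler` is registered into
# BBOX_SAMPLERS in the samplers package below):
#   sampler = build_sampler(dict(type='MaskPseudoSampler'))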
2 | from mmcv.utils import Registry, build_from_cfg
3 | 
4 | BBOX_SAMPLERS = Registry('bbox_sampler')
5 | BBOX_CODERS = Registry('bbox_coder')
6 | 
7 | 
8 | def build_sampler(cfg, **default_args):
9 |     """Builder of box sampler."""
10 |     return build_from_cfg(cfg, BBOX_SAMPLERS, default_args)
11 | 
12 | 
13 | def build_bbox_coder(cfg, **default_args):
14 |     """Builder of box coder."""
15 |     return build_from_cfg(cfg, BBOX_CODERS, default_args)
16 | 
-------------------------------------------------------------------------------- /mmseg_custom/core/box/samplers/__init__.py: --------------------------------------------------------------------------------
1 | # Copyright (c) Shanghai AI Lab. All rights reserved.
2 | from .mask_pseudo_sampler import MaskPseudoSampler  # noqa: F401,F403
3 | 
-------------------------------------------------------------------------------- /mmseg_custom/core/box/samplers/mask_pseudo_sampler.py: --------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | """Copied from
3 | https://github.com/ZwwWayne/K-Net/blob/main/knet/det/mask_pseudo_sampler.py."""
4 | 
5 | import torch
6 | 
7 | from ..builder import BBOX_SAMPLERS
8 | from .base_sampler import BaseSampler
9 | from .mask_sampling_result import MaskSamplingResult
10 | 
11 | 
12 | @BBOX_SAMPLERS.register_module()
13 | class MaskPseudoSampler(BaseSampler):
14 |     """A pseudo sampler that does not actually perform sampling."""
15 |     def __init__(self, **kwargs):
16 |         pass
17 | 
18 |     def _sample_pos(self, **kwargs):
19 |         """Sample positive samples."""
20 |         raise NotImplementedError
21 | 
22 |     def _sample_neg(self, **kwargs):
23 |         """Sample negative samples."""
24 |         raise NotImplementedError
25 | 
26 |     def sample(self, assign_result, masks, gt_masks, **kwargs):
27 |         """Directly returns the positive and negative indices of samples.
28 | 
29 |         Args:
30 |             assign_result (:obj:`AssignResult`): Assigned results
31 |             masks (torch.Tensor): Predicted masks
32 |             gt_masks (torch.Tensor): Ground truth masks
33 |         Returns:
34 |             :obj:`SamplingResult`: sampler results
35 |         """
36 |         pos_inds = torch.nonzero(assign_result.gt_inds > 0,
37 |                                  as_tuple=False).squeeze(-1).unique()
38 |         neg_inds = torch.nonzero(assign_result.gt_inds == 0,
39 |                                  as_tuple=False).squeeze(-1).unique()
40 |         gt_flags = masks.new_zeros(masks.shape[0], dtype=torch.uint8)
41 |         sampling_result = MaskSamplingResult(pos_inds, neg_inds, masks,
42 |                                              gt_masks, assign_result, gt_flags)
43 |         return sampling_result
44 | 
-------------------------------------------------------------------------------- /mmseg_custom/core/box/samplers/mask_sampling_result.py: --------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
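# Sketch of what `MaskPseudoSampler.sample` above produces, assuming an
# `assign_result` with gt_inds = tensor([2, 0, 1]): entries with gt_inds > 0
# give pos_inds = [0, 2], entries with gt_inds == 0 give neg_inds = [1];
# no subsampling is performed.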
2 | """copy from 3 | https://github.com/ZwwWayne/K-Net/blob/main/knet/det/mask_pseudo_sampler.py.""" 4 | 5 | import torch 6 | 7 | from .sampling_result import SamplingResult 8 | 9 | 10 | class MaskSamplingResult(SamplingResult): 11 | """Mask sampling result.""" 12 | def __init__(self, pos_inds, neg_inds, masks, gt_masks, assign_result, 13 | gt_flags): 14 | self.pos_inds = pos_inds 15 | self.neg_inds = neg_inds 16 | self.pos_masks = masks[pos_inds] 17 | self.neg_masks = masks[neg_inds] 18 | self.pos_is_gt = gt_flags[pos_inds] 19 | 20 | self.num_gts = gt_masks.shape[0] 21 | self.pos_assigned_gt_inds = assign_result.gt_inds[pos_inds] - 1 22 | 23 | if gt_masks.numel() == 0: 24 | # hack for index error case 25 | assert self.pos_assigned_gt_inds.numel() == 0 26 | self.pos_gt_masks = torch.empty_like(gt_masks) 27 | else: 28 | self.pos_gt_masks = gt_masks[self.pos_assigned_gt_inds, :] 29 | 30 | if assign_result.labels is not None: 31 | self.pos_gt_labels = assign_result.labels[pos_inds] 32 | else: 33 | self.pos_gt_labels = None 34 | 35 | @property 36 | def masks(self): 37 | """torch.Tensor: concatenated positive and negative boxes""" 38 | return torch.cat([self.pos_masks, self.neg_masks]) 39 | 40 | def __nice__(self): 41 | data = self.info.copy() 42 | data['pos_masks'] = data.pop('pos_masks').shape 43 | data['neg_masks'] = data.pop('neg_masks').shape 44 | parts = [f"'{k}': {v!r}" for k, v in sorted(data.items())] 45 | body = ' ' + ',\n '.join(parts) 46 | return '{\n' + body + '\n}' 47 | 48 | @property 49 | def info(self): 50 | """Returns a dictionary of info about the object.""" 51 | return { 52 | 'pos_inds': self.pos_inds, 53 | 'neg_inds': self.neg_inds, 54 | 'pos_masks': self.pos_masks, 55 | 'neg_masks': self.neg_masks, 56 | 'pos_is_gt': self.pos_is_gt, 57 | 'num_gts': self.num_gts, 58 | 'pos_assigned_gt_inds': self.pos_assigned_gt_inds, 59 | } 60 | -------------------------------------------------------------------------------- /mmseg_custom/core/evaluation/__init__.py: -------------------------------------------------------------------------------- 1 | from .eval_hooks_multi_steps import EvalHookMultiSteps, DistEvalHookMultiSteps 2 | 3 | __all__ = ['EvalHookMultiSteps', 'DistEvalHookMultiSteps'] -------------------------------------------------------------------------------- /mmseg_custom/core/hook/__init__.py: -------------------------------------------------------------------------------- 1 | from .ema import ConstantMomentumEMAHook 2 | 3 | __all__ = ['ConstantMomentumEMAHook'] -------------------------------------------------------------------------------- /mmseg_custom/core/mask/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Shanghai AI Lab. All rights reserved. 2 | from .utils import mask2bbox # noqa: F401,F403 3 | -------------------------------------------------------------------------------- /mmseg_custom/core/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
2 | from .dist_utils import (DistOptimizerHook, all_reduce_dict, allreduce_grads,
3 |                          reduce_mean)
4 | from .misc import add_prefix, multi_apply
5 | 
6 | __all__ = [
7 |     'add_prefix', 'multi_apply', 'DistOptimizerHook', 'allreduce_grads',
8 |     'all_reduce_dict', 'reduce_mean'
9 | ]
10 | 
-------------------------------------------------------------------------------- /mmseg_custom/core/utils/misc.py: --------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from functools import partial
3 | 
4 | 
5 | def multi_apply(func, *args, **kwargs):
6 |     """Apply function to a list of arguments.
7 | 
8 |     Note:
9 |         This function applies the ``func`` to multiple inputs and
10 |         maps the multiple outputs of the ``func`` into different
11 |         lists. Each list contains the same type of outputs corresponding
12 |         to different inputs.
13 | 
14 |     Args:
15 |         func (Function): A function that will be applied to a list of
16 |             arguments.
17 | 
18 |     Returns:
19 |         tuple(list): A tuple of lists; each list contains \
20 |             one kind of result returned by the function.
21 |     """
22 |     pfunc = partial(func, **kwargs) if kwargs else func
23 |     map_results = map(pfunc, *args)
24 |     return tuple(map(list, zip(*map_results)))
25 | 
26 | 
27 | def add_prefix(inputs, prefix):
28 |     """Add prefix for dict.
29 | 
30 |     Args:
31 |         inputs (dict): The input dict with str keys.
32 |         prefix (str): The prefix to add.
33 | 
34 |     Returns:
35 |         dict: The dict with keys updated with ``prefix``.
36 |     """
37 | 
38 |     outputs = dict()
39 |     for name, value in inputs.items():
40 |         outputs[f'{prefix}.{name}'] = value
41 | 
42 |     return outputs
43 | 
-------------------------------------------------------------------------------- /mmseg_custom/datasets/__init__.py: --------------------------------------------------------------------------------
1 | from .ade20k_metric import ADE20KMetricDataset
2 | from .ade20k_151 import ADE20K151Dataset
3 | from .cityscapes_metric import CityscapesMetricDataset
4 | from .cityscapes_20 import Cityscapes20Dataset, LoadAnnotationsCityscapes20
5 | from .coco_stuff_172 import COCOStuff172Dataset, LoadAnnotationsCOCOStuff172
6 | from .pipelines import ToMask, SETR_Resize
7 | 
8 | __all__ = ['ADE20K151Dataset', 'Cityscapes20Dataset', 'LoadAnnotationsCityscapes20',
9 |            'COCOStuff172Dataset', 'LoadAnnotationsCOCOStuff172',
10 |            'ToMask', 'SETR_Resize', 'ADE20KMetricDataset', 'CityscapesMetricDataset']
-------------------------------------------------------------------------------- /mmseg_custom/datasets/pipelines/__init__.py: --------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from .formatting import DefaultFormatBundle, ToMask 3 | from .transform import MapillaryHack, PadShortSide, SETR_Resize 4 | 5 | __all__ = [ 6 | 'DefaultFormatBundle', 'ToMask', 'SETR_Resize', 'PadShortSide', 7 | 'MapillaryHack' 8 | ] 9 | -------------------------------------------------------------------------------- /mmseg_custom/models/__init__.py: -------------------------------------------------------------------------------- 1 | from .backbones import * 2 | from .decode_heads import * 3 | from .segmentors import * 4 | from .plugins import * 5 | from .utils import * 6 | from .losses import * -------------------------------------------------------------------------------- /mmseg_custom/models/backbones/__init__.py: -------------------------------------------------------------------------------- 1 | from .mit_custom_init_weights import MixVisionTransformerCustomInitWeights 2 | # from .resnet_custom_init_weights import ResNetV1cCustomInitWeights 3 | # from .mobilenet_v2_custom_init_weights import MobileNetV2CustomInitWeights 4 | # from .resnext_custom_init_weights import ResNeXtCustomInitWeights 5 | # from .beit_adapter import BEiTAdapter 6 | # from .beit_adapter_custom_init_weights import BEiTAdapterCustomInitWeights 7 | 8 | __all__ = ['MixVisionTransformerCustomInitWeights', 9 | # 'ResNetV1cCustomInitWeights', 10 | # 'MobileNetV2CustomInitWeights', 'ResNeXtCustomInitWeights', 11 | # 'BEiTAdapter', 'BEiTAdapterCustomInitWeights' 12 | ] -------------------------------------------------------------------------------- /mmseg_custom/models/backbones/mit_custom_init_weights.py: -------------------------------------------------------------------------------- 1 | from mmcv.runner.checkpoint import load_checkpoint 2 | 3 | from mmseg.models.backbones import MixVisionTransformer 4 | from mmseg.models.builder import BACKBONES 5 | from mmseg.utils import get_root_logger 6 | 7 | @BACKBONES.register_module() 8 | class MixVisionTransformerCustomInitWeights(MixVisionTransformer): 9 | def init_weights(self): 10 | if self.init_cfg.get('type', None) == 'Pretrained': 11 | pretrained = self.init_cfg['checkpoint'] 12 | if isinstance(pretrained, str): 13 | logger = get_root_logger() 14 | load_checkpoint(self, pretrained, strict=False, logger=logger, 15 | revise_keys=[(r'^module\.', ''), (r'^backbone\.', '')]) 16 | -------------------------------------------------------------------------------- /mmseg_custom/models/backbones/mobilenet_v2_custom_init_weights.py: -------------------------------------------------------------------------------- 1 | from mmcv.runner.checkpoint import load_checkpoint 2 | 3 | from mmseg.models.backbones import MobileNetV2 4 | from mmseg.models.builder import BACKBONES 5 | from mmseg.utils import get_root_logger 6 | 7 | @BACKBONES.register_module() 8 | class MobileNetV2CustomInitWeights(MobileNetV2): 9 | def init_weights(self): 10 | if isinstance(self.init_cfg, list): 11 | super(MobileNetV2CustomInitWeights, self).init_weights() 12 | elif self.init_cfg.get('type', None) == 'Pretrained': 13 | pretrained = self.init_cfg['checkpoint'] 14 | if isinstance(pretrained, str): 15 | logger = get_root_logger() 16 | load_checkpoint(self, pretrained, strict=False, logger=logger, 17 | revise_keys=[(r'^module\.', ''), (r'^backbone\.', '')]) 18 | else: 19 | raise NotImplementedError 20 | -------------------------------------------------------------------------------- /mmseg_custom/models/backbones/resnet_custom_init_weights.py: 
-------------------------------------------------------------------------------- 1 | from mmcv.runner.checkpoint import load_checkpoint 2 | 3 | from mmseg.models.backbones import ResNetV1c 4 | from mmseg.models.builder import BACKBONES 5 | from mmseg.utils import get_root_logger 6 | 7 | @BACKBONES.register_module() 8 | class ResNetV1cCustomInitWeights(ResNetV1c): 9 | def init_weights(self): 10 | if isinstance(self.init_cfg, list): 11 | super(ResNetV1cCustomInitWeights, self).init_weights() 12 | elif self.init_cfg.get('type', None) == 'Pretrained': 13 | pretrained = self.init_cfg['checkpoint'] 14 | if isinstance(pretrained, str): 15 | logger = get_root_logger() 16 | load_checkpoint(self, pretrained, strict=False, logger=logger, 17 | revise_keys=[(r'^module\.', ''), (r'^backbone\.', '')]) 18 | else: 19 | raise NotImplementedError 20 | -------------------------------------------------------------------------------- /mmseg_custom/models/builder.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import warnings # noqa: F401,F403 3 | 4 | from mmcv.utils import Registry 5 | 6 | TRANSFORMER = Registry('Transformer') 7 | MASK_ASSIGNERS = Registry('mask_assigner') 8 | MATCH_COST = Registry('match_cost') 9 | 10 | 11 | def build_match_cost(cfg): 12 | """Build Match Cost.""" 13 | return MATCH_COST.build(cfg) 14 | 15 | 16 | def build_assigner(cfg): 17 | """Build Assigner.""" 18 | return MASK_ASSIGNERS.build(cfg) 19 | 20 | 21 | def build_transformer(cfg): 22 | """Build Transformer.""" 23 | return TRANSFORMER.build(cfg) 24 | -------------------------------------------------------------------------------- /mmseg_custom/models/decode_heads/__init__.py: -------------------------------------------------------------------------------- 1 | from .segformer_head_unet_fc_head_multi_step import ( 2 | SegformerHeadUnetFCHeadMultiStep, 3 | ) 4 | from .segformer_head_unet_fc_head_single_step import ( 5 | SegformerHeadUnetFCHeadSingleStep, 6 | ) 7 | 8 | __all__ = [ 9 | 'SegformerHeadUnetFCHeadMultiStep', 10 | 'SegformerHeadUnetFCHeadSingleStep', 11 | ] 12 | -------------------------------------------------------------------------------- /mmseg_custom/models/decode_heads/diffusion/__init__.py: -------------------------------------------------------------------------------- 1 | from .misc import log_1_min_a, log_add_exp, extract,\ 2 | log_categorical, index_to_log_onehot, log_onehot_to_index 3 | from .schedule import alpha_schedule, alpha_schedule_torch, q_pred, cos_alpha_schedule_torch, cos_alpha_schedule, q_posterior, q_posterior_log, q_pred_log 4 | 5 | __all__ = ['log_1_min_a', 'log_add_exp', 'extract', 'log_categorical', 6 | 'index_to_log_onehot', 'log_onehot_to_index', 'q_pred', 7 | 'alpha_schedule', 'alpha_schedule_torch', 8 | 'cos_alpha_schedule', 'cos_alpha_schedule_torch', 'q_posterior', 9 | 'q_posterior_log', 'q_pred_log' 10 | ] -------------------------------------------------------------------------------- /mmseg_custom/models/decode_heads/diffusion/misc.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn.functional as F 4 | 5 | 6 | def log_1_min_a(a): 7 | return torch.log(1 - a.exp() + 1e-40) 8 | 9 | 10 | def log_add_exp(a, b): 11 | # https://blog.csdn.net/lovewubo/article/details/37939725 12 | maximum = torch.max(a, b) 13 | return maximum + torch.log(torch.exp(a - maximum) + torch.exp(b - maximum)) 14 | 15 | 16 | def extract(a, 
t, x_shape): 17 | b, *_ = t.shape 18 | out = a.gather(-1, t) 19 | return out.reshape(b, *((1,) * (len(x_shape) - 1))) 20 | 21 | 22 | def log_categorical(log_x_start, log_prob): 23 | return (log_x_start.exp() * log_prob).sum(dim=1) 24 | 25 | 26 | def index_to_log_onehot(x, num_classes): 27 | assert x.max().item() < num_classes, \ 28 | f'Error: {x.max().item()} >= {num_classes}' 29 | x_onehot = F.one_hot(x, num_classes) 30 | permute_order = (0, -1) + tuple(range(1, len(x.size()))) 31 | x_onehot = x_onehot.permute(permute_order) 32 | log_x = torch.log(x_onehot.float().clamp(min=1e-30)) 33 | return log_x 34 | 35 | 36 | def log_onehot_to_index(log_x): 37 | return log_x.argmax(1) 38 | 39 | def sample_categorical(logits): # use gumbel to sample onehot vector from log probability 40 | uniform = torch.rand_like(logits) 41 | gumbel_noise = -torch.log(-torch.log(uniform + 1e-30) + 1e-30) 42 | sample = (gumbel_noise + logits).argmax(dim=1).long() 43 | # log_sample = index_to_log_onehot(sample, num_classes) 44 | return sample -------------------------------------------------------------------------------- /mmseg_custom/models/decode_heads/unet/__init__.py: -------------------------------------------------------------------------------- 1 | from .unet import Unet 2 | from .unet_time_embed import Unet as UnetTimeEmbedding 3 | 4 | __all__ = ['Unet', 'UnetTimeEmbedding'] -------------------------------------------------------------------------------- /mmseg_custom/models/decode_heads/unet/attention.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from einops import rearrange 3 | from torch import einsum, nn 4 | 5 | from .norm import LayerNorm 6 | 7 | 8 | class LinearAttention(nn.Module): 9 | def __init__(self, dim, heads = 4, dim_head = 32): 10 | super().__init__() 11 | self.scale = dim_head ** -0.5 12 | self.heads = heads 13 | hidden_dim = dim_head * heads 14 | self.to_qkv = nn.Conv2d(dim, hidden_dim * 3, 1, bias = False) 15 | 16 | self.to_out = nn.Sequential( 17 | nn.Conv2d(hidden_dim, dim, 1), 18 | LayerNorm(dim) 19 | ) 20 | 21 | def forward(self, x): 22 | b, c, h, w = x.shape 23 | qkv = self.to_qkv(x).chunk(3, dim = 1) 24 | q, k, v = map(lambda t: rearrange(t, 'b (h c) x y -> b h c (x y)', h = self.heads), qkv) 25 | 26 | q = q.softmax(dim = -2) 27 | k = k.softmax(dim = -1) 28 | 29 | q = q * self.scale 30 | v = v / (h * w) 31 | 32 | context = torch.einsum('b h d n, b h e n -> b h d e', k, v) 33 | 34 | out = torch.einsum('b h d e, b h d n -> b h e n', context, q) 35 | out = rearrange(out, 'b h c (x y) -> b (h c) x y', h = self.heads, x = h, y = w) 36 | return self.to_out(out) 37 | 38 | class Attention(nn.Module): 39 | def __init__(self, dim, heads = 4, dim_head = 32): 40 | super().__init__() 41 | self.scale = dim_head ** -0.5 42 | self.heads = heads 43 | hidden_dim = dim_head * heads 44 | 45 | self.to_qkv = nn.Conv2d(dim, hidden_dim * 3, 1, bias = False) 46 | self.to_out = nn.Conv2d(hidden_dim, dim, 1) 47 | 48 | def forward(self, x): 49 | b, c, h, w = x.shape 50 | qkv = self.to_qkv(x).chunk(3, dim = 1) 51 | q, k, v = map(lambda t: rearrange(t, 'b (h c) x y -> b h c (x y)', h = self.heads), qkv) 52 | 53 | q = q * self.scale 54 | 55 | sim = einsum('b h d i, b h d j -> b h i j', q, k) 56 | attn = sim.softmax(dim = -1) 57 | out = einsum('b h i j, b h d j -> b h i d', attn, v) 58 | 59 | out = rearrange(out, 'b h (x y) d -> b (h d) x y', x = h, y = w) 60 | return self.to_out(out) 
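Both attention variants above are shape-preserving on NCHW feature maps. A minimal smoke test, assuming `torch` and `einops` are installed (sizes are illustrative, not from the repo):

import torch

from mmseg_custom.models.decode_heads.unet.attention import (Attention,
                                                             LinearAttention)

x = torch.randn(2, 64, 16, 16)           # (batch, channels, height, width)
print(LinearAttention(dim=64)(x).shape)  # torch.Size([2, 64, 16, 16])
print(Attention(dim=64)(x).shape)        # torch.Size([2, 64, 16, 16])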
-------------------------------------------------------------------------------- /mmseg_custom/models/decode_heads/unet/norm.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class LayerNorm(nn.Module): 5 | def __init__(self, dim): 6 | super().__init__() 7 | self.g = nn.Parameter(torch.ones(1, dim, 1, 1)) 8 | 9 | def forward(self, x): 10 | eps = 1e-5 if x.dtype == torch.float32 else 1e-3 11 | var = torch.var(x, dim = 1, unbiased = False, keepdim = True) 12 | mean = torch.mean(x, dim = 1, keepdim = True) 13 | return (x - mean) * (var + eps).rsqrt() * self.g 14 | 15 | class PreNorm(nn.Module): 16 | def __init__(self, dim, fn): 17 | super().__init__() 18 | self.fn = fn 19 | self.norm = LayerNorm(dim) 20 | 21 | def forward(self, x): 22 | x = self.norm(x) 23 | return self.fn(x) -------------------------------------------------------------------------------- /mmseg_custom/models/decode_heads/unet/pos_emb.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | import torch 4 | from einops import rearrange 5 | from torch import nn 6 | 7 | 8 | class LearnedSinusoidalPosEmb(nn.Module): 9 | """ following @crowsonkb 's lead with learned sinusoidal pos emb """ 10 | """ https://github.com/crowsonkb/v-diffusion-jax/blob/master/diffusion/models/danbooru_128.py#L8 """ 11 | 12 | def __init__(self, dim): 13 | super().__init__() 14 | assert (dim % 2) == 0 15 | half_dim = dim // 2 16 | self.weights = nn.Parameter(torch.randn(half_dim)) 17 | 18 | def forward(self, x): 19 | x = rearrange(x, 'b -> b 1') 20 | freqs = x * rearrange(self.weights, 'd -> 1 d') * 2 * math.pi 21 | fouriered = torch.cat((freqs.sin(), freqs.cos()), dim = -1) 22 | fouriered = torch.cat((x, fouriered), dim = -1) 23 | return fouriered 24 | 25 | 26 | class RandomOrLearnedSinusoidalPosEmb(LearnedSinusoidalPosEmb): 27 | """ following @crowsonkb 's lead with random (learned optional) sinusoidal pos emb """ 28 | """ https://github.com/crowsonkb/v-diffusion-jax/blob/master/diffusion/models/danbooru_128.py#L8 """ 29 | 30 | def __init__(self, dim, is_random = False): 31 | super().__init__(dim) 32 | assert (dim % 2) == 0 33 | half_dim = dim // 2 34 | self.weights = nn.Parameter(torch.randn(half_dim), requires_grad = not is_random) 35 | 36 | 37 | class SinusoidalPosEmb(nn.Module): 38 | def __init__(self, dim): 39 | super().__init__() 40 | self.dim = dim 41 | 42 | def forward(self, x): 43 | device = x.device 44 | half_dim = self.dim // 2 45 | emb = math.log(10000) / (half_dim - 1) 46 | emb = torch.exp(torch.arange(half_dim, device=device) * -emb) 47 | emb = x[:, None] * emb[None, :] 48 | emb = torch.cat((emb.sin(), emb.cos()), dim=-1) 49 | return emb -------------------------------------------------------------------------------- /mmseg_custom/models/losses/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
2 | # from .cross_entropy_loss import (CrossEntropyLoss, binary_cross_entropy,
3 | #                                  cross_entropy, mask_cross_entropy)
4 | from .dice_loss import DiceLoss
5 | from .focal_loss import FocalLoss
6 | from .match_costs import (ClassificationCost, CrossEntropyLossCost, DiceCost,
7 |                           MaskFocalLossCost)
8 | 
9 | __all__ = [
10 |     # 'cross_entropy', 'binary_cross_entropy', 'mask_cross_entropy', 'CrossEntropyLoss',
11 |     'DiceLoss', 'FocalLoss', 'ClassificationCost',
12 |     'MaskFocalLossCost', 'DiceCost', 'CrossEntropyLossCost'
13 | ]
14 | 
-------------------------------------------------------------------------------- /mmseg_custom/models/plugins/__init__.py: --------------------------------------------------------------------------------
1 | # Copyright (c) Shanghai AI Lab. All rights reserved.
2 | from .msdeformattn_pixel_decoder import MSDeformAttnPixelDecoder
3 | from .pixel_decoder import PixelDecoder, TransformerEncoderPixelDecoder
4 | 
5 | __all__ = [
6 |     'PixelDecoder', 'TransformerEncoderPixelDecoder',
7 |     'MSDeformAttnPixelDecoder'
8 | ]
9 | 
-------------------------------------------------------------------------------- /mmseg_custom/models/segmentors/__init__.py: --------------------------------------------------------------------------------
1 | from .encoder_decoder_freeze import EncoderDecoderFreeze
2 | from .encoder_decoder_ensemble import EncoderDecoderEnsemble
3 | from .encoder_decoder_diffusion import EncoderDecoderDiffusion
4 | from .encoder_decoder_diffusion_ensemble import EncoderDecoderDiffusionEnsemble
5 | 
6 | __all__ = ['EncoderDecoderFreeze',
7 |            'EncoderDecoderEnsemble',
8 |            'EncoderDecoderDiffusion',
9 |            'EncoderDecoderDiffusionEnsemble'
10 |            ]
-------------------------------------------------------------------------------- /mmseg_custom/models/segmentors/encoder_decoder_freeze.py: --------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 | 
5 | from mmseg.core import add_prefix
6 | from mmseg.ops import resize
7 | from mmseg.models import builder
8 | from mmseg.models.builder import SEGMENTORS
9 | from mmseg.models.segmentors.encoder_decoder import EncoderDecoder
10 | from mmseg.utils import get_root_logger
11 | 
12 | 
13 | @SEGMENTORS.register_module()
14 | class EncoderDecoderFreeze(EncoderDecoder):
15 |     def __init__(self, freeze_parameters=['backbone'], **kwargs):
16 |         super(EncoderDecoderFreeze, self).__init__(**kwargs)
17 |         self._set_trainable_parameters(freeze_parameters)
18 | 
19 |     def _set_trainable_parameters(self, freeze_parameters=None):
20 |         _logger = get_root_logger()
21 |         if freeze_parameters:
22 |             for param_name in freeze_parameters:
23 |                 model = getattr(self, param_name)
24 |                 if hasattr(model, '_set_trainable_parameters'):
25 |                     model._set_trainable_parameters()
26 |                 else:
27 |                     for name, param in model.named_parameters():
28 |                         param.requires_grad = False
29 |                 _logger.info(f'Parameters in {param_name} frozen.')
-------------------------------------------------------------------------------- /mmseg_custom/models/utils/__init__.py: --------------------------------------------------------------------------------
1 | from .assigner import MaskHungarianAssigner
2 | from .point_sample import get_uncertain_point_coords_with_randomness
3 | from .positional_encoding import (LearnedPositionalEncoding,
4 |                                   SinePositionalEncoding)
5 | from .transformer import (DetrTransformerDecoder, DetrTransformerDecoderLayer,
6 |                           DynamicConv, Transformer)
7 | 
8 | __all__ = [
9 | 
    'DetrTransformerDecoderLayer', 'DetrTransformerDecoder', 'DynamicConv',
10 |     'Transformer', 'LearnedPositionalEncoding', 'SinePositionalEncoding',
11 |     'MaskHungarianAssigner', 'get_uncertain_point_coords_with_randomness'
12 | ]
-------------------------------------------------------------------------------- /tools/convert_ema_model.py: --------------------------------------------------------------------------------
1 | import os
2 | import torch
3 | from argparse import ArgumentParser
4 | 
5 | 
6 | def parse_args():
7 |     parser = ArgumentParser()
8 |     parser.add_argument('checkpoint', help='Checkpoint file')
9 |     parser.add_argument('--out', type=str, default="origin.pth", help='output file name, saved next to the checkpoint')
10 |     args = parser.parse_args()
11 |     return args
12 | 
13 | 
14 | def main(args):
15 |     checkpoint = args.checkpoint
16 |     ema_ckpt = torch.load(checkpoint, map_location='cpu')
17 |     ema_model = ema_ckpt['state_dict']
18 |     origin_model = dict()
19 |     for key, value in ema_model.items():
20 |         if key.startswith('ema'):
21 |             continue
22 |         else:
23 |             ema_key = f"ema_{key.replace('.', '_')}"
24 |             origin_model[key] = ema_model[ema_key]
25 |     origin_ckpt = {'state_dict': origin_model}
26 |     output_dir = os.path.dirname(checkpoint)
27 |     output_file = os.path.join(output_dir, args.out)
28 |     torch.save(origin_ckpt, output_file)
29 |     print('Done!')
30 | 
31 | 
32 | if __name__ == '__main__':
33 |     args = parse_args()
34 |     main(args)
-------------------------------------------------------------------------------- /tools/dist_test.sh: --------------------------------------------------------------------------------
1 | CONFIG=$1
2 | CHECKPOINT=$2
3 | GPUS=$3
4 | NNODES=${NNODES:-1}
5 | NODE_RANK=${NODE_RANK:-0}
6 | PORT=${PORT:-29500}
7 | MASTER_ADDR=${MASTER_ADDR:-"127.0.0.1"}
8 | 
9 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \
10 | python -m torch.distributed.launch \
11 |     --nnodes=$NNODES \
12 |     --node_rank=$NODE_RANK \
13 |     --master_addr=$MASTER_ADDR \
14 |     --nproc_per_node=$GPUS \
15 |     --master_port=$PORT \
16 |     $(dirname "$0")/test.py \
17 |     $CONFIG \
18 |     $CHECKPOINT \
19 |     --launcher pytorch \
20 |     ${@:4}
21 | 
-------------------------------------------------------------------------------- /tools/dist_test_diffusion.sh: --------------------------------------------------------------------------------
1 | CONFIG=$1
2 | CHECKPOINT=$2
3 | GPUS=$3
4 | NNODES=${NNODES:-1}
5 | NODE_RANK=${NODE_RANK:-0}
6 | PORT=${PORT:-29500}
7 | MASTER_ADDR=${MASTER_ADDR:-"127.0.0.1"}
8 | 
9 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \
10 | python -m torch.distributed.launch \
11 |     --nnodes=$NNODES \
12 |     --node_rank=$NODE_RANK \
13 |     --master_addr=$MASTER_ADDR \
14 |     --nproc_per_node=$GPUS \
15 |     --master_port=$PORT \
16 |     $(dirname "$0")/test_diffusion.py \
17 |     $CONFIG \
18 |     $CHECKPOINT \
19 |     --launcher pytorch \
20 |     ${@:4}
21 | 
-------------------------------------------------------------------------------- /tools/dist_test_diffusion_origin.sh: --------------------------------------------------------------------------------
1 | CONFIG=$1
2 | CHECKPOINT=$2
3 | GPUS=$3
4 | NNODES=${NNODES:-1}
5 | NODE_RANK=${NODE_RANK:-0}
6 | PORT=${PORT:-29500}
7 | MASTER_ADDR=${MASTER_ADDR:-"127.0.0.1"}
8 | 
9 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \
10 | python -m torch.distributed.launch \
11 |     --nnodes=$NNODES \
12 |     --node_rank=$NODE_RANK \
13 |     --master_addr=$MASTER_ADDR \
14 |     --nproc_per_node=$GPUS \
15 |     --master_port=$PORT \
16 |     $(dirname "$0")/test_diffusion.py \
17 |     $CONFIG \
18 |     $CHECKPOINT \
19 |     --launcher pytorch \
20 |     ${@:4}
21 | 
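tools/convert_ema_model.py above relies on a naming convention for EMA weights; a minimal sketch of the assumed mapping (the parameter key is hypothetical):

# The EMA hook stores a shadow copy of every parameter under
# 'ema_<name with dots replaced by underscores>', which is exactly what the
# conversion script inverts.
key = 'decode_head.conv_seg.weight'
ema_key = f"ema_{key.replace('.', '_')}"
print(ema_key)  # ema_decode_head_conv_seg_weight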
-------------------------------------------------------------------------------- /tools/dist_train.sh: --------------------------------------------------------------------------------
1 | CONFIG=$1
2 | GPUS=$2
3 | NNODES=${NNODES:-1}
4 | NODE_RANK=${NODE_RANK:-0}
5 | PORT=${PORT:-29500}
6 | MASTER_ADDR=${MASTER_ADDR:-"127.0.0.1"}
7 | 
8 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \
9 | python -m torch.distributed.launch \
10 |     --nnodes=$NNODES \
11 |     --node_rank=$NODE_RANK \
12 |     --master_addr=$MASTER_ADDR \
13 |     --nproc_per_node=$GPUS \
14 |     --master_port=$PORT \
15 |     $(dirname "$0")/train.py \
16 |     $CONFIG \
17 |     --launcher pytorch ${@:3}
18 | 
-------------------------------------------------------------------------------- /tools/dist_train_diffusion.sh: --------------------------------------------------------------------------------
1 | CONFIG=$1
2 | GPUS=$2
3 | NNODES=${NNODES:-1}
4 | NODE_RANK=${NODE_RANK:-0}
5 | PORT=${PORT:-29500}
6 | MASTER_ADDR=${MASTER_ADDR:-"127.0.0.1"}
7 | 
8 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \
9 | python -m torch.distributed.launch \
10 |     --nnodes=$NNODES \
11 |     --node_rank=$NODE_RANK \
12 |     --master_addr=$MASTER_ADDR \
13 |     --nproc_per_node=$GPUS \
14 |     --master_port=$PORT \
15 |     $(dirname "$0")/train_diffusion.py \
16 |     $CONFIG \
17 |     --launcher pytorch ${@:3}
18 | 
-------------------------------------------------------------------------------- /tools/get_flops.py: --------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | import argparse
3 | import sys
4 | from mmcv import Config
5 | from mmcv.cnn import get_model_complexity_info
6 | 
7 | from mmseg.models import build_segmentor
8 | 
9 | import mmseg_custom
10 | 
11 | def parse_args():
12 |     parser = argparse.ArgumentParser(
13 |         description='Get the FLOPs of a segmentor')
14 |     parser.add_argument('config', help='train config file path')
15 |     parser.add_argument(
16 |         '--shape',
17 |         type=int,
18 |         nargs='+',
19 |         default=[2048, 1024],
20 |         help='input image size')
21 |     parser.add_argument(
22 |         '--out',
23 |         type=str,
24 |         default=None)
25 |     args = parser.parse_args()
26 |     return args
27 | 
28 | 
29 | def main():
30 | 
31 |     args = parse_args()
32 | 
33 |     if len(args.shape) == 1:
34 |         input_shape = (3, args.shape[0], args.shape[0])
35 |     elif len(args.shape) == 2:
36 |         input_shape = (3, ) + tuple(args.shape)
37 |     else:
38 |         raise ValueError('invalid input shape')
39 | 
40 | 
41 |     cfg = Config.fromfile(args.config)
42 |     cfg.model.pretrained = None
43 |     model = build_segmentor(
44 |         cfg.model,
45 |         train_cfg=cfg.get('train_cfg'),
46 |         test_cfg=cfg.get('test_cfg')).cuda()
47 |     model.eval()
48 | 
49 |     if hasattr(model, 'forward_dummy'):
50 |         model.forward = model.forward_dummy
51 |     else:
52 |         raise NotImplementedError(
53 |             'FLOPs counter is currently not supported with {}'.
54 |             format(model.__class__.__name__))
55 |     if args.out is not None:
56 |         f = open(args.out, 'w')
57 |         ost = f
58 |     else:
59 |         ost = sys.stdout
60 |     flops, params = get_model_complexity_info(model, input_shape, ost=ost)
61 |     split_line = '=' * 30
62 |     print('{0}\nInput shape: {1}\nFlops: {2}\nParams: {3}\n{0}'.format(
63 |         split_line, input_shape, flops, params))
64 |     print('!!!Please be cautious if you use the results in papers. 
'
65 |           'You may need to check if all ops are supported and verify that the '
66 |           'flops computation is correct.')
67 |     if args.out is not None:
68 |         f.close()
69 | 
70 | if __name__ == '__main__':
71 |     main()
72 | 
-------------------------------------------------------------------------------- /tools/get_params.py: --------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | import argparse
3 | import sys
4 | from mmcv import Config
5 | 
6 | from mmseg.models import build_segmentor
7 | 
8 | import mmseg_custom
9 | 
10 | 
11 | def model_size(model):
12 |     total = sum([param.nelement() for param in model.parameters()])
13 |     return total / 1e6
14 | 
15 | 
16 | def parse_args():
17 |     parser = argparse.ArgumentParser(
18 |         description='Get the parameter count of a segmentor')
19 |     parser.add_argument('config', help='train config file path')
20 |     parser.add_argument(
21 |         '--out',
22 |         type=str,
23 |         default=None)
24 |     args = parser.parse_args()
25 |     return args
26 | 
27 | 
28 | def main():
29 | 
30 |     args = parse_args()
31 | 
32 |     cfg = Config.fromfile(args.config)
33 |     cfg.model.pretrained = None
34 |     model = build_segmentor(
35 |         cfg.model,
36 |         train_cfg=cfg.get('train_cfg'),
37 |         test_cfg=cfg.get('test_cfg')).cuda()
38 |     model.eval()
39 | 
40 |     print('total', model_size(model))
41 |     print('backbone', model_size(model.backbone))
42 |     print('decode_head', model_size(model.decode_head))
43 |     if hasattr(model.decode_head, 'unet'):
44 |         print('decode_head.unet', model_size(model.decode_head.unet))
45 | 
46 | 
47 | 
48 | 
49 | if __name__ == '__main__':
50 |     main()
51 | 
-------------------------------------------------------------------------------- /tools/slurm_test.sh: --------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | 
3 | set -x
4 | 
5 | PARTITION=$1
6 | JOB_NAME=$2
7 | CONFIG=$3
8 | CHECKPOINT=$4
9 | GPUS=${GPUS:-4}
10 | GPUS_PER_NODE=${GPUS_PER_NODE:-4}
11 | CPUS_PER_TASK=${CPUS_PER_TASK:-5}
12 | PY_ARGS=${@:5}
13 | SRUN_ARGS=${SRUN_ARGS:-""}
14 | 
15 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \
16 | srun -p ${PARTITION} \
17 |     --job-name=${JOB_NAME} \
18 |     --gres=gpu:${GPUS_PER_NODE} \
19 |     --ntasks=${GPUS} \
20 |     --ntasks-per-node=${GPUS_PER_NODE} \
21 |     --cpus-per-task=${CPUS_PER_TASK} \
22 |     --kill-on-bad-exit=1 \
23 |     ${SRUN_ARGS} \
24 |     python -u tools/test.py ${CONFIG} ${CHECKPOINT} --launcher="slurm" ${PY_ARGS}
25 | 
-------------------------------------------------------------------------------- /tools/slurm_train.sh: --------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | 
3 | set -x
4 | 
5 | PARTITION=$1
6 | JOB_NAME=$2
7 | CONFIG=$3
8 | GPUS=${GPUS:-4}
9 | GPUS_PER_NODE=${GPUS_PER_NODE:-4}
10 | CPUS_PER_TASK=${CPUS_PER_TASK:-5}
11 | SRUN_ARGS=${SRUN_ARGS:-""}
12 | PY_ARGS=${@:4}
13 | 
14 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \
15 | srun -p ${PARTITION} \
16 |     --job-name=${JOB_NAME} \
17 |     --gres=gpu:${GPUS_PER_NODE} \
18 |     --ntasks=${GPUS} \
19 |     --ntasks-per-node=${GPUS_PER_NODE} \
20 |     --cpus-per-task=${CPUS_PER_TASK} \
21 |     --kill-on-bad-exit=1 \
22 |     ${SRUN_ARGS} \
23 |     python -u tools/train.py ${CONFIG} --launcher="slurm" ${PY_ARGS}
24 | 
--------------------------------------------------------------------------------
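For reference, `model_size` in tools/get_params.py reports parameter counts in millions; a self-contained sanity check with a toy module (illustrative only, not from the repo):

import torch.nn as nn

def model_size(model):  # same definition as in tools/get_params.py
    return sum(param.nelement() for param in model.parameters()) / 1e6

layer = nn.Linear(1000, 1000)  # 1,000,000 weights + 1,000 biases
print(model_size(layer))       # 1.001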