├── LOGO_.jpg
├── Readme.md
├── classification
│   ├── ade_corruptions.py
│   ├── attacks
│   │   ├── __init__.py
│   │   ├── attack.py
│   │   ├── bim.py
│   │   ├── difgsm.py
│   │   ├── fgsm.py
│   │   ├── mifgsm.py
│   │   ├── pgd.py
│   │   ├── tifgsm.py
│   │   ├── tpgd.py
│   │   └── vmifgsm.py
│   ├── coco_corruptions.py
│   ├── datasets
│   │   ├── __init__.py
│   │   ├── dataset5k.py
│   │   ├── imagenet_b_dataset.py
│   │   ├── imagenet_dataset.py
│   │   └── imagenet_v2_dataset.py
│   ├── evaluate.py
│   ├── evaluate_scanline_infodrop.py
│   ├── generate_adv_images.py
│   ├── image_list.json
│   ├── imagecorruptions
│   │   ├── __init__.py
│   │   ├── corruptions.py
│   │   └── frost
│   │       ├── frost1.png
│   │       ├── frost2.png
│   │       ├── frost3.png
│   │       ├── frost4.jpg
│   │       ├── frost5.jpg
│   │       └── frost6.jpg
│   ├── imgnet_d2imgnet_id.txt
│   ├── inference.py
│   ├── inference_on_imagenet_c.py
│   ├── models
│   │   ├── __init__.py
│   │   ├── csm_triton.py
│   │   ├── csms6s.py
│   │   ├── vmamba.py
│   │   └── vmamba_checks.py
│   ├── scripts
│   │   ├── evaluate_transferability.sh
│   │   ├── gen_adv_images.sh
│   │   ├── get_adv_freq_results.sh
│   │   ├── random_patch_drop.sh
│   │   ├── salient_drop.sh
│   │   ├── scan_line_info_drop.sh
│   │   └── shuffle_image.sh
│   └── vit_models_ipvit
│       ├── __init__.py
│       ├── deit.py
│       ├── deit_ensemble.py
│       ├── deit_modified.py
│       ├── dino.py
│       ├── resnet.py
│       ├── t2t_vit.py
│       ├── t2t_vit_dense.py
│       ├── t2t_vit_ghost.py
│       ├── t2t_vit_se.py
│       ├── tnt.py
│       ├── token_performer.py
│       ├── token_transformer.py
│       ├── transformer_block.py
│       └── vit.py
├── detection
│   ├── __init__.py
│   ├── configs
│   │   ├── _base_
│   │   │   ├── datasets
│   │   │   │   ├── ade20k_instance.py
│   │   │   │   ├── ade20k_panoptic.py
│   │   │   │   ├── ade20k_semantic.py
│   │   │   │   ├── cityscapes_detection.py
│   │   │   │   ├── cityscapes_instance.py
│   │   │   │   ├── coco_caption.py
│   │   │   │   ├── coco_detection.py
│   │   │   │   ├── coco_instance.py
│   │   │   │   ├── coco_instance_semantic.py
│   │   │   │   ├── coco_panoptic.py
│   │   │   │   ├── coco_semantic.py
│   │   │   │   ├── deepfashion.py
│   │   │   │   ├── dsdl.py
│   │   │   │   ├── isaid_instance.py
│   │   │   │   ├── lvis_v0.5_instance.py
│   │   │   │   ├── lvis_v1_instance.py
│   │   │   │   ├── mot_challenge.py
│   │   │   │   ├── mot_challenge_det.py
│   │   │   │   ├── mot_challenge_reid.py
│   │   │   │   ├── objects365v1_detection.py
│   │   │   │   ├── objects365v2_detection.py
│   │   │   │   ├── openimages_detection.py
│   │   │   │   ├── refcoco+.py
│   │   │   │   ├── refcoco.py
│   │   │   │   ├── refcocog.py
│   │   │   │   ├── semi_coco_detection.py
│   │   │   │   ├── v3det.py
│   │   │   │   ├── voc0712.py
│   │   │   │   ├── wider_face.py
│   │   │   │   └── youtube_vis.py
│   │   │   ├── default_runtime.py
│   │   │   ├── models
│   │   │   │   ├── cascade-mask-rcnn_r50_fpn.py
│   │   │   │   ├── cascade-rcnn_r50_fpn.py
│   │   │   │   ├── fast-rcnn_r50_fpn.py
│   │   │   │   ├── faster-rcnn_r50-caffe-c4.py
│   │   │   │   ├── faster-rcnn_r50-caffe-dc5.py
│   │   │   │   ├── faster-rcnn_r50_fpn.py
│   │   │   │   ├── mask-rcnn_r50-caffe-c4.py
│   │   │   │   ├── mask-rcnn_r50_fpn.py
│   │   │   │   ├── retinanet_r50_fpn.py
│   │   │   │   ├── rpn_r50-caffe-c4.py
│   │   │   │   ├── rpn_r50_fpn.py
│   │   │   │   └── ssd300.py
│   │   │   └── schedules
│   │   │       ├── schedule_1x.py
│   │   │       ├── schedule_20e.py
│   │   │       └── schedule_2x.py
│   │   ├── convnext
│   │   │   ├── README.md
│   │   │   ├── cascade-mask-rcnn_convnext-s-p4-w7_fpn_4conv1fc-giou_amp-ms-crop-3x_coco.py
│   │   │   ├── cascade-mask-rcnn_convnext-t-p4-w7_fpn_4conv1fc-giou_amp-ms-crop-3x_coco.py
│   │   │   ├── mask-rcnn_convnext-t-p4-w7_fpn_amp-ms-crop-3x_coco.py
│   │   │   └── metafile.yml
│   │   ├── mask_rcnn
│   │   │   ├── README.md
│   │   │   ├── mask-rcnn_r101-caffe_fpn_1x_coco.py
│   │   │   ├── mask-rcnn_r101-caffe_fpn_ms-poly-3x_coco.py
│   │   │   ├── mask-rcnn_r101_fpn_1x_coco.py
│   │   │   ├── mask-rcnn_r101_fpn_2x_coco.py
│   │   │   ├── mask-rcnn_r101_fpn_8xb8-amp-lsj-200e_coco.py
│   │   │   ├── mask-rcnn_r101_fpn_ms-poly-3x_coco.py
│   │   │   ├── mask-rcnn_r18_fpn_8xb8-amp-lsj-200e_coco.py
│   │   │   ├── mask-rcnn_r50-caffe-c4_1x_coco.py
│   │   │   ├── mask-rcnn_r50-caffe_fpn_1x_coco.py
│   │   │   ├── mask-rcnn_r50-caffe_fpn_ms-1x_coco.py
│   │   │   ├── mask-rcnn_r50-caffe_fpn_ms-poly-1x_coco.py
│   │   │   ├── mask-rcnn_r50-caffe_fpn_ms-poly-2x_coco.py
│   │   │   ├── mask-rcnn_r50-caffe_fpn_ms-poly-3x_coco.py
│   │   │   ├── mask-rcnn_r50-caffe_fpn_poly-1x_coco_v1.py
│   │   │   ├── mask-rcnn_r50_fpn_1x-wandb_coco.py
│   │   │   ├── mask-rcnn_r50_fpn_1x_coco.py
│   │   │   ├── mask-rcnn_r50_fpn_2x_coco.py
│   │   │   ├── mask-rcnn_r50_fpn_8xb8-amp-lsj-200e_coco.py
│   │   │   ├── mask-rcnn_r50_fpn_amp-1x_coco.py
│   │   │   ├── mask-rcnn_r50_fpn_ms-poly-3x_coco.py
│   │   │   ├── mask-rcnn_r50_fpn_poly-1x_coco.py
│   │   │   ├── mask-rcnn_x101-32x4d_fpn_1x_coco.py
│   │   │   ├── mask-rcnn_x101-32x4d_fpn_2x_coco.py
│   │   │   ├── mask-rcnn_x101-32x4d_fpn_ms-poly-3x_coco.py
│   │   │   ├── mask-rcnn_x101-32x8d_fpn_1x_coco.py
│   │   │   ├── mask-rcnn_x101-32x8d_fpn_ms-poly-1x_coco.py
│   │   │   ├── mask-rcnn_x101-32x8d_fpn_ms-poly-3x_coco.py
│   │   │   ├── mask-rcnn_x101-64x4d_fpn_1x_coco.py
│   │   │   ├── mask-rcnn_x101-64x4d_fpn_2x_coco.py
│   │   │   ├── mask-rcnn_x101-64x4d_fpn_ms-poly_3x_coco.py
│   │   │   └── metafile.yml
│   │   ├── swin
│   │   │   ├── README.md
│   │   │   ├── mask-rcnn_swin-s-p4-w7_fpn_amp-ms-crop-3x_coco.py
│   │   │   ├── mask-rcnn_swin-t-p4-w7_fpn_1x_coco.py
│   │   │   ├── mask-rcnn_swin-t-p4-w7_fpn_amp-ms-crop-3x_coco.py
│   │   │   ├── mask-rcnn_swin-t-p4-w7_fpn_ms-crop-3x_coco.py
│   │   │   ├── metafile.yml
│   │   │   └── retinanet_swin-t-p4-w7_fpn_1x_coco.py
│   │   ├── vmamba
│   │   │   ├── mask_rcnn_vmamba_fpn_coco_base.py
│   │   │   ├── mask_rcnn_vmamba_fpn_coco_small.py
│   │   │   ├── mask_rcnn_vmamba_fpn_coco_small_ms_3x.py
│   │   │   ├── mask_rcnn_vmamba_fpn_coco_tiny.py
│   │   │   └── mask_rcnn_vmamba_fpn_coco_tiny_ms_3x.py
│   │   ├── vssm
│   │   │   ├── mask_rcnn_vssm_fpn_coco_base.py
│   │   │   ├── mask_rcnn_vssm_fpn_coco_small.py
│   │   │   ├── mask_rcnn_vssm_fpn_coco_small_ms_3x.py
│   │   │   ├── mask_rcnn_vssm_fpn_coco_tiny.py
│   │   │   └── mask_rcnn_vssm_fpn_coco_tiny_ms_3x.py
│   │   └── vssm1
│   │       ├── mask_rcnn_vssm_fpn_coco_base.py
│   │       ├── mask_rcnn_vssm_fpn_coco_small.py
│   │       ├── mask_rcnn_vssm_fpn_coco_small_ms_3x.py
│   │       ├── mask_rcnn_vssm_fpn_coco_tiny.py
│   │       ├── mask_rcnn_vssm_fpn_coco_tiny1.py
│   │       ├── mask_rcnn_vssm_fpn_coco_tiny1_ms_3x.py
│   │       └── mask_rcnn_vssm_fpn_coco_tiny_ms_3x.py
│   ├── model.py
│   ├── readme.md
│   └── tools
│       ├── analysis_tools
│       │   ├── analyze_logs.py
│       │   ├── analyze_results.py
│       │   ├── benchmark.py
│       │   ├── browse_dataset.py
│       │   ├── coco_error_analysis.py
│       │   ├── coco_occluded_separated_recall.py
│       │   ├── confusion_matrix.py
│       │   ├── eval_metric.py
│       │   ├── fuse_results.py
│       │   ├── get_flops.py
│       │   ├── mot
│       │   │   ├── browse_dataset.py
│       │   │   ├── dist_mot_search.sh
│       │   │   ├── mot_error_visualize.py
│       │   │   ├── mot_param_search.py
│       │   │   └── slurm_mot_search.sh
│       │   ├── optimize_anchors.py
│       │   ├── robustness_eval.py
│       │   └── test_robustness.py
│       ├── dataset_converters
│       │   ├── ade20k2coco.py
│       │   ├── cityscapes.py
│       │   ├── coco_stuff164k.py
│       │   ├── crowdhuman2coco.py
│       │   ├── images2coco.py
│       │   ├── mot2coco.py
│       │   ├── mot2reid.py
│       │   ├── pascal_voc.py
│       │   ├── prepare_coco_semantic_annos_from_panoptic_annos.py
│       │   ├── scripts
│       │   │   ├── preprocess_coco2017.sh
│       │   │   ├── preprocess_voc2007.sh
│       │   │   └── preprocess_voc2012.sh
│       │   └── youtubevis2coco.py
│       ├── deployment
│       │   ├── mmdet2torchserve.py
│       │   ├── mmdet_handler.py
│       │   └── test_torchserver.py
│       ├── dist_test.sh
│       ├── dist_test_tracking.sh
│       ├── dist_train.sh
│       ├── misc
│       │   ├── download_dataset.py
│       │   ├── gen_coco_panoptic_test_info.py
│       │   ├── get_crowdhuman_id_hw.py
│       │   ├── get_image_metas.py
│       │   ├── print_config.py
│       │   └── split_coco.py
│       ├── model_converters
│       │   ├── detectron2_to_mmdet.py
│       │   ├── detectron2pytorch.py
│       │   ├── detic_to_mmdet.py
│       │   ├── glip_to_mmdet.py
│       │   ├── groundingdino_to_mmdet.py
│       │   ├── publish_model.py
│       │   ├── regnet2mmdet.py
│       │   ├── selfsup2mmdet.py
│       │   ├── swinv1_to_mmdet.py
│       │   ├── upgrade_model_version.py
│       │   └── upgrade_ssd_version.py
│       ├── slurm_test.sh
│       ├── slurm_test_tracking.sh
│       ├── slurm_train.sh
│       ├── test.py
│       ├── test_tracking.py
│       └── train.py
├── kernels
│   └── selective_scan
│       ├── README.md
│       ├── csrc
│       │   └── selective_scan
│       │       ├── cub_extra.cuh
│       │       ├── cus
│       │       │   ├── selective_scan.cpp
│       │       │   ├── selective_scan_bwd_kernel.cuh
│       │       │   ├── selective_scan_core_bwd.cu
│       │       │   ├── selective_scan_core_fwd.cu
│       │       │   └── selective_scan_fwd_kernel.cuh
│       │       ├── cusndstate
│       │       │   ├── selective_scan_bwd_kernel_ndstate.cuh
│       │       │   ├── selective_scan_core_bwd.cu
│       │       │   ├── selective_scan_core_fwd.cu
│       │       │   ├── selective_scan_fwd_kernel_ndstate.cuh
│       │       │   ├── selective_scan_ndstate.cpp
│       │       │   └── selective_scan_ndstate.h
│       │       ├── cusnrow
│       │       │   ├── selective_scan_bwd_kernel_nrow.cuh
│       │       │   ├── selective_scan_core_bwd.cu
│       │       │   ├── selective_scan_core_bwd2.cu
│       │       │   ├── selective_scan_core_bwd3.cu
│       │       │   ├── selective_scan_core_bwd4.cu
│       │       │   ├── selective_scan_core_fwd.cu
│       │       │   ├── selective_scan_core_fwd2.cu
│       │       │   ├── selective_scan_core_fwd3.cu
│       │       │   ├── selective_scan_core_fwd4.cu
│       │       │   ├── selective_scan_fwd_kernel_nrow.cuh
│       │       │   └── selective_scan_nrow.cpp
│       │       ├── cusoflex
│       │       │   ├── selective_scan_bwd_kernel_oflex.cuh
│       │       │   ├── selective_scan_core_bwd.cu
│       │       │   ├── selective_scan_core_fwd.cu
│       │       │   ├── selective_scan_fwd_kernel_oflex.cuh
│       │       │   └── selective_scan_oflex.cpp
│       │       ├── reverse_scan.cuh
│       │       ├── selective_scan.h
│       │       ├── selective_scan_common.h
│       │       ├── static_switch.h
│       │       └── uninitialized_copy.cuh
│       ├── setup.py
│       └── test_selective_scan.py
├── pretrained_weights
│   └── .gitkeep
├── req.txt
└── segmentation
    ├── __init__.py
    ├── configs
    │   ├── _base_
    │   │   ├── datasets
    │   │   │   ├── ade20k.py
    │   │   │   ├── ade20k_640x640.py
    │   │   │   ├── bdd100k.py
    │   │   │   ├── chase_db1.py
    │   │   │   ├── cityscapes.py
    │   │   │   ├── cityscapes_1024x1024.py
    │   │   │   ├── cityscapes_768x768.py
    │   │   │   ├── cityscapes_769x769.py
    │   │   │   ├── cityscapes_832x832.py
    │   │   │   ├── coco-stuff10k.py
    │   │   │   ├── coco-stuff164k.py
    │   │   │   ├── drive.py
    │   │   │   ├── hrf.py
    │   │   │   ├── isaid.py
    │   │   │   ├── levir_256x256.py
    │   │   │   ├── loveda.py
    │   │   │   ├── mapillary_v1.py
    │   │   │   ├── mapillary_v1_65.py
    │   │   │   ├── mapillary_v2.py
    │   │   │   ├── nyu.py
    │   │   │   ├── nyu_512x512.py
    │   │   │   ├── pascal_context.py
    │   │   │   ├── pascal_context_59.py
    │   │   │   ├── pascal_voc12.py
    │   │   │   ├── pascal_voc12_aug.py
    │   │   │   ├── potsdam.py
    │   │   │   ├── refuge.py
    │   │   │   ├── stare.py
    │   │   │   ├── synapse.py
    │   │   │   └── vaihingen.py
    │   │   ├── default_runtime.py
    │   │   ├── models
    │   │   │   ├── ann_r50-d8.py
    │   │   │   ├── apcnet_r50-d8.py
    │   │   │   ├── bisenetv1_r18-d32.py
    │   │   │   ├── bisenetv2.py
    │   │   │   ├── ccnet_r50-d8.py
    │   │   │   ├── cgnet.py
    │   │   │   ├── danet_r50-d8.py
    │   │   │   ├── deeplabv3_r50-d8.py
    │   │   │   ├── deeplabv3_unet_s5-d16.py
    │   │   │   ├── deeplabv3plus_r50-d8.py
    │   │   │   ├── dmnet_r50-d8.py
    │   │   │   ├── dnl_r50-d8.py
    │   │   │   ├── dpt_vit-b16.py
    │   │   │   ├── emanet_r50-d8.py
    │   │   │   ├── encnet_r50-d8.py
    │   │   │   ├── erfnet_fcn.py
    │   │   │   ├── fast_scnn.py
    │   │   │   ├── fastfcn_r50-d32_jpu_psp.py
    │   │   │   ├── fcn_hr18.py
    │   │   │   ├── fcn_r50-d8.py
    │   │   │   ├── fcn_unet_s5-d16.py
    │   │   │   ├── fpn_poolformer_s12.py
    │   │   │   ├── fpn_r50.py
    │   │   │   ├── gcnet_r50-d8.py
    │   │   │   ├── icnet_r50-d8.py
    │   │   │   ├── isanet_r50-d8.py
    │   │   │   ├── lraspp_m-v3-d8.py
    │   │   │   ├── nonlocal_r50-d8.py
    │   │   │   ├── ocrnet_hr18.py
    │   │   │   ├── ocrnet_r50-d8.py
    │   │   │   ├── pointrend_r50.py
    │   │   │   ├── psanet_r50-d8.py
    │   │   │   ├── pspnet_r50-d8.py
    │   │   │   ├── pspnet_unet_s5-d16.py
    │   │   │   ├── san_vit-b16.py
    │   │   │   ├── segformer_mit-b0.py
    │   │   │   ├── segmenter_vit-b16_mask.py
    │   │   │   ├── setr_mla.py
    │   │   │   ├── setr_naive.py
    │   │   │   ├── setr_pup.py
    │   │   │   ├── stdc.py
    │   │   │   ├── twins_pcpvt-s_fpn.py
    │   │   │   ├── twins_pcpvt-s_upernet.py
    │   │   │   ├── upernet_beit.py
    │   │   │   ├── upernet_convnext.py
    │   │   │   ├── upernet_mae.py
    │   │   │   ├── upernet_r50.py
    │   │   │   ├── upernet_swin.py
    │   │   │   ├── upernet_vit-b16_ln_mln.py
    │   │   │   └── vpd_sd.py
    │   │   └── schedules
    │   │       ├── schedule_160k.py
    │   │       ├── schedule_20k.py
    │   │       ├── schedule_240k.py
    │   │       ├── schedule_25k.py
    │   │       ├── schedule_320k.py
    │   │       ├── schedule_40k.py
    │   │       └── schedule_80k.py
    │   ├── convnext
    │   │   ├── README.md
    │   │   ├── convnext-base_upernet_8xb2-amp-160k_ade20k-512x512.py
    │   │   ├── convnext-base_upernet_8xb2-amp-160k_ade20k-640x640.py
    │   │   ├── convnext-large_upernet_8xb2-amp-160k_ade20k-640x640.py
    │   │   ├── convnext-small_upernet_8xb2-amp-160k_ade20k-512x512.py
    │   │   ├── convnext-tiny_upernet_8xb2-amp-160k_ade20k-512x512.py
    │   │   ├── convnext-xlarge_upernet_8xb2-amp-160k_ade20k-640x640.py
    │   │   └── metafile.yaml
    │   ├── swin
    │   │   ├── README.md
    │   │   ├── metafile.yaml
    │   │   ├── swin-base-patch4-window12-in1k-384x384-pre_upernet_8xb2-160k_ade20k-512x512.py
    │   │   ├── swin-base-patch4-window12-in22k-384x384-pre_upernet_8xb2-160k_ade20k-512x512.py
    │   │   ├── swin-base-patch4-window7-in1k-pre_upernet_8xb2-160k_ade20k-512x512.py
    │   │   ├── swin-base-patch4-window7-in22k-pre_upernet_8xb2-160k_ade20k-512x512.py
    │   │   ├── swin-large-patch4-window12-in22k-384x384-pre_upernet_8xb2-160k_ade20k-512x512.py
    │   │   ├── swin-large-patch4-window7-in22k-pre_upernet_8xb2-160k_ade20k-512x512.py
    │   │   ├── swin-small-patch4-window7-in1k-pre_upernet_8xb2-160k_ade20k-512x512.py
    │   │   ├── swin-tiny-patch4-window7-in1k-pre_upernet_8xb2-160k_ade20k-512x512.py
    │   │   └── swin-tiny-patch4-window7_upernet_1xb8-20k_levir-256x256.py
    │   ├── upernet
    │   │   ├── README.md
    │   │   ├── metafile.yaml
    │   │   ├── upernet_r101_4xb2-40k_cityscapes-512x1024.py
    │   │   ├── upernet_r101_4xb2-40k_cityscapes-769x769.py
    │   │   ├── upernet_r101_4xb2-80k_cityscapes-512x1024.py
    │   │   ├── upernet_r101_4xb2-80k_cityscapes-769x769.py
    │   │   ├── upernet_r101_4xb4-160k_ade20k-512x512.py
    │   │   ├── upernet_r101_4xb4-20k_voc12aug-512x512.py
    │   │   ├── upernet_r101_4xb4-40k_voc12aug-512x512.py
    │   │   ├── upernet_r101_4xb4-80k_ade20k-512x512.py
    │   │   ├── upernet_r18_4xb2-40k_cityscapes-512x1024.py
    │   │   ├── upernet_r18_4xb2-80k_cityscapes-512x1024.py
    │   │   ├── upernet_r18_4xb4-160k_ade20k-512x512.py
    │   │   ├── upernet_r18_4xb4-20k_voc12aug-512x512.py
    │   │   ├── upernet_r18_4xb4-40k_voc12aug-512x512.py
    │   │   ├── upernet_r18_4xb4-80k_ade20k-512x512.py
    │   │   ├── upernet_r50_4xb2-40k_cityscapes-512x1024.py
    │   │   ├── upernet_r50_4xb2-40k_cityscapes-769x769.py
    │   │   ├── upernet_r50_4xb2-80k_cityscapes-512x1024.py
    │   │   ├── upernet_r50_4xb2-80k_cityscapes-769x769.py
    │   │   ├── upernet_r50_4xb4-160k_ade20k-512x512.py
    │   │   ├── upernet_r50_4xb4-20k_voc12aug-512x512.py
    │   │   ├── upernet_r50_4xb4-40k_voc12aug-512x512.py
    │   │   └── upernet_r50_4xb4-80k_ade20k-512x512.py
    │   ├── vit
    │   │   ├── README.md
    │   │   ├── metafile.yaml
    │   │   ├── vit_deit-b16-ln_mln_upernet_8xb2-160k_ade20k-512x512.py
    │   │   ├── vit_deit-b16_mln_upernet_8xb2-160k_ade20k-512x512.py
    │   │   ├── vit_deit-b16_upernet_8xb2-160k_ade20k-512x512.py
    │   │   ├── vit_deit-b16_upernet_8xb2-80k_ade20k-512x512.py
    │   │   ├── vit_deit-s16-ln_mln_upernet_8xb2-160k_ade20k-512x512.py
    │   │   ├── vit_deit-s16_mln_upernet_8xb2-160k_ade20k-512x512.py
    │   │   ├── vit_deit-s16_upernet_8xb2-160k_ade20k-512x512.py
    │   │   ├── vit_deit-s16_upernet_8xb2-80k_ade20k-512x512.py
    │   │   ├── vit_vit-b16-ln_mln_upernet_8xb2-160k_ade20k-512x512.py
    │   │   ├── vit_vit-b16_mln_upernet_8xb2-160k_ade20k-512x512.py
    │   │   └── vit_vit-b16_mln_upernet_8xb2-80k_ade20k-512x512.py
    │   ├── vmamba
    │   │   ├── upernet_vmamba_4xb4-160k_ade20k-512x512_base.py
    │   │   ├── upernet_vmamba_4xb4-160k_ade20k-512x512_small.py
    │   │   ├── upernet_vmamba_4xb4-160k_ade20k-512x512_tiny.py
    │   │   ├── upernet_vmamba_4xb4-160k_ade20k-640x640_small.py
    │   │   └── upernet_vmamba_4xb4-160k_ade20k-896x896_small.py
    │   ├── vssm
    │   │   ├── upernet_convnext_4xb4-160k_ade20k-640x640_small.py
    │   │   ├── upernet_convnext_4xb4-160k_ade20k-896x896_small.py
    │   │   ├── upernet_internimage_g_896_160k_ade20k.py
    │   │   ├── upernet_swin_4xb4-160k_ade20k-640x640_small.py
    │   │   ├── upernet_swin_4xb4-160k_ade20k-896x896_small.py
    │   │   ├── upernet_vssm_4xb4-160k_ade20k-512x512_base.py
    │   │   ├── upernet_vssm_4xb4-160k_ade20k-512x512_small.py
    │   │   ├── upernet_vssm_4xb4-160k_ade20k-512x512_tiny.py
    │   │   ├── upernet_vssm_4xb4-160k_ade20k-640x640_small.py
    │   │   └── upernet_vssm_4xb4-160k_ade20k-896x896_small.py
    │   └── vssm1
    │       ├── upernet_vssm_4xb4-160k_ade20k-512x512_base.py
    │       ├── upernet_vssm_4xb4-160k_ade20k-512x512_small.py
    │       ├── upernet_vssm_4xb4-160k_ade20k-512x512_tiny.py
    │       ├── upernet_vssm_4xb4-160k_ade20k-512x512_tiny1.py
    │       ├── upernet_vssm_4xb4-160k_ade20k-640x640_small.py
    │       └── upernet_vssm_4xb4-160k_ade20k-896x896_small.py
    ├── model.py
    ├── readme.md
    └── tools
        ├── analysis_tools
        │   ├── analyze_logs.py
        │   ├── benchmark.py
        │   ├── browse_dataset.py
        │   ├── confusion_matrix.py
        │   ├── get_flops.py
        │   └── visualization_cam.py
        ├── dataset_converters
        │   ├── chase_db1.py
        │   ├── cityscapes.py
        │   ├── coco_stuff10k.py
        │   ├── coco_stuff164k.py
        │   ├── drive.py
        │   ├── hrf.py
        │   ├── isaid.py
        │   ├── levircd.py
        │   ├── loveda.py
        │   ├── nyu.py
        │   ├── pascal_context.py
        │   ├── potsdam.py
        │   ├── refuge.py
        │   ├── stare.py
        │   ├── synapse.py
        │   ├── vaihingen.py
        │   └── voc_aug.py
        ├── deployment
        │   └── pytorch2torchscript.py
        ├── dist_test.sh
        ├── dist_train.sh
        ├── misc
        │   ├── browse_dataset.py
        │   ├── print_config.py
        │   └── publish_model.py
        ├── model_converters
        │   ├── beit2mmseg.py
        │   ├── clip2mmseg.py
        │   ├── mit2mmseg.py
        │   ├── san2mmseg.py
        │   ├── stdc2mmseg.py
        │   ├── swin2mmseg.py
        │   ├── twins2mmseg.py
        │   ├── vit2mmseg.py
        │   └── vitjax2mmseg.py
        ├── slurm_test.sh
        ├── slurm_train.sh
        ├── test.py
        ├── torchserve
        │   ├── mmseg2torchserve.py
        │   ├── mmseg_handler.py
        │   └── test_torchserve.py
        └── train.py

/LOGO_.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HashmatShadab/MambaRobustness/e69a676bbbd072b6bc9b2c806f257a675682f6cd/LOGO_.jpg
--------------------------------------------------------------------------------
/classification/attacks/__init__.py:
--------------------------------------------------------------------------------
from .pgd import PGD
from .fgsm import FGSM
from .bim import BIM
from .mifgsm import MIFGSM
from .difgsm import DIFGSM
from .tpgd import TPGD
from .tifgsm import TIFGSM
from .vmifgsm import VMIFGSM
--------------------------------------------------------------------------------
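The attacks package exposes eight gradient-based attacks through its __init__.py. A minimal sketch of invoking one of them from inside classification/ follows; the eps/alpha/steps constructor arguments are an assumption (torchattacks-style API suggested by the class names), so check attacks/pgd.py for the real signature.

import torch
import torchvision
from attacks import PGD  # assumes the script runs from the classification/ directory

model = torchvision.models.resnet50(weights="IMAGENET1K_V1").eval()
# eps/alpha/steps are assumed torchattacks-style arguments, not a documented API
attack = PGD(model, eps=8 / 255, alpha=2 / 255, steps=10)

images = torch.rand(4, 3, 224, 224)        # stand-in for a clean batch in [0, 1]
labels = torch.randint(0, 1000, (4,))
adv_images = attack(images, labels)        # perturbed batch, same shape as input
assert (adv_images - images).abs().max() <= 8 / 255 + 1e-6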
/classification/datasets/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HashmatShadab/MambaRobustness/e69a676bbbd072b6bc9b2c806f257a675682f6cd/classification/datasets/__init__.py
--------------------------------------------------------------------------------
/classification/datasets/dataset5k.py:
--------------------------------------------------------------------------------
import torchvision
import json
import torch
import os


class ImageNet5k(torchvision.datasets.ImageFolder):

    def __init__(self, image_list="./image_list.json", *args, **kwargs):
        self.image_list = set(json.load(open(image_list, "r"))["images"])
        super(ImageNet5k, self).__init__(is_valid_file=self.is_valid_file, *args, **kwargs)

    def is_valid_file(self, x: str) -> bool:
        file_path = x
        # get image name
        image_name = os.path.basename(file_path)
        # get parent folder name (the WordNet-ID class folder)
        folder_name = os.path.basename(os.path.dirname(file_path))
        return f"{folder_name}/{image_name}" in self.image_list


if __name__ == "__main__":
    import matplotlib.pyplot as plt
    import torchvision.transforms as transforms

    # Load the ImageNet validation set, filtered to the images in image_list.json
    imagenet = ImageNet5k(root=r"datasets\ImageNet\val", transform=transforms.Compose([transforms.Resize(256), transforms.CenterCrop(224), transforms.ToTensor()]))

    dataloader = torch.utils.data.DataLoader(imagenet, batch_size=50, shuffle=True)

    for i, (img, label) in enumerate(dataloader):
        print(i, img.shape)
--------------------------------------------------------------------------------
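ImageNet5k keeps only the validation images whose "folder/filename" key appears in image_list.json, so the JSON is expected to hold a single "images" list. A sketch of building such a file from an existing ImageFolder layout (the path and the five-per-class choice are hypothetical):

import json
import os

val_root = "datasets/ImageNet/val"        # hypothetical location of the val set
keep = []
for folder in sorted(os.listdir(val_root)):
    for name in sorted(os.listdir(os.path.join(val_root, folder)))[:5]:
        keep.append(f"{folder}/{name}")   # same "folder/image" key is_valid_file checks

with open("image_list.json", "w") as f:
    json.dump({"images": keep}, f)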
/classification/imagecorruptions/frost/frost1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HashmatShadab/MambaRobustness/e69a676bbbd072b6bc9b2c806f257a675682f6cd/classification/imagecorruptions/frost/frost1.png
--------------------------------------------------------------------------------
/classification/imagecorruptions/frost/frost2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HashmatShadab/MambaRobustness/e69a676bbbd072b6bc9b2c806f257a675682f6cd/classification/imagecorruptions/frost/frost2.png
--------------------------------------------------------------------------------
/classification/imagecorruptions/frost/frost3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HashmatShadab/MambaRobustness/e69a676bbbd072b6bc9b2c806f257a675682f6cd/classification/imagecorruptions/frost/frost3.png
--------------------------------------------------------------------------------
/classification/imagecorruptions/frost/frost4.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HashmatShadab/MambaRobustness/e69a676bbbd072b6bc9b2c806f257a675682f6cd/classification/imagecorruptions/frost/frost4.jpg
--------------------------------------------------------------------------------
/classification/imagecorruptions/frost/frost5.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HashmatShadab/MambaRobustness/e69a676bbbd072b6bc9b2c806f257a675682f6cd/classification/imagecorruptions/frost/frost5.jpg
--------------------------------------------------------------------------------
/classification/imagecorruptions/frost/frost6.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HashmatShadab/MambaRobustness/e69a676bbbd072b6bc9b2c806f257a675682f6cd/classification/imagecorruptions/frost/frost6.jpg
--------------------------------------------------------------------------------
/classification/scripts/evaluate_transferability.sh:
--------------------------------------------------------------------------------
#!/bin/bash

batch_size=${1:-64}

#model_names=("resnet18" "resnet50" "vgg16_bn" "vgg19_bn" "densenet121" "densenet161" "vit_tiny_patch16_224" "vit_small_patch16_224" "vit_base_patch16_224" "deit_tiny_patch16_224" "deit_small_patch16_224" "deit_base_patch16_224" "swin_tiny_patch4_window7_224" "swin_small_patch4_window7_224" "swin_base_patch4_window7_224" "vssm_tiny_v2" "vssm_small_v2" "vssm_base_v2")

model_names=("resnet50")

for data_path in AdvExamples/*/*/*.pt
do
    echo "Evaluating transferability for adversarial examples: ${data_path}"
    for model_name in "${model_names[@]}"
    do
        echo "Evaluating transferability for ${model_name}"
        python inference.py --dataset imagenet_adv --data_dir ${data_path} --batch_size ${batch_size} --source_model_name ${model_name}
    done
done
--------------------------------------------------------------------------------
/classification/vit_models_ipvit/__init__.py:
--------------------------------------------------------------------------------
# from .deit import *
# from .deit_ensemble import *
# from .deit_modified import *
from .dino import *
# from .t2t_vit import *
# from .t2t_vit_dense import *
# from .t2t_vit_ghost import *
# from .t2t_vit_se import *
# from .tnt import *
# from .vit import *
# from .resnet import drop_resnet50
--------------------------------------------------------------------------------
/detection/__init__.py:
--------------------------------------------------------------------------------
# configs/ and tools/ are copied from https://github.com/open-mmlab/mmdetection (version 3.3.0)
# tools/train.py#12 is added with "import model"
--------------------------------------------------------------------------------
/detection/configs/_base_/datasets/ade20k_instance.py:
--------------------------------------------------------------------------------
# dataset settings
dataset_type = 'ADE20KInstanceDataset'
data_root = 'data/ADEChallengeData2016/'

# Example to use different file client
# Method 1: simply set the data root and let the file I/O module
# automatically infer from prefix (not support LMDB and Memcache yet)

# data_root = 's3://openmmlab/datasets/detection/ADEChallengeData2016/'

# Method 2: Use `backend_args`, `file_client_args` in versions before 3.0.0rc6
# backend_args = dict(
#     backend='petrel',
#     path_mapping=dict({
#         './data/': 's3://openmmlab/datasets/detection/',
#         'data/': 's3://openmmlab/datasets/detection/'
#     }))
backend_args = None

test_pipeline = [
    dict(type='LoadImageFromFile', backend_args=backend_args),
    dict(type='Resize', scale=(2560, 640), keep_ratio=True),
    # If you don't have a gt annotation, delete the pipeline
    dict(type='LoadAnnotations', with_bbox=True, with_mask=True),
    dict(
        type='PackDetInputs',
        meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
                   'scale_factor'))
]

val_dataloader = dict(
    batch_size=1,
    num_workers=2,
    persistent_workers=True,
    drop_last=False,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        ann_file='ade20k_instance_val.json',
        data_prefix=dict(img='images/validation'),
        test_mode=True,
        pipeline=test_pipeline,
        backend_args=backend_args))
test_dataloader = val_dataloader

val_evaluator = dict(
    type='CocoMetric',
    ann_file=data_root + 'ade20k_instance_val.json',
    metric=['bbox', 'segm'],
    format_only=False,
    backend_args=backend_args)
test_evaluator = val_evaluator
--------------------------------------------------------------------------------
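These _base_ files are plain mmengine config files: downstream configs pull them in through _base_ and override individual fields. For reference, a loaded config can be inspected with the standard mmengine API (the path below is just this file's location in the repo):

from mmengine.config import Config

cfg = Config.fromfile(
    "detection/configs/_base_/datasets/ade20k_instance.py")
print(cfg.val_dataloader.dataset.ann_file)   # -> 'ade20k_instance_val.json'
print(cfg.val_evaluator.metric)              # -> ['bbox', 'segm']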
/detection/configs/_base_/datasets/ade20k_panoptic.py:
--------------------------------------------------------------------------------
# dataset settings
dataset_type = 'ADE20KPanopticDataset'
data_root = 'data/ADEChallengeData2016/'

backend_args = None

test_pipeline = [
    dict(type='LoadImageFromFile', backend_args=backend_args),
    dict(type='Resize', scale=(2560, 640), keep_ratio=True),
    dict(type='LoadPanopticAnnotations', backend_args=backend_args),
    dict(
        type='PackDetInputs',
        meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
                   'scale_factor'))
]

val_dataloader = dict(
    batch_size=1,
    num_workers=0,
    persistent_workers=False,
    drop_last=False,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        ann_file='ade20k_panoptic_val.json',
        data_prefix=dict(img='images/validation/', seg='ade20k_panoptic_val/'),
        test_mode=True,
        pipeline=test_pipeline,
        backend_args=backend_args))
test_dataloader = val_dataloader

val_evaluator = dict(
    type='CocoPanopticMetric',
    ann_file=data_root + 'ade20k_panoptic_val.json',
    seg_prefix=data_root + 'ade20k_panoptic_val/',
    backend_args=backend_args)
test_evaluator = val_evaluator
--------------------------------------------------------------------------------
/detection/configs/_base_/datasets/ade20k_semantic.py:
--------------------------------------------------------------------------------
dataset_type = 'ADE20KSegDataset'
data_root = 'data/ADEChallengeData2016/'

# Example to use different file client
# Method 1: simply set the data root and let the file I/O module
# automatically infer from prefix (not support LMDB and Memcache yet)

# data_root = 's3://openmmlab/datasets/detection/ADEChallengeData2016/'

# Method 2: Use `backend_args`, `file_client_args` in versions before 3.0.0rc6
# backend_args = dict(
#     backend='petrel',
#     path_mapping=dict({
#         './data/': 's3://openmmlab/datasets/detection/',
#         'data/': 's3://openmmlab/datasets/detection/'
#     }))
backend_args = None

test_pipeline = [
    dict(type='LoadImageFromFile', backend_args=backend_args),
    dict(type='Resize', scale=(2048, 512), keep_ratio=True),
    dict(
        type='LoadAnnotations',
        with_bbox=False,
        with_mask=False,
        with_seg=True,
        reduce_zero_label=True),
    dict(
        type='PackDetInputs', meta_keys=('img_path', 'ori_shape', 'img_shape'))
]

val_dataloader = dict(
    batch_size=1,
    num_workers=2,
    persistent_workers=True,
    drop_last=False,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_prefix=dict(
            img_path='images/validation',
            seg_map_path='annotations/validation'),
        pipeline=test_pipeline))
test_dataloader = val_dataloader

val_evaluator = dict(type='SemSegMetric', iou_metrics=['mIoU'])
test_evaluator = val_evaluator
--------------------------------------------------------------------------------
/detection/configs/_base_/datasets/lvis_v1_instance.py:
--------------------------------------------------------------------------------
# dataset settings
_base_ = 'lvis_v0.5_instance.py'
dataset_type = 'LVISV1Dataset'
data_root = 'data/lvis_v1/'

train_dataloader = dict(
    dataset=dict(
        dataset=dict(
            type=dataset_type,
            data_root=data_root,
            ann_file='annotations/lvis_v1_train.json',
            data_prefix=dict(img=''))))
val_dataloader = dict(
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        ann_file='annotations/lvis_v1_val.json',
        data_prefix=dict(img='')))
test_dataloader = val_dataloader

val_evaluator = dict(ann_file=data_root + 'annotations/lvis_v1_val.json')
test_evaluator = val_evaluator
--------------------------------------------------------------------------------
/detection/configs/_base_/datasets/refcoco+.py:
--------------------------------------------------------------------------------
# dataset settings
dataset_type = 'RefCocoDataset'
data_root = 'data/coco/'

backend_args = None

test_pipeline = [
    dict(type='LoadImageFromFile', backend_args=backend_args),
    dict(type='Resize', scale=(1333, 800), keep_ratio=True),
    dict(
        type='LoadAnnotations',
        with_mask=True,
        with_bbox=False,
        with_seg=False,
        with_label=False),
    dict(
        type='PackDetInputs',
        meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
                   'scale_factor', 'gt_masks', 'text'))
]

val_dataloader = dict(
    batch_size=1,
    num_workers=2,
    persistent_workers=True,
    drop_last=False,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_prefix=dict(img_path='train2014/'),
        ann_file='refcoco+/instances.json',
        split_file='refcoco+/refs(unc).p',
        split='val',
        text_mode='select_first',
        pipeline=test_pipeline))

test_dataloader = dict(
    batch_size=1,
    num_workers=2,
    persistent_workers=True,
    drop_last=False,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_prefix=dict(img_path='train2014/'),
        ann_file='refcoco+/instances.json',
        split_file='refcoco+/refs(unc).p',
        split='testA',  # or 'testB'
        text_mode='select_first',
        pipeline=test_pipeline))

val_evaluator = dict(type='RefSegMetric', metric=['cIoU', 'mIoU'])
test_evaluator = val_evaluator
--------------------------------------------------------------------------------
/detection/configs/_base_/datasets/refcoco.py:
--------------------------------------------------------------------------------
# dataset settings
dataset_type = 'RefCocoDataset'
data_root = 'data/coco/'

backend_args = None

test_pipeline = [
    dict(type='LoadImageFromFile', backend_args=backend_args),
    dict(type='Resize', scale=(1333, 800), keep_ratio=True),
    dict(
        type='LoadAnnotations',
        with_mask=True,
        with_bbox=False,
        with_seg=False,
        with_label=False),
    dict(
        type='PackDetInputs',
        meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
                   'scale_factor', 'gt_masks', 'text'))
]

val_dataloader = dict(
    batch_size=1,
    num_workers=2,
    persistent_workers=True,
    drop_last=False,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_prefix=dict(img_path='train2014/'),
        ann_file='refcoco/instances.json',
        split_file='refcoco/refs(unc).p',
        split='val',
        text_mode='select_first',
        pipeline=test_pipeline))

test_dataloader = dict(
    batch_size=1,
    num_workers=2,
    persistent_workers=True,
    drop_last=False,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_prefix=dict(img_path='train2014/'),
        ann_file='refcoco/instances.json',
        split_file='refcoco/refs(unc).p',
        split='testA',  # or 'testB'
        text_mode='select_first',
        pipeline=test_pipeline))

val_evaluator = dict(type='RefSegMetric', metric=['cIoU', 'mIoU'])
test_evaluator = val_evaluator
--------------------------------------------------------------------------------
/detection/configs/_base_/datasets/refcocog.py:
--------------------------------------------------------------------------------
# dataset settings
dataset_type = 'RefCocoDataset'
data_root = 'data/coco/'

backend_args = None

test_pipeline = [
    dict(type='LoadImageFromFile', backend_args=backend_args),
    dict(type='Resize', scale=(1333, 800), keep_ratio=True),
    dict(
        type='LoadAnnotations',
        with_mask=True,
        with_bbox=False,
        with_seg=False,
        with_label=False),
    dict(
        type='PackDetInputs',
        meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
                   'scale_factor', 'gt_masks', 'text'))
]

val_dataloader = dict(
    batch_size=1,
    num_workers=2,
    persistent_workers=True,
    drop_last=False,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_prefix=dict(img_path='train2014/'),
        ann_file='refcocog/instances.json',
        split_file='refcocog/refs(umd).p',
        split='val',
        text_mode='select_first',
        pipeline=test_pipeline))

test_dataloader = dict(
    batch_size=1,
    num_workers=2,
    persistent_workers=True,
    drop_last=False,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_prefix=dict(img_path='train2014/'),
        ann_file='refcocog/instances.json',
        split_file='refcocog/refs(umd).p',
        split='test',
        text_mode='select_first',
        pipeline=test_pipeline))

val_evaluator = dict(type='RefSegMetric', metric=['cIoU', 'mIoU'])
test_evaluator = val_evaluator
--------------------------------------------------------------------------------
/detection/configs/_base_/default_runtime.py:
--------------------------------------------------------------------------------
default_scope = 'mmdet'

default_hooks = dict(
    timer=dict(type='IterTimerHook'),
    logger=dict(type='LoggerHook', interval=50),
    param_scheduler=dict(type='ParamSchedulerHook'),
    checkpoint=dict(type='CheckpointHook', interval=1),
    sampler_seed=dict(type='DistSamplerSeedHook'),
    visualization=dict(type='DetVisualizationHook'))

env_cfg = dict(
    cudnn_benchmark=False,
    mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0),
    dist_cfg=dict(backend='nccl'),
)

vis_backends = [dict(type='LocalVisBackend')]
visualizer = dict(
    type='DetLocalVisualizer', vis_backends=vis_backends, name='visualizer')
log_processor = dict(type='LogProcessor', window_size=50, by_epoch=True)

log_level = 'INFO'
load_from = None
resume = False
--------------------------------------------------------------------------------
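Every hook in default_hooks can be overridden field-by-field from a child config rather than redeclared; the wandb variant further below does exactly this for the checkpoint interval. A minimal illustration (the interval values are arbitrary examples):

# In a downstream config that inherits default_runtime.py:
_base_ = ['../_base_/default_runtime.py']

# Only the touched keys change; the rest of each hook config is merged in.
default_hooks = dict(
    logger=dict(interval=100),        # log every 100 iters instead of 50
    checkpoint=dict(interval=4),      # save every 4 epochs instead of 1
)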
/detection/configs/_base_/schedules/schedule_1x.py:
--------------------------------------------------------------------------------
# training schedule for 1x
train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=12, val_interval=1)
val_cfg = dict(type='ValLoop')
test_cfg = dict(type='TestLoop')

# learning rate
param_scheduler = [
    dict(
        type='LinearLR', start_factor=0.001, by_epoch=False, begin=0, end=500),
    dict(
        type='MultiStepLR',
        begin=0,
        end=12,
        by_epoch=True,
        milestones=[8, 11],
        gamma=0.1)
]

# optimizer
optim_wrapper = dict(
    type='OptimWrapper',
    optimizer=dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001))

# Default setting for scaling LR automatically
# - `enable` means enable scaling LR automatically
#   or not by default.
# - `base_batch_size` = (8 GPUs) x (2 samples per GPU).
auto_scale_lr = dict(enable=False, base_batch_size=16)
--------------------------------------------------------------------------------
/detection/configs/_base_/schedules/schedule_20e.py:
--------------------------------------------------------------------------------
# training schedule for 20e
train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=20, val_interval=1)
val_cfg = dict(type='ValLoop')
test_cfg = dict(type='TestLoop')

# learning rate
param_scheduler = [
    dict(
        type='LinearLR', start_factor=0.001, by_epoch=False, begin=0, end=500),
    dict(
        type='MultiStepLR',
        begin=0,
        end=20,
        by_epoch=True,
        milestones=[16, 19],
        gamma=0.1)
]

# optimizer
optim_wrapper = dict(
    type='OptimWrapper',
    optimizer=dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001))

# Default setting for scaling LR automatically
# - `enable` means enable scaling LR automatically
#   or not by default.
# - `base_batch_size` = (8 GPUs) x (2 samples per GPU).
auto_scale_lr = dict(enable=False, base_batch_size=16)
--------------------------------------------------------------------------------
/detection/configs/_base_/schedules/schedule_2x.py:
--------------------------------------------------------------------------------
# training schedule for 2x
train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=24, val_interval=1)
val_cfg = dict(type='ValLoop')
test_cfg = dict(type='TestLoop')

# learning rate
param_scheduler = [
    dict(
        type='LinearLR', start_factor=0.001, by_epoch=False, begin=0, end=500),
    dict(
        type='MultiStepLR',
        begin=0,
        end=24,
        by_epoch=True,
        milestones=[16, 22],
        gamma=0.1)
]

# optimizer
optim_wrapper = dict(
    type='OptimWrapper',
    optimizer=dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001))

# Default setting for scaling LR automatically
# - `enable` means enable scaling LR automatically
#   or not by default.
# - `base_batch_size` = (8 GPUs) x (2 samples per GPU).
auto_scale_lr = dict(enable=False, base_batch_size=16)
--------------------------------------------------------------------------------
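All three schedules share the same linear-scaling convention: lr=0.02 is tuned for base_batch_size = 16 (8 GPUs x 2 samples per GPU), and when auto_scale_lr is enabled the runner rescales it by the actual total batch size. The rule is just:

# Linear LR scaling as described by the config comments above.
base_lr = 0.02
base_batch_size = 16              # 8 GPUs x 2 samples per GPU

def scaled_lr(total_batch_size: int) -> float:
    return base_lr * total_batch_size / base_batch_size

print(scaled_lr(16))   # 0.02  (the default setting)
print(scaled_lr(64))   # 0.08  (e.g. the 8xb8 LSJ configs below, which set lr=0.02 * 4)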
/detection/configs/convnext/cascade-mask-rcnn_convnext-s-p4-w7_fpn_4conv1fc-giou_amp-ms-crop-3x_coco.py:
--------------------------------------------------------------------------------
_base_ = './cascade-mask-rcnn_convnext-t-p4-w7_fpn_4conv1fc-giou_amp-ms-crop-3x_coco.py'  # noqa

# please install mmpretrain
# import mmpretrain.models to trigger register_module in mmpretrain
custom_imports = dict(
    imports=['mmpretrain.models'], allow_failed_imports=False)
checkpoint_file = 'https://download.openmmlab.com/mmclassification/v0/convnext/downstream/convnext-small_3rdparty_32xb128-noema_in1k_20220301-303e75e3.pth'  # noqa

model = dict(
    backbone=dict(
        _delete_=True,
        type='mmpretrain.ConvNeXt',
        arch='small',
        out_indices=[0, 1, 2, 3],
        drop_path_rate=0.6,
        layer_scale_init_value=1.0,
        gap_before_final_norm=False,
        init_cfg=dict(
            type='Pretrained', checkpoint=checkpoint_file,
            prefix='backbone.')))

optim_wrapper = dict(paramwise_cfg={
    'decay_rate': 0.7,
    'decay_type': 'layer_wise',
    'num_layers': 12
})
--------------------------------------------------------------------------------
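The paramwise_cfg here requests layer-wise LR decay: each of the 12 backbone layers gets the base LR scaled down geometrically the closer it sits to the input. A sketch of the arithmetic only; the real per-parameter indexing is handled by mmdet's learning-rate-decay optimizer constructor, not by code like this:

decay_rate, num_layers = 0.7, 12

for layer_id in range(num_layers + 1):
    scale = decay_rate ** (num_layers - layer_id)
    print(f"layer {layer_id:2d}: lr scale = {scale:.4f}")
# The layer closest to the input trains ~72x slower than the head (0.7**12 ~ 0.014).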
/detection/configs/mask_rcnn/mask-rcnn_r101-caffe_fpn_1x_coco.py:
--------------------------------------------------------------------------------
_base_ = './mask-rcnn_r50-caffe_fpn_1x_coco.py'
model = dict(
    backbone=dict(
        depth=101,
        init_cfg=dict(
            type='Pretrained',
            checkpoint='open-mmlab://detectron2/resnet101_caffe')))
--------------------------------------------------------------------------------
/detection/configs/mask_rcnn/mask-rcnn_r101-caffe_fpn_ms-poly-3x_coco.py:
--------------------------------------------------------------------------------
_base_ = [
    '../common/ms-poly_3x_coco-instance.py',
    '../_base_/models/mask-rcnn_r50_fpn.py'
]

model = dict(
    # use caffe img_norm
    data_preprocessor=dict(
        mean=[103.530, 116.280, 123.675],
        std=[1.0, 1.0, 1.0],
        bgr_to_rgb=False),
    backbone=dict(
        depth=101,
        norm_cfg=dict(requires_grad=False),
        norm_eval=True,
        style='caffe',
        init_cfg=dict(
            type='Pretrained',
            checkpoint='open-mmlab://detectron2/resnet101_caffe')))
--------------------------------------------------------------------------------
/detection/configs/mask_rcnn/mask-rcnn_r101_fpn_1x_coco.py:
--------------------------------------------------------------------------------
_base_ = './mask-rcnn_r50_fpn_1x_coco.py'
model = dict(
    backbone=dict(
        depth=101,
        init_cfg=dict(type='Pretrained',
                      checkpoint='torchvision://resnet101')))
--------------------------------------------------------------------------------
/detection/configs/mask_rcnn/mask-rcnn_r101_fpn_2x_coco.py:
--------------------------------------------------------------------------------
_base_ = './mask-rcnn_r50_fpn_2x_coco.py'
model = dict(
    backbone=dict(
        depth=101,
        init_cfg=dict(type='Pretrained',
                      checkpoint='torchvision://resnet101')))
--------------------------------------------------------------------------------
/detection/configs/mask_rcnn/mask-rcnn_r101_fpn_8xb8-amp-lsj-200e_coco.py:
--------------------------------------------------------------------------------
_base_ = './mask-rcnn_r50_fpn_8xb8-amp-lsj-200e_coco.py'

model = dict(
    backbone=dict(
        depth=101,
        init_cfg=dict(type='Pretrained',
                      checkpoint='torchvision://resnet101')))
--------------------------------------------------------------------------------
/detection/configs/mask_rcnn/mask-rcnn_r101_fpn_ms-poly-3x_coco.py:
--------------------------------------------------------------------------------
_base_ = [
    '../common/ms-poly_3x_coco-instance.py',
    '../_base_/models/mask-rcnn_r50_fpn.py'
]

model = dict(
    backbone=dict(
        depth=101,
        init_cfg=dict(type='Pretrained',
                      checkpoint='torchvision://resnet101')))
--------------------------------------------------------------------------------
/detection/configs/mask_rcnn/mask-rcnn_r18_fpn_8xb8-amp-lsj-200e_coco.py:
--------------------------------------------------------------------------------
_base_ = './mask-rcnn_r50_fpn_8xb8-amp-lsj-200e_coco.py'

model = dict(
    backbone=dict(
        depth=18,
        init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet18')),
    neck=dict(in_channels=[64, 128, 256, 512]))
--------------------------------------------------------------------------------
/detection/configs/mask_rcnn/mask-rcnn_r50-caffe-c4_1x_coco.py:
--------------------------------------------------------------------------------
_base_ = [
    '../_base_/models/mask-rcnn_r50-caffe-c4.py',
    '../_base_/datasets/coco_instance.py',
    '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'
]
--------------------------------------------------------------------------------
/detection/configs/mask_rcnn/mask-rcnn_r50-caffe_fpn_1x_coco.py:
--------------------------------------------------------------------------------
_base_ = './mask-rcnn_r50_fpn_1x_coco.py'
model = dict(
    # use caffe img_norm
    data_preprocessor=dict(
        mean=[103.530, 116.280, 123.675],
        std=[1.0, 1.0, 1.0],
        bgr_to_rgb=False),
    backbone=dict(
        norm_cfg=dict(requires_grad=False),
        style='caffe',
        init_cfg=dict(
            type='Pretrained',
            checkpoint='open-mmlab://detectron2/resnet50_caffe')))
--------------------------------------------------------------------------------
/detection/configs/mask_rcnn/mask-rcnn_r50-caffe_fpn_ms-1x_coco.py:
--------------------------------------------------------------------------------
_base_ = './mask-rcnn_r50_fpn_1x_coco.py'

model = dict(
    # use caffe img_norm
    data_preprocessor=dict(
        mean=[103.530, 116.280, 123.675],
        std=[1.0, 1.0, 1.0],
        bgr_to_rgb=False),
    backbone=dict(
        norm_cfg=dict(requires_grad=False),
        style='caffe',
        init_cfg=dict(
            type='Pretrained',
            checkpoint='open-mmlab://detectron2/resnet50_caffe')))

train_pipeline = [
    dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}),
    dict(type='LoadAnnotations', with_bbox=True, with_mask=True),
    dict(
        type='RandomChoiceResize',
        scales=[(1333, 640), (1333, 672), (1333, 704), (1333, 736),
                (1333, 768), (1333, 800)],
        keep_ratio=True),
    dict(type='RandomFlip', prob=0.5),
    dict(type='PackDetInputs'),
]

train_dataloader = dict(dataset=dict(pipeline=train_pipeline))
--------------------------------------------------------------------------------
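RandomChoiceResize in the ms-1x pipeline is what makes it "multi-scale": every image independently picks one of the six target scales, and keep_ratio=True rescales by a single factor. A toy equivalent of the sampling and rescale step (the real transform also rescales boxes and masks):

import random

scales = [(1333, 640), (1333, 672), (1333, 704), (1333, 736),
          (1333, 768), (1333, 800)]

def target_size(h, w):
    # One scale drawn per image, as RandomChoiceResize does; with keep_ratio=True
    # the image is scaled by one factor so it fits inside (long_edge, short_edge).
    long_edge, short_edge = random.choice(scales)
    factor = min(long_edge / max(h, w), short_edge / min(h, w))
    return round(h * factor), round(w * factor)

print(target_size(480, 640))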
/detection/configs/mask_rcnn/mask-rcnn_r50-caffe_fpn_ms-poly-1x_coco.py:
--------------------------------------------------------------------------------
_base_ = './mask-rcnn_r50_fpn_1x_coco.py'

model = dict(
    # use caffe img_norm
    data_preprocessor=dict(
        mean=[103.530, 116.280, 123.675],
        std=[1.0, 1.0, 1.0],
        bgr_to_rgb=False),
    backbone=dict(
        norm_cfg=dict(requires_grad=False),
        style='caffe',
        init_cfg=dict(
            type='Pretrained',
            checkpoint='open-mmlab://detectron2/resnet50_caffe')))
train_pipeline = [
    dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}),
    dict(
        type='LoadAnnotations',
        with_bbox=True,
        with_mask=True,
        poly2mask=False),
    dict(
        type='RandomChoiceResize',
        scales=[(1333, 640), (1333, 672), (1333, 704), (1333, 736),
                (1333, 768), (1333, 800)],
        keep_ratio=True),
    dict(type='RandomFlip', prob=0.5),
    dict(type='PackDetInputs')
]

train_dataloader = dict(dataset=dict(pipeline=train_pipeline))
--------------------------------------------------------------------------------
/detection/configs/mask_rcnn/mask-rcnn_r50-caffe_fpn_ms-poly-2x_coco.py:
--------------------------------------------------------------------------------
_base_ = './mask-rcnn_r50-caffe_fpn_ms-poly-1x_coco.py'

train_cfg = dict(max_epochs=24)
# learning rate
param_scheduler = [
    dict(
        type='LinearLR', start_factor=0.001, by_epoch=False, begin=0, end=500),
    dict(
        type='MultiStepLR',
        begin=0,
        end=24,
        by_epoch=True,
        milestones=[16, 22],
        gamma=0.1)
]
--------------------------------------------------------------------------------
/detection/configs/mask_rcnn/mask-rcnn_r50-caffe_fpn_ms-poly-3x_coco.py:
--------------------------------------------------------------------------------
_base_ = './mask-rcnn_r50-caffe_fpn_ms-poly-1x_coco.py'

train_cfg = dict(max_epochs=36)
# learning rate
param_scheduler = [
    dict(
        type='LinearLR', start_factor=0.001, by_epoch=False, begin=0, end=500),
    dict(
        type='MultiStepLR',
        begin=0,
        end=36,
        by_epoch=True,
        milestones=[28, 34],
        gamma=0.1)
]
--------------------------------------------------------------------------------
/detection/configs/mask_rcnn/mask-rcnn_r50-caffe_fpn_poly-1x_coco_v1.py:
--------------------------------------------------------------------------------
_base_ = './mask-rcnn_r50_fpn_1x_coco.py'

model = dict(
    # use caffe img_norm
    data_preprocessor=dict(
        mean=[103.530, 116.280, 123.675],
        std=[1.0, 1.0, 1.0],
        bgr_to_rgb=False),
    backbone=dict(
        norm_cfg=dict(requires_grad=False),
        style='caffe',
        init_cfg=dict(
            type='Pretrained',
            checkpoint='open-mmlab://detectron2/resnet50_caffe')),
    rpn_head=dict(
        loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)),
    roi_head=dict(
        bbox_roi_extractor=dict(
            roi_layer=dict(
                type='RoIAlign',
                output_size=7,
                sampling_ratio=2,
                aligned=False)),
        bbox_head=dict(
            loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)),
        mask_roi_extractor=dict(
            roi_layer=dict(
                type='RoIAlign',
                output_size=14,
                sampling_ratio=2,
                aligned=False))))
--------------------------------------------------------------------------------
/detection/configs/mask_rcnn/mask-rcnn_r50_fpn_1x-wandb_coco.py:
--------------------------------------------------------------------------------
_base_ = [
    '../_base_/models/mask-rcnn_r50_fpn.py',
    '../_base_/datasets/coco_instance.py',
    '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'
]

vis_backends = [dict(type='LocalVisBackend'), dict(type='WandbVisBackend')]
visualizer = dict(vis_backends=vis_backends)

# MMEngine supports the following two ways; users can choose
# whichever is more convenient
# default_hooks = dict(checkpoint=dict(interval=4))
_base_.default_hooks.checkpoint.interval = 4

# train_cfg = dict(val_interval=2)
_base_.train_cfg.val_interval = 2
--------------------------------------------------------------------------------
/detection/configs/mask_rcnn/mask-rcnn_r50_fpn_1x_coco.py:
--------------------------------------------------------------------------------
_base_ = [
    '../_base_/models/mask-rcnn_r50_fpn.py',
    '../_base_/datasets/coco_instance.py',
    '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'
]
--------------------------------------------------------------------------------
/detection/configs/mask_rcnn/mask-rcnn_r50_fpn_2x_coco.py:
--------------------------------------------------------------------------------
_base_ = [
    '../_base_/models/mask-rcnn_r50_fpn.py',
    '../_base_/datasets/coco_instance.py',
    '../_base_/schedules/schedule_2x.py', '../_base_/default_runtime.py'
]
--------------------------------------------------------------------------------
/detection/configs/mask_rcnn/mask-rcnn_r50_fpn_8xb8-amp-lsj-200e_coco.py:
--------------------------------------------------------------------------------
_base_ = [
    '../_base_/models/mask-rcnn_r50_fpn.py',
    '../common/lsj-100e_coco-instance.py'
]
image_size = (1024, 1024)
batch_augments = [
    dict(type='BatchFixedSizePad', size=image_size, pad_mask=True)
]

model = dict(data_preprocessor=dict(batch_augments=batch_augments))

train_dataloader = dict(batch_size=8, num_workers=4)
# Enable automatic-mixed-precision training with AmpOptimWrapper.
optim_wrapper = dict(
    type='AmpOptimWrapper',
    optimizer=dict(
        type='SGD', lr=0.02 * 4, momentum=0.9, weight_decay=0.00004))

# NOTE: `auto_scale_lr` is for automatically scaling LR,
# USER SHOULD NOT CHANGE ITS VALUES.
# base_batch_size = (8 GPUs) x (8 samples per GPU)
auto_scale_lr = dict(base_batch_size=64)
--------------------------------------------------------------------------------
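The LSJ recipe trades schedule length for batch size: 8 GPUs x 8 samples = 64 images per step, four times the default 16, which is why the config sets lr=0.02 * 4 and base_batch_size=64. The same linear rule from the schedules applies; as a sanity check:

# Sanity arithmetic for the 8xb8 LSJ configs.
gpus, samples_per_gpu = 8, 8
total = gpus * samples_per_gpu          # 64 images per optimizer step
assert total / 16 == 4                  # 4x the 8x2 default batch...
assert 0.02 * 4 == 0.08                 # ...so the SGD lr is scaled 4x as well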
/detection/configs/mask_rcnn/mask-rcnn_r50_fpn_amp-1x_coco.py:
--------------------------------------------------------------------------------
_base_ = './mask-rcnn_r50_fpn_1x_coco.py'

# Enable automatic-mixed-precision training with AmpOptimWrapper.
optim_wrapper = dict(type='AmpOptimWrapper')
--------------------------------------------------------------------------------
/detection/configs/mask_rcnn/mask-rcnn_r50_fpn_ms-poly-3x_coco.py:
--------------------------------------------------------------------------------
_base_ = [
    '../common/ms-poly_3x_coco-instance.py',
    '../_base_/models/mask-rcnn_r50_fpn.py'
]
--------------------------------------------------------------------------------
/detection/configs/mask_rcnn/mask-rcnn_r50_fpn_poly-1x_coco.py:
--------------------------------------------------------------------------------
_base_ = [
    '../_base_/models/mask-rcnn_r50_fpn.py',
    '../_base_/datasets/coco_instance.py',
    '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'
]

train_pipeline = [
    dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}),
    dict(
        type='LoadAnnotations',
        with_bbox=True,
        with_mask=True,
        poly2mask=False),
    dict(type='Resize', scale=(1333, 800), keep_ratio=True),
    dict(type='RandomFlip', prob=0.5),
    dict(type='PackDetInputs'),
]
train_dataloader = dict(dataset=dict(pipeline=train_pipeline))
--------------------------------------------------------------------------------
/detection/configs/mask_rcnn/mask-rcnn_x101-32x4d_fpn_1x_coco.py:
--------------------------------------------------------------------------------
_base_ = './mask-rcnn_r101_fpn_1x_coco.py'
model = dict(
    backbone=dict(
        type='ResNeXt',
        depth=101,
        groups=32,
        base_width=4,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        frozen_stages=1,
        norm_cfg=dict(type='BN', requires_grad=True),
        style='pytorch',
        init_cfg=dict(
            type='Pretrained', checkpoint='open-mmlab://resnext101_32x4d')))
--------------------------------------------------------------------------------
/detection/configs/mask_rcnn/mask-rcnn_x101-32x4d_fpn_2x_coco.py:
--------------------------------------------------------------------------------
_base_ = './mask-rcnn_r101_fpn_2x_coco.py'
model = dict(
    backbone=dict(
        type='ResNeXt',
        depth=101,
        groups=32,
        base_width=4,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        frozen_stages=1,
        norm_cfg=dict(type='BN', requires_grad=True),
        style='pytorch',
        init_cfg=dict(
            type='Pretrained', checkpoint='open-mmlab://resnext101_32x4d')))
--------------------------------------------------------------------------------
/detection/configs/mask_rcnn/mask-rcnn_x101-32x4d_fpn_ms-poly-3x_coco.py:
--------------------------------------------------------------------------------
_base_ = [
    '../common/ms-poly_3x_coco-instance.py',
    '../_base_/models/mask-rcnn_r50_fpn.py'
]

model = dict(
    backbone=dict(
        type='ResNeXt',
        depth=101,
        groups=32,
        base_width=4,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        frozen_stages=1,
        norm_cfg=dict(type='BN', requires_grad=True),
        style='pytorch',
        init_cfg=dict(
            type='Pretrained', checkpoint='open-mmlab://resnext101_32x4d')))
--------------------------------------------------------------------------------
/detection/configs/mask_rcnn/mask-rcnn_x101-32x8d_fpn_1x_coco.py:
--------------------------------------------------------------------------------
_base_ = './mask-rcnn_r101_fpn_1x_coco.py'

model = dict(
    # ResNeXt-101-32x8d model trained with Caffe2 at FB,
    # so the mean and std need to be changed.
    data_preprocessor=dict(
        mean=[103.530, 116.280, 123.675],
        std=[57.375, 57.120, 58.395],
        bgr_to_rgb=False),
    backbone=dict(
        type='ResNeXt',
        depth=101,
        groups=32,
        base_width=8,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        frozen_stages=1,
        norm_cfg=dict(type='BN', requires_grad=False),
        style='pytorch',
        init_cfg=dict(
            type='Pretrained',
            checkpoint='open-mmlab://detectron2/resnext101_32x8d')))
--------------------------------------------------------------------------------
/detection/configs/mask_rcnn/mask-rcnn_x101-32x8d_fpn_ms-poly-1x_coco.py:
--------------------------------------------------------------------------------
_base_ = './mask-rcnn_r101_fpn_1x_coco.py'

model = dict(
    # ResNeXt-101-32x8d model trained with Caffe2 at FB,
    # so the mean and std need to be changed.
    data_preprocessor=dict(
        mean=[103.530, 116.280, 123.675],
        std=[57.375, 57.120, 58.395],
        bgr_to_rgb=False),
    backbone=dict(
        type='ResNeXt',
        depth=101,
        groups=32,
        base_width=8,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        frozen_stages=1,
        norm_cfg=dict(type='BN', requires_grad=False),
        style='pytorch',
        init_cfg=dict(
            type='Pretrained',
            checkpoint='open-mmlab://detectron2/resnext101_32x8d')))

train_pipeline = [
    dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}),
    dict(
        type='LoadAnnotations',
        with_bbox=True,
        with_mask=True,
        poly2mask=False),
    dict(
        type='RandomChoiceResize',
        scales=[(1333, 640), (1333, 672), (1333, 704), (1333, 736),
                (1333, 768), (1333, 800)],
        keep_ratio=True),
    dict(type='RandomFlip', prob=0.5),
    dict(type='PackDetInputs'),
]

train_dataloader = dict(dataset=dict(pipeline=train_pipeline))
--------------------------------------------------------------------------------
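The recurring "mean and std need to be changed" comment is about Caffe2 preprocessing: these ResNeXt weights expect BGR channel order with Caffe2's statistics, whereas the plain caffe ResNet configs above keep std=[1.0, 1.0, 1.0] and torchvision weights use RGB statistics. Illustratively, the data_preprocessor's normalization amounts to:

import torch

# Caffe2-style values, as set by the x101-32x8d configs (BGR channel order)
caffe2_mean = torch.tensor([103.530, 116.280, 123.675]).view(3, 1, 1)
caffe2_std = torch.tensor([57.375, 57.120, 58.395]).view(3, 1, 1)

img_bgr = torch.rand(3, 800, 1333) * 255    # stand-in for a decoded BGR image
normalized = (img_bgr - caffe2_mean) / caffe2_std
print(normalized.mean())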
9 | data_preprocessor=dict( 10 | mean=[103.530, 116.280, 123.675], 11 | std=[57.375, 57.120, 58.395], 12 | bgr_to_rgb=False), 13 | backbone=dict( 14 | type='ResNeXt', 15 | depth=101, 16 | groups=32, 17 | base_width=8, 18 | num_stages=4, 19 | out_indices=(0, 1, 2, 3), 20 | frozen_stages=1, 21 | norm_cfg=dict(type='BN', requires_grad=False), 22 | style='pytorch', 23 | init_cfg=dict( 24 | type='Pretrained', 25 | checkpoint='open-mmlab://detectron2/resnext101_32x8d'))) 26 | -------------------------------------------------------------------------------- /detection/configs/mask_rcnn/mask-rcnn_x101-64x4d_fpn_1x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = './mask-rcnn_x101-32x4d_fpn_1x_coco.py' 2 | model = dict( 3 | backbone=dict( 4 | type='ResNeXt', 5 | depth=101, 6 | groups=64, 7 | base_width=4, 8 | num_stages=4, 9 | out_indices=(0, 1, 2, 3), 10 | frozen_stages=1, 11 | norm_cfg=dict(type='BN', requires_grad=True), 12 | style='pytorch', 13 | init_cfg=dict( 14 | type='Pretrained', checkpoint='open-mmlab://resnext101_64x4d'))) 15 | -------------------------------------------------------------------------------- /detection/configs/mask_rcnn/mask-rcnn_x101-64x4d_fpn_2x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = './mask-rcnn_x101-32x4d_fpn_2x_coco.py' 2 | model = dict( 3 | backbone=dict( 4 | type='ResNeXt', 5 | depth=101, 6 | groups=64, 7 | base_width=4, 8 | num_stages=4, 9 | out_indices=(0, 1, 2, 3), 10 | frozen_stages=1, 11 | norm_cfg=dict(type='BN', requires_grad=True), 12 | style='pytorch', 13 | init_cfg=dict( 14 | type='Pretrained', checkpoint='open-mmlab://resnext101_64x4d'))) 15 | -------------------------------------------------------------------------------- /detection/configs/mask_rcnn/mask-rcnn_x101-64x4d_fpn_ms-poly_3x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../common/ms-poly_3x_coco-instance.py', 3 | '../_base_/models/mask-rcnn_r50_fpn.py' 4 | ] 5 | 6 | model = dict( 7 | backbone=dict( 8 | type='ResNeXt', 9 | depth=101, 10 | groups=64, 11 | base_width=4, 12 | num_stages=4, 13 | out_indices=(0, 1, 2, 3), 14 | frozen_stages=1, 15 | norm_cfg=dict(type='BN', requires_grad=True), 16 | style='pytorch', 17 | init_cfg=dict( 18 | type='Pretrained', checkpoint='open-mmlab://resnext101_64x4d'))) 19 | -------------------------------------------------------------------------------- /detection/configs/swin/mask-rcnn_swin-s-p4-w7_fpn_amp-ms-crop-3x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = './mask-rcnn_swin-t-p4-w7_fpn_amp-ms-crop-3x_coco.py' 2 | pretrained = 'https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_small_patch4_window7_224.pth' # noqa 3 | model = dict( 4 | backbone=dict( 5 | depths=[2, 2, 18, 2], 6 | init_cfg=dict(type='Pretrained', checkpoint=pretrained))) 7 | -------------------------------------------------------------------------------- /detection/configs/swin/mask-rcnn_swin-t-p4-w7_fpn_1x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/models/mask-rcnn_r50_fpn.py', 3 | '../_base_/datasets/coco_instance.py', 4 | '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' 5 | ] 6 | pretrained = 'https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_tiny_patch4_window7_224.pth' # noqa 7 | model = dict( 8 | type='MaskRCNN', 
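# (_delete_=True in the backbone dict below tells the mmengine config
# system to drop the base config's ResNet backbone keys instead of
# merging into them, so only the Swin settings listed here take effect.)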
9 | backbone=dict( 10 | _delete_=True, 11 | type='SwinTransformer', 12 | embed_dims=96, 13 | depths=[2, 2, 6, 2], 14 | num_heads=[3, 6, 12, 24], 15 | window_size=7, 16 | mlp_ratio=4, 17 | qkv_bias=True, 18 | qk_scale=None, 19 | drop_rate=0., 20 | attn_drop_rate=0., 21 | drop_path_rate=0.2, 22 | patch_norm=True, 23 | out_indices=(0, 1, 2, 3), 24 | with_cp=False, 25 | convert_weights=True, 26 | init_cfg=dict(type='Pretrained', checkpoint=pretrained)), 27 | neck=dict(in_channels=[96, 192, 384, 768])) 28 | 29 | max_epochs = 12 30 | train_cfg = dict(max_epochs=max_epochs) 31 | 32 | # learning rate 33 | param_scheduler = [ 34 | dict( 35 | type='LinearLR', start_factor=0.001, by_epoch=False, begin=0, 36 | end=1000), 37 | dict( 38 | type='MultiStepLR', 39 | begin=0, 40 | end=max_epochs, 41 | by_epoch=True, 42 | milestones=[8, 11], 43 | gamma=0.1) 44 | ] 45 | 46 | # optimizer 47 | optim_wrapper = dict( 48 | type='OptimWrapper', 49 | paramwise_cfg=dict( 50 | custom_keys={ 51 | 'absolute_pos_embed': dict(decay_mult=0.), 52 | 'relative_position_bias_table': dict(decay_mult=0.), 53 | 'norm': dict(decay_mult=0.) 54 | }), 55 | optimizer=dict( 56 | _delete_=True, 57 | type='AdamW', 58 | lr=0.0001, 59 | betas=(0.9, 0.999), 60 | weight_decay=0.05)) 61 | -------------------------------------------------------------------------------- /detection/configs/swin/mask-rcnn_swin-t-p4-w7_fpn_amp-ms-crop-3x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = './mask-rcnn_swin-t-p4-w7_fpn_ms-crop-3x_coco.py' 2 | # Enable automatic-mixed-precision training with AmpOptimWrapper. 3 | optim_wrapper = dict(type='AmpOptimWrapper') 4 | -------------------------------------------------------------------------------- /detection/configs/swin/retinanet_swin-t-p4-w7_fpn_1x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/models/retinanet_r50_fpn.py', 3 | '../_base_/datasets/coco_detection.py', 4 | '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' 5 | ] 6 | pretrained = 'https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_tiny_patch4_window7_224.pth' # noqa 7 | model = dict( 8 | backbone=dict( 9 | _delete_=True, 10 | type='SwinTransformer', 11 | embed_dims=96, 12 | depths=[2, 2, 6, 2], 13 | num_heads=[3, 6, 12, 24], 14 | window_size=7, 15 | mlp_ratio=4, 16 | qkv_bias=True, 17 | qk_scale=None, 18 | drop_rate=0., 19 | attn_drop_rate=0., 20 | drop_path_rate=0.2, 21 | patch_norm=True, 22 | out_indices=(1, 2, 3), 23 | # Please only add indices that would be used 24 | # in FPN, otherwise some parameters will not be used 25 | with_cp=False, 26 | convert_weights=True, 27 | init_cfg=dict(type='Pretrained', checkpoint=pretrained)), 28 | neck=dict(in_channels=[192, 384, 768], start_level=0, num_outs=5)) 29 | 30 | # optimizer 31 | optim_wrapper = dict(optimizer=dict(lr=0.01)) 32 | -------------------------------------------------------------------------------- /detection/configs/vmamba/mask_rcnn_vmamba_fpn_coco_base.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../swin/mask-rcnn_swin-t-p4-w7_fpn_1x_coco.py' 3 | ] 4 | 5 | model = dict( 6 | backbone=dict( 7 | type='MMDET_VSSM', 8 | depths=(2, 2, 27, 2), 9 | dims=128, 10 | out_indices=(0, 1, 2, 3), 11 | pretrained="../../ckpts/vssmbase/ckpt_epoch_260.pth", 12 | ), 13 | neck=dict(in_channels=[128, 256, 512, 1024]), 14 | ) 15 | 16 | # too big 17 | train_dataloader =
dict(batch_size=1) # as gpus=16 18 | 19 | 20 | 21 | -------------------------------------------------------------------------------- /detection/configs/vmamba/mask_rcnn_vmamba_fpn_coco_small.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../swin/mask-rcnn_swin-t-p4-w7_fpn_1x_coco.py' 3 | ] 4 | 5 | model = dict( 6 | backbone=dict( 7 | type='MMDET_VSSM', 8 | depths=(2, 2, 27, 2), 9 | dims=96, 10 | out_indices=(0, 1, 2, 3), 11 | pretrained="../../ckpts/vssmsmall/ckpt_epoch_292.pth", 12 | ), 13 | ) 14 | 15 | # train_dataloader = dict(batch_size=2) # as gpus=8 16 | 17 | -------------------------------------------------------------------------------- /detection/configs/vmamba/mask_rcnn_vmamba_fpn_coco_tiny.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../swin/mask-rcnn_swin-t-p4-w7_fpn_1x_coco.py' 3 | ] 4 | 5 | model = dict( 6 | backbone=dict( 7 | type='MMDET_VSSM', 8 | depths=(2, 2, 9, 2), 9 | dims=96, 10 | out_indices=(0, 1, 2, 3), 11 | pretrained="../../ckpts/vssmtiny/ckpt_epoch_292.pth", 12 | ), 13 | ) 14 | 15 | # train_dataloader = dict(batch_size=2) # as gpus=8 16 | 17 | -------------------------------------------------------------------------------- /detection/configs/vssm/mask_rcnn_vssm_fpn_coco_base.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../swin/mask-rcnn_swin-t-p4-w7_fpn_1x_coco.py' 3 | ] 4 | 5 | model = dict( 6 | backbone=dict( 7 | type='MM_VSSM', 8 | out_indices=(0, 1, 2, 3), 9 | pretrained="../../ckpts/classification/outs/vssm/vssmbasedp05/vssmbase_dp05_ckpt_epoch_260.pth", 10 | # copied from classification/configs/vssm/vssm_base_224.yaml 11 | dims=128, 12 | depths=(2, 2, 27, 2), 13 | ssm_d_state=16, 14 | ssm_dt_rank="auto", 15 | ssm_ratio=2.0, 16 | mlp_ratio=0.0, 17 | downsample_version="v1", 18 | patchembed_version="v1", 19 | # forward_type="v0", # if you want exactly the same 20 | ), 21 | neck=dict(in_channels=[128, 256, 512, 1024]), 22 | ) 23 | 24 | # too big 25 | train_dataloader = dict(batch_size=1) # as gpus=16 26 | 27 | 28 | 29 | -------------------------------------------------------------------------------- /detection/configs/vssm/mask_rcnn_vssm_fpn_coco_small.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../swin/mask-rcnn_swin-t-p4-w7_fpn_1x_coco.py' 3 | ] 4 | 5 | model = dict( 6 | backbone=dict( 7 | type='MM_VSSM', 8 | out_indices=(0, 1, 2, 3), 9 | pretrained="../../ckpts/classification/outs/vssm/vssmsmall/vssmsmall_dp03_ckpt_epoch_238.pth", 10 | # copied from classification/configs/vssm/vssm_small_224.yaml 11 | dims=96, 12 | depths=(2, 2, 27, 2), 13 | ssm_d_state=16, 14 | ssm_dt_rank="auto", 15 | ssm_ratio=2.0, 16 | mlp_ratio=0.0, 17 | downsample_version="v1", 18 | patchembed_version="v1", 19 | # forward_type="v0", # if you want exactly the same 20 | ), 21 | ) 22 | 23 | # train_dataloader = dict(batch_size=2) # as gpus=8 24 | 25 | -------------------------------------------------------------------------------- /detection/configs/vssm/mask_rcnn_vssm_fpn_coco_tiny.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../swin/mask-rcnn_swin-t-p4-w7_fpn_1x_coco.py' 3 | ] 4 | 5 | model = dict( 6 | backbone=dict( 7 | type='MM_VSSM', 8 | out_indices=(0, 1, 2, 3), 9 | pretrained="../../ckpts/classification/outs/vssm/vssmtiny/vssmtiny_dp01_ckpt_epoch_292.pth", 10 | # copied from 
classification/configs/vssm/vssm_tiny_224.yaml 11 | dims=96, 12 | depths=(2, 2, 9, 2), 13 | ssm_d_state=16, 14 | ssm_dt_rank="auto", 15 | ssm_ratio=2.0, 16 | mlp_ratio=0.0, 17 | downsample_version="v1", 18 | patchembed_version="v1", 19 | # forward_type="v0", # if you want exactly the same 20 | ), 21 | ) 22 | 23 | # train_dataloader = dict(batch_size=2) # as gpus=8 24 | 25 | -------------------------------------------------------------------------------- /detection/configs/vssm1/mask_rcnn_vssm_fpn_coco_base.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../swin/mask-rcnn_swin-t-p4-w7_fpn_1x_coco.py' 3 | ] 4 | 5 | model = dict( 6 | backbone=dict( 7 | type='MM_VSSM', 8 | out_indices=(0, 1, 2, 3), 9 | pretrained="", 10 | # copied from classification/configs/vssm/vssm_base_224.yaml 11 | dims=128, 12 | depths=(2, 2, 15, 2), 13 | ssm_d_state=1, 14 | ssm_dt_rank="auto", 15 | ssm_ratio=2.0, 16 | ssm_conv=3, 17 | ssm_conv_bias=False, 18 | forward_type="v05_noz", # v3_noz 19 | mlp_ratio=4.0, 20 | downsample_version="v3", 21 | patchembed_version="v2", 22 | drop_path_rate=0.6, 23 | norm_layer="ln2d", 24 | ), 25 | neck=dict(in_channels=[128, 256, 512, 1024]), 26 | ) 27 | 28 | # too big 29 | # train_dataloader = dict(batch_size=1) # as gpus=16 30 | 31 | 32 | 33 | -------------------------------------------------------------------------------- /detection/configs/vssm1/mask_rcnn_vssm_fpn_coco_small.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../swin/mask-rcnn_swin-t-p4-w7_fpn_1x_coco.py' 3 | ] 4 | 5 | model = dict( 6 | backbone=dict( 7 | type='MM_VSSM', 8 | out_indices=(0, 1, 2, 3), 9 | pretrained="", 10 | # copied from classification/configs/vssm/vssm_small_224.yaml 11 | dims=96, 12 | depths=(2, 2, 15, 2), 13 | ssm_d_state=1, 14 | ssm_dt_rank="auto", 15 | ssm_ratio=2.0, 16 | ssm_conv=3, 17 | ssm_conv_bias=False, 18 | forward_type="v05_noz", # v3_noz 19 | mlp_ratio=4.0, 20 | downsample_version="v3", 21 | patchembed_version="v2", 22 | drop_path_rate=0.3, 23 | norm_layer="ln2d", 24 | ), 25 | ) 26 | 27 | # train_dataloader = dict(batch_size=2) # as gpus=8 28 | 29 | -------------------------------------------------------------------------------- /detection/configs/vssm1/mask_rcnn_vssm_fpn_coco_tiny.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../swin/mask-rcnn_swin-t-p4-w7_fpn_1x_coco.py' 3 | ] 4 | 5 | model = dict( 6 | backbone=dict( 7 | type='MM_VSSM', 8 | out_indices=(0, 1, 2, 3), 9 | pretrained="", 10 | # copied from classification/configs/vssm/vssm_tiny_224.yaml 11 | dims=96, 12 | # depths=(2, 2, 5, 2), 13 | depths=(2, 2, 8, 2), 14 | ssm_d_state=1, 15 | ssm_dt_rank="auto", 16 | # ssm_ratio=2.0, 17 | ssm_ratio=1.0, 18 | ssm_conv=3, 19 | ssm_conv_bias=False, 20 | forward_type="v05_noz", # v3_noz 21 | mlp_ratio=4.0, 22 | downsample_version="v3", 23 | patchembed_version="v2", 24 | drop_path_rate=0.2, 25 | norm_layer="ln2d", 26 | ), 27 | ) 28 | 29 | # train_dataloader = dict(batch_size=2) # as gpus=8 30 | 31 | -------------------------------------------------------------------------------- /detection/configs/vssm1/mask_rcnn_vssm_fpn_coco_tiny1.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../swin/mask-rcnn_swin-t-p4-w7_fpn_1x_coco.py' 3 | ] 4 | 5 | model = dict( 6 | backbone=dict( 7 | type='MM_VSSM', 8 | out_indices=(0, 1, 2, 3), 9 | pretrained="", 10 | # copied from 
classification/configs/vssm/vssm_tiny_224.yaml 11 | dims=96, 12 | depths=(2, 2, 5, 2), 13 | ssm_d_state=1, 14 | ssm_dt_rank="auto", 15 | ssm_ratio=2.0, 16 | ssm_conv=3, 17 | ssm_conv_bias=False, 18 | forward_type="v05_noz", # v3_noz 19 | mlp_ratio=4.0, 20 | downsample_version="v3", 21 | patchembed_version="v2", 22 | drop_path_rate=0.2, 23 | norm_layer="ln2d", 24 | ), 25 | ) 26 | 27 | # train_dataloader = dict(batch_size=2) # as gpus=8 28 | 29 | -------------------------------------------------------------------------------- /detection/model.py: -------------------------------------------------------------------------------- 1 | import os 2 | from functools import partial 3 | from typing import Callable 4 | 5 | import torch 6 | from torch import nn 7 | from torch.utils import checkpoint 8 | 9 | from mmengine.model import BaseModule 10 | from mmdet.registry import MODELS as MODELS_MMDET 11 | from mmseg.registry import MODELS as MODELS_MMSEG 12 | 13 | def import_abspy(name="models", path="classification/"): 14 | import sys 15 | import importlib 16 | path = os.path.abspath(path) 17 | assert os.path.isdir(path) 18 | sys.path.insert(0, path) 19 | module = importlib.import_module(name) 20 | sys.path.pop(0) 21 | return module 22 | 23 | build = import_abspy( 24 | "models", 25 | os.path.join(os.path.dirname(os.path.abspath(__file__)), "../classification/"), 26 | ) 27 | Backbone_VSSM: nn.Module = build.vmamba.Backbone_VSSM 28 | 29 | @MODELS_MMSEG.register_module() 30 | @MODELS_MMDET.register_module() 31 | class MM_VSSM(BaseModule, Backbone_VSSM): 32 | def __init__(self, *args, **kwargs): 33 | BaseModule.__init__(self) 34 | Backbone_VSSM.__init__(self, *args, **kwargs) 35 | 36 | -------------------------------------------------------------------------------- /detection/readme.md: -------------------------------------------------------------------------------- 1 | ## origins 2 | `configs/` and `tools/` are copied from https://github.com/open-mmlab/mmdetection: `version 3.3.0` 3 | 4 | 5 | ## modifications 6 | `import model` is added at `tools/train.py#12` 7 | `import model` is added at `tools/test.py#17` 8 | 9 | -------------------------------------------------------------------------------- /detection/tools/analysis_tools/coco_occluded_separated_recall.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from argparse import ArgumentParser 3 | 4 | import mmengine 5 | from mmengine.logging import print_log 6 | 7 | from mmdet.datasets import CocoDataset 8 | from mmdet.evaluation import CocoOccludedSeparatedMetric 9 | 10 | 11 | def main(): 12 | parser = ArgumentParser( 13 | description='Compute recall of COCO occluded and separated masks ' 14 | 'presented in the paper https://arxiv.org/abs/2210.10046.') 15 | parser.add_argument('result', help='result file (pkl format) path') 16 | parser.add_argument('--out', help='file path to save evaluation results') 17 | parser.add_argument( 18 | '--score-thr', 19 | type=float, 20 | default=0.3, 21 | help='Score threshold for the recall calculation. Defaults to 0.3') 22 | parser.add_argument( 23 | '--iou-thr', 24 | type=float, 25 | default=0.75, 26 | help='IoU threshold for the recall calculation. 
Defaults to 0.75.') 27 | parser.add_argument( 28 | '--ann', 29 | default='data/coco/annotations/instances_val2017.json', 30 | help='coco annotation file path') 31 | args = parser.parse_args() 32 | 33 | results = mmengine.load(args.result) 34 | assert 'masks' in results[0]['pred_instances'], \ 35 | 'The results must be predicted by an instance segmentation model.' 36 | metric = CocoOccludedSeparatedMetric( 37 | ann_file=args.ann, iou_thr=args.iou_thr, score_thr=args.score_thr) 38 | metric.dataset_meta = CocoDataset.METAINFO 39 | for datasample in results: 40 | metric.process(data_batch=None, data_samples=[datasample]) 41 | metric_res = metric.compute_metrics(metric.results) 42 | if args.out is not None: 43 | mmengine.dump(metric_res, args.out) 44 | print_log(f'Evaluation results have been saved to {args.out}.') 45 | 46 | 47 | if __name__ == '__main__': 48 | main() 49 | -------------------------------------------------------------------------------- /detection/tools/analysis_tools/eval_metric.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import argparse 3 | 4 | import mmengine 5 | from mmengine import Config, DictAction 6 | from mmengine.evaluator import Evaluator 7 | from mmengine.registry import init_default_scope 8 | 9 | from mmdet.registry import DATASETS 10 | 11 | 12 | def parse_args(): 13 | parser = argparse.ArgumentParser(description='Evaluate metric of the ' 14 | 'results saved in pkl format') 15 | parser.add_argument('config', help='Config of the model') 16 | parser.add_argument('pkl_results', help='Results in pickle format') 17 | parser.add_argument( 18 | '--cfg-options', 19 | nargs='+', 20 | action=DictAction, 21 | help='override some settings in the used config; the key-value pair ' 22 | 'in xxx=yyy format will be merged into the config file. If the value to ' 23 | 'be overwritten is a list, it should be like key="[a,b]" or key=a,b. ' 24 | 'It also allows nested list/tuple values, e.g. 
key="[(a,b),(c,d)]" ' 25 | 'Note that the quotation marks are necessary and that no white space ' 26 | 'is allowed.') 27 | args = parser.parse_args() 28 | return args 29 | 30 | 31 | def main(): 32 | args = parse_args() 33 | 34 | cfg = Config.fromfile(args.config) 35 | init_default_scope(cfg.get('default_scope', 'mmdet')) 36 | 37 | if args.cfg_options is not None: 38 | cfg.merge_from_dict(args.cfg_options) 39 | 40 | dataset = DATASETS.build(cfg.test_dataloader.dataset) 41 | predictions = mmengine.load(args.pkl_results) 42 | 43 | evaluator = Evaluator(cfg.val_evaluator) 44 | evaluator.dataset_meta = dataset.metainfo 45 | eval_results = evaluator.offline_evaluate(predictions) 46 | print(eval_results) 47 | 48 | 49 | if __name__ == '__main__': 50 | main() 51 | -------------------------------------------------------------------------------- /detection/tools/analysis_tools/mot/dist_mot_search.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | CONFIG=$1 4 | GPUS=$2 5 | PORT=${PORT:-29500} 6 | 7 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 8 | python -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \ 9 | $(dirname "$0")/mot_param_search.py $CONFIG --launcher pytorch ${@:3} 10 | -------------------------------------------------------------------------------- /detection/tools/analysis_tools/mot/slurm_mot_search.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -x 4 | 5 | PARTITION=$1 6 | JOB_NAME=$2 7 | CONFIG=$3 8 | GPUS=$4 9 | GPUS_PER_NODE=${GPUS_PER_NODE:-8} 10 | CPUS_PER_TASK=${CPUS_PER_TASK:-2} 11 | PY_ARGS=${@:5} 12 | SRUN_ARGS=${SRUN_ARGS:-""} 13 | 14 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 15 | srun -p ${PARTITION} \ 16 | --job-name=${JOB_NAME} \ 17 | --gres=gpu:${GPUS_PER_NODE} \ 18 | --ntasks=${GPUS} \ 19 | --ntasks-per-node=${GPUS_PER_NODE} \ 20 | --cpus-per-task=${CPUS_PER_TASK} \ 21 | --kill-on-bad-exit=1 \ 22 | ${SRUN_ARGS} \ 23 | python -u $(dirname "$0")/mot_param_search.py ${CONFIG} --launcher="slurm" ${PY_ARGS} 24 | -------------------------------------------------------------------------------- /detection/tools/dataset_converters/scripts/preprocess_coco2017.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | DOWNLOAD_DIR=$1 4 | DATA_ROOT=$2 5 | 6 | unzip $DOWNLOAD_DIR/OpenDataLab___COCO_2017/raw/Images/val2017.zip -d $DATA_ROOT 7 | unzip $DOWNLOAD_DIR/OpenDataLab___COCO_2017/raw/Images/train2017.zip -d $DATA_ROOT 8 | unzip $DOWNLOAD_DIR/OpenDataLab___COCO_2017/raw/Images/test2017.zip -d $DATA_ROOT/ 9 | unzip $DOWNLOAD_DIR/OpenDataLab___COCO_2017/raw/Images/unlabeled2017.zip -d $DATA_ROOT 10 | unzip $DOWNLOAD_DIR/OpenDataLab___COCO_2017/raw/Annotations/stuff_annotations_trainval2017.zip -d $DATA_ROOT/ 11 | unzip $DOWNLOAD_DIR/OpenDataLab___COCO_2017/raw/Annotations/panoptic_annotations_trainval2017.zip -d $DATA_ROOT/ 12 | unzip $DOWNLOAD_DIR/OpenDataLab___COCO_2017/raw/Annotations/image_info_unlabeled2017.zip -d $DATA_ROOT/ 13 | unzip $DOWNLOAD_DIR/OpenDataLab___COCO_2017/raw/Annotations/image_info_test2017.zip -d $DATA_ROOT/ 14 | unzip $DOWNLOAD_DIR/OpenDataLab___COCO_2017/raw/Annotations/annotations_trainval2017.zip -d $DATA_ROOT 15 | rm -rf $DOWNLOAD_DIR/OpenDataLab___COCO_2017 16 | -------------------------------------------------------------------------------- /detection/tools/dataset_converters/scripts/preprocess_voc2007.sh: 
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | DOWNLOAD_DIR=$1 4 | DATA_ROOT=$2 5 | 6 | tar -xvf $DOWNLOAD_DIR/OpenDataLab___PASCAL_VOC2007/raw/VOCtrainval_06-Nov-2007.tar -C $DATA_ROOT 7 | tar -xvf $DOWNLOAD_DIR/OpenDataLab___PASCAL_VOC2007/raw/VOCtestnoimgs_06-Nov-2007.tar -C $DATA_ROOT 8 | rm -rf $DOWNLOAD_DIR/OpenDataLab___PASCAL_VOC2007 9 | -------------------------------------------------------------------------------- /detection/tools/dataset_converters/scripts/preprocess_voc2012.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | DOWNLOAD_DIR=$1 4 | DATA_ROOT=$2 5 | 6 | tar -xvf $DOWNLOAD_DIR/OpenDataLab___PASCAL_VOC2012/raw/VOCtrainval_11-May-2012.tar -C $DATA_ROOT 7 | tar -xvf $DOWNLOAD_DIR/OpenDataLab___PASCAL_VOC2012/raw/VOC2012test.tar -C $DATA_ROOT 8 | rm -rf $DOWNLOAD_DIR/OpenDataLab___PASCAL_VOC2012 9 | -------------------------------------------------------------------------------- /detection/tools/dist_test.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | CONFIG=$1 4 | CHECKPOINT=$2 5 | GPUS=$3 6 | NNODES=${NNODES:-1} 7 | NODE_RANK=${NODE_RANK:-0} 8 | PORT=${PORT:-29500} 9 | MASTER_ADDR=${MASTER_ADDR:-"127.0.0.1"} 10 | 11 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 12 | python -m torch.distributed.launch \ 13 | --nnodes=$NNODES \ 14 | --node_rank=$NODE_RANK \ 15 | --master_addr=$MASTER_ADDR \ 16 | --nproc_per_node=$GPUS \ 17 | --master_port=$PORT \ 18 | $(dirname "$0")/test.py \ 19 | $CONFIG \ 20 | $CHECKPOINT \ 21 | --launcher pytorch \ 22 | ${@:4} 23 | -------------------------------------------------------------------------------- /detection/tools/dist_test_tracking.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | CONFIG=$1 4 | GPUS=$2 5 | NNODES=${NNODES:-1} 6 | NODE_RANK=${NODE_RANK:-0} 7 | PORT=${PORT:-29500} 8 | MASTER_ADDR=${MASTER_ADDR:-"127.0.0.1"} 9 | 10 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 11 | python -m torch.distributed.launch \ 12 | --nnodes=$NNODES \ 13 | --node_rank=$NODE_RANK \ 14 | --master_addr=$MASTER_ADDR \ 15 | --nproc_per_node=$GPUS \ 16 | --master_port=$PORT \ 17 | $(dirname "$0")/test_tracking.py \ 18 | $CONFIG \ 19 | --launcher pytorch \ 20 | ${@:3} 21 | -------------------------------------------------------------------------------- /detection/tools/dist_train.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | CONFIG=$1 4 | GPUS=$2 5 | NNODES=${NNODES:-1} 6 | NODE_RANK=${NODE_RANK:-0} 7 | PORT=${PORT:-29500} 8 | MASTER_ADDR=${MASTER_ADDR:-"127.0.0.1"} 9 | 10 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 11 | python -m torch.distributed.launch \ 12 | --nnodes=$NNODES \ 13 | --node_rank=$NODE_RANK \ 14 | --master_addr=$MASTER_ADDR \ 15 | --nproc_per_node=$GPUS \ 16 | --master_port=$PORT \ 17 | $(dirname "$0")/train.py \ 18 | $CONFIG \ 19 | --launcher pytorch ${@:3} 20 | -------------------------------------------------------------------------------- /detection/tools/misc/gen_coco_panoptic_test_info.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os.path as osp 3 | 4 | from mmengine.fileio import dump, load 5 | 6 | 7 | def parse_args(): 8 | parser = argparse.ArgumentParser( 9 | description='Generate COCO test image information ' 10 | 'for COCO 
panoptic segmentation.') 11 | parser.add_argument('data_root', help='Path to COCO annotation directory.') 12 | args = parser.parse_args() 13 | 14 | return args 15 | 16 | 17 | def main(): 18 | args = parse_args() 19 | data_root = args.data_root 20 | val_info = load(osp.join(data_root, 'panoptic_val2017.json')) 21 | test_old_info = load(osp.join(data_root, 'image_info_test-dev2017.json')) 22 | 23 | # replace categories from image_info_test-dev2017.json 24 | # with categories from panoptic_val2017.json which 25 | # has attribute `isthing`. 26 | test_info = test_old_info 27 | test_info.update({'categories': val_info['categories']}) 28 | dump(test_info, osp.join(data_root, 29 | 'panoptic_image_info_test-dev2017.json')) 30 | 31 | 32 | if __name__ == '__main__': 33 | main() 34 | -------------------------------------------------------------------------------- /detection/tools/model_converters/detectron2_to_mmdet.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import argparse 3 | from collections import OrderedDict 4 | 5 | import torch 6 | from mmengine.fileio import load 7 | from mmengine.runner import save_checkpoint 8 | 9 | 10 | def convert(src: str, dst: str, prefix: str = 'd2_model') -> None: 11 | """Convert Detectron2 checkpoint to MMDetection style. 12 | 13 | Args: 14 | src (str): The Detectron2 checkpoint path, should end with `.pkl`. 15 | dst (str): The MMDetection checkpoint path. 16 | prefix (str): The prefix of MMDetection model, defaults to 'd2_model'. 17 | """ 18 | # load the Detectron2 checkpoint 19 | assert src.endswith('pkl'), \ 20 | 'the source Detectron2 checkpoint should end with `pkl`.' 21 | d2_model = load(src, encoding='latin1').get('model') 22 | assert d2_model is not None 23 | 24 | # convert to mmdet style 25 | dst_state_dict = OrderedDict() 26 | for name, value in d2_model.items(): 27 | if not isinstance(value, torch.Tensor): 28 | value = torch.from_numpy(value) 29 | dst_state_dict[f'{prefix}.{name}'] = value 30 | 31 | mmdet_model = dict(state_dict=dst_state_dict, meta=dict()) 32 | save_checkpoint(mmdet_model, dst) 33 | print(f'Convert Detectron2 model {src} to MMDetection model {dst}') 34 | 35 | 36 | def main(): 37 | parser = argparse.ArgumentParser( 38 | description='Convert Detectron2 checkpoint to MMDetection style') 39 | parser.add_argument('src', help='Detectron2 model path') 40 | parser.add_argument('dst', help='MMDetection model save path') 41 | parser.add_argument( 42 | '--prefix', default='d2_model', type=str, help='prefix of the model') 43 | args = parser.parse_args() 44 | convert(args.src, args.dst, args.prefix) 45 | 46 | 47 | if __name__ == '__main__': 48 | main() 49 | -------------------------------------------------------------------------------- /detection/tools/model_converters/selfsup2mmdet.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved.
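# Usage sketch (checkpoint paths are illustrative, not shipped with the repo):
#   python tools/model_converters/selfsup2mmdet.py \
#       moco_v2.pth moco_v2_mmdet.pth --selfsup moco
# MoCo checkpoints keep the query-encoder weights under keys prefixed with
# 'module.encoder_q.'; the converter strips that prefix so the remaining
# backbone keys load directly into an mmdet model.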
2 | import argparse 3 | from collections import OrderedDict 4 | 5 | import torch 6 | 7 | 8 | def moco_convert(src, dst): 9 | """Convert keys in MoCo pretrained models to mmdet style.""" 10 | # load the MoCo checkpoint 11 | moco_model = torch.load(src) 12 | blobs = moco_model['state_dict'] 13 | # convert to pytorch style 14 | state_dict = OrderedDict() 15 | for k, v in blobs.items(): 16 | if not k.startswith('module.encoder_q.'): 17 | continue 18 | old_k = k 19 | k = k.replace('module.encoder_q.', '') 20 | state_dict[k] = v 21 | print(old_k, '->', k) 22 | # save checkpoint 23 | checkpoint = dict() 24 | checkpoint['state_dict'] = state_dict 25 | torch.save(checkpoint, dst) 26 | 27 | 28 | def main(): 29 | parser = argparse.ArgumentParser(description='Convert model keys') 30 | parser.add_argument('src', help='source self-supervised checkpoint path') 31 | parser.add_argument('dst', help='save path') 32 | parser.add_argument( 33 | '--selfsup', type=str, choices=['moco', 'swav'], help='method used for the self-supervised pretraining') 34 | args = parser.parse_args() 35 | if args.selfsup == 'moco': 36 | moco_convert(args.src, args.dst) 37 | elif args.selfsup == 'swav': 38 | print('SWAV does not need to convert the keys') 39 | 40 | 41 | if __name__ == '__main__': 42 | main() 43 | -------------------------------------------------------------------------------- /detection/tools/slurm_test.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -x 4 | 5 | PARTITION=$1 6 | JOB_NAME=$2 7 | CONFIG=$3 8 | CHECKPOINT=$4 9 | GPUS=${GPUS:-8} 10 | GPUS_PER_NODE=${GPUS_PER_NODE:-8} 11 | CPUS_PER_TASK=${CPUS_PER_TASK:-5} 12 | PY_ARGS=${@:5} 13 | SRUN_ARGS=${SRUN_ARGS:-""} 14 | 15 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 16 | srun -p ${PARTITION} \ 17 | --job-name=${JOB_NAME} \ 18 | --gres=gpu:${GPUS_PER_NODE} \ 19 | --ntasks=${GPUS} \ 20 | --ntasks-per-node=${GPUS_PER_NODE} \ 21 | --cpus-per-task=${CPUS_PER_TASK} \ 22 | --kill-on-bad-exit=1 \ 23 | ${SRUN_ARGS} \ 24 | python -u tools/test.py ${CONFIG} ${CHECKPOINT} --launcher="slurm" ${PY_ARGS} 25 | -------------------------------------------------------------------------------- /detection/tools/slurm_test_tracking.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -x 4 | 5 | PARTITION=$1 6 | JOB_NAME=$2 7 | CONFIG=$3 8 | GPUS=${GPUS:-8} 9 | GPUS_PER_NODE=${GPUS_PER_NODE:-8} 10 | CPUS_PER_TASK=${CPUS_PER_TASK:-5} 11 | PY_ARGS=${@:4} 12 | SRUN_ARGS=${SRUN_ARGS:-""} 13 | 14 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 15 | srun -p ${PARTITION} \ 16 | --job-name=${JOB_NAME} \ 17 | --gres=gpu:${GPUS_PER_NODE} \ 18 | --ntasks=${GPUS} \ 19 | --ntasks-per-node=${GPUS_PER_NODE} \ 20 | --cpus-per-task=${CPUS_PER_TASK} \ 21 | --kill-on-bad-exit=1 \ 22 | ${SRUN_ARGS} \ 23 | python -u tools/test_tracking.py ${CONFIG} --launcher="slurm" ${PY_ARGS} 24 | -------------------------------------------------------------------------------- /detection/tools/slurm_train.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -x 4 | 5 | PARTITION=$1 6 | JOB_NAME=$2 7 | CONFIG=$3 8 | WORK_DIR=$4 9 | GPUS=${GPUS:-8} 10 | GPUS_PER_NODE=${GPUS_PER_NODE:-8} 11 | CPUS_PER_TASK=${CPUS_PER_TASK:-5} 12 | SRUN_ARGS=${SRUN_ARGS:-""} 13 | PY_ARGS=${@:5} 14 | 15 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 16 | srun -p ${PARTITION} \ 17 | --job-name=${JOB_NAME} \ 18 | --gres=gpu:${GPUS_PER_NODE} \ 19 | --ntasks=${GPUS} \ 20 |
--ntasks-per-node=${GPUS_PER_NODE} \ 21 | --cpus-per-task=${CPUS_PER_TASK} \ 22 | --kill-on-bad-exit=1 \ 23 | ${SRUN_ARGS} \ 24 | python -u tools/train.py ${CONFIG} --work-dir=${WORK_DIR} --launcher="slurm" ${PY_ARGS} 25 | -------------------------------------------------------------------------------- /kernels/selective_scan/csrc/selective_scan/cub_extra.cuh: -------------------------------------------------------------------------------- 1 | // WarpMask is copied from /usr/local/cuda-12.1/include/cub/util_ptx.cuh 2 | // PowerOfTwo is copied from /usr/local/cuda-12.1/include/cub/util_type.cuh 3 | 4 | #pragma once 5 | 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | /** 12 | * \brief Statically determine if N is a power-of-two 13 | */ 14 | template <int N> 15 | struct PowerOfTwo 16 | { 17 | enum { VALUE = ((N & (N - 1)) == 0) }; 18 | }; 19 | 20 | 21 | /** 22 | * @brief Returns the warp mask for a warp of @p LOGICAL_WARP_THREADS threads 23 | * 24 | * @par 25 | * If the number of threads assigned to the virtual warp is not a power of two, 26 | * it's assumed that only one virtual warp exists. 27 | * 28 | * @tparam LOGICAL_WARP_THREADS [optional] The number of threads per 29 | * "logical" warp (may be less than the number of 30 | * hardware warp threads). 31 | * @param warp_id Id of virtual warp within architectural warp 32 | */ 33 | template <int LOGICAL_WARP_THREADS> 34 | __host__ __device__ __forceinline__ 35 | unsigned int WarpMask(unsigned int warp_id) 36 | { 37 | constexpr bool is_pow_of_two = PowerOfTwo<LOGICAL_WARP_THREADS>::VALUE; 38 | constexpr bool is_arch_warp = LOGICAL_WARP_THREADS == CUB_WARP_THREADS(0); 39 | 40 | unsigned int member_mask = 0xFFFFFFFFu >> 41 | (CUB_WARP_THREADS(0) - LOGICAL_WARP_THREADS); 42 | 43 | if (is_pow_of_two && !is_arch_warp) 44 | { 45 | member_mask <<= warp_id * LOGICAL_WARP_THREADS; 46 | } 47 | 48 | return member_mask; 49 | } 50 | -------------------------------------------------------------------------------- /kernels/selective_scan/csrc/selective_scan/cus/selective_scan_core_bwd.cu: -------------------------------------------------------------------------------- 1 | /****************************************************************************** 2 | * Copyright (c) 2023, Tri Dao. 3 | ******************************************************************************/ 4 | #include "selective_scan_bwd_kernel.cuh" 5 | 6 | template void selective_scan_bwd_cuda<1, float, float>(SSMParamsBwd &params, cudaStream_t stream); 7 | template void selective_scan_bwd_cuda<1, at::Half, float>(SSMParamsBwd &params, cudaStream_t stream); 8 | template void selective_scan_bwd_cuda<1, at::BFloat16, float>(SSMParamsBwd &params, cudaStream_t stream); 9 | 10 | -------------------------------------------------------------------------------- /kernels/selective_scan/csrc/selective_scan/cus/selective_scan_core_fwd.cu: -------------------------------------------------------------------------------- 1 | /****************************************************************************** 2 | * Copyright (c) 2023, Tri Dao.
3 | ******************************************************************************/ 4 | #include "selective_scan_fwd_kernel.cuh" 5 | 6 | template void selective_scan_fwd_cuda<1, float, float>(SSMParamsBase ¶ms, cudaStream_t stream); 7 | template void selective_scan_fwd_cuda<1, at::Half, float>(SSMParamsBase ¶ms, cudaStream_t stream); 8 | template void selective_scan_fwd_cuda<1, at::BFloat16, float>(SSMParamsBase ¶ms, cudaStream_t stream); 9 | 10 | -------------------------------------------------------------------------------- /kernels/selective_scan/csrc/selective_scan/cusndstate/selective_scan_core_bwd.cu: -------------------------------------------------------------------------------- 1 | /****************************************************************************** 2 | * Copyright (c) 2023, Tri Dao. 3 | ******************************************************************************/ 4 | #include "selective_scan_bwd_kernel_ndstate.cuh" 5 | 6 | template void selective_scan_bwd_cuda<1, float, float>(SSMParamsBwd ¶ms, cudaStream_t stream); 7 | template void selective_scan_bwd_cuda<1, at::Half, float>(SSMParamsBwd ¶ms, cudaStream_t stream); 8 | template void selective_scan_bwd_cuda<1, at::BFloat16, float>(SSMParamsBwd ¶ms, cudaStream_t stream); 9 | 10 | -------------------------------------------------------------------------------- /kernels/selective_scan/csrc/selective_scan/cusndstate/selective_scan_core_fwd.cu: -------------------------------------------------------------------------------- 1 | /****************************************************************************** 2 | * Copyright (c) 2023, Tri Dao. 3 | ******************************************************************************/ 4 | #include "selective_scan_fwd_kernel_ndstate.cuh" 5 | 6 | template void selective_scan_fwd_cuda<1, float, float>(SSMParamsBase ¶ms, cudaStream_t stream); 7 | template void selective_scan_fwd_cuda<1, at::Half, float>(SSMParamsBase ¶ms, cudaStream_t stream); 8 | template void selective_scan_fwd_cuda<1, at::BFloat16, float>(SSMParamsBase ¶ms, cudaStream_t stream); 9 | 10 | -------------------------------------------------------------------------------- /kernels/selective_scan/csrc/selective_scan/cusnrow/selective_scan_core_bwd.cu: -------------------------------------------------------------------------------- 1 | /****************************************************************************** 2 | * Copyright (c) 2023, Tri Dao. 3 | ******************************************************************************/ 4 | #include "selective_scan_bwd_kernel_nrow.cuh" 5 | 6 | template void selective_scan_bwd_cuda<1, float, float>(SSMParamsBwd ¶ms, cudaStream_t stream); 7 | template void selective_scan_bwd_cuda<1, at::Half, float>(SSMParamsBwd ¶ms, cudaStream_t stream); 8 | template void selective_scan_bwd_cuda<1, at::BFloat16, float>(SSMParamsBwd ¶ms, cudaStream_t stream); 9 | 10 | -------------------------------------------------------------------------------- /kernels/selective_scan/csrc/selective_scan/cusnrow/selective_scan_core_bwd2.cu: -------------------------------------------------------------------------------- 1 | /****************************************************************************** 2 | * Copyright (c) 2023, Tri Dao. 
3 | ******************************************************************************/ 4 | #include "selective_scan_bwd_kernel_nrow.cuh" 5 | 6 | template void selective_scan_bwd_cuda<2, float, float>(SSMParamsBwd ¶ms, cudaStream_t stream); 7 | template void selective_scan_bwd_cuda<2, at::Half, float>(SSMParamsBwd ¶ms, cudaStream_t stream); 8 | template void selective_scan_bwd_cuda<2, at::BFloat16, float>(SSMParamsBwd ¶ms, cudaStream_t stream); 9 | 10 | -------------------------------------------------------------------------------- /kernels/selective_scan/csrc/selective_scan/cusnrow/selective_scan_core_bwd3.cu: -------------------------------------------------------------------------------- 1 | /****************************************************************************** 2 | * Copyright (c) 2023, Tri Dao. 3 | ******************************************************************************/ 4 | #include "selective_scan_bwd_kernel_nrow.cuh" 5 | 6 | template void selective_scan_bwd_cuda<3, float, float>(SSMParamsBwd ¶ms, cudaStream_t stream); 7 | template void selective_scan_bwd_cuda<3, at::Half, float>(SSMParamsBwd ¶ms, cudaStream_t stream); 8 | template void selective_scan_bwd_cuda<3, at::BFloat16, float>(SSMParamsBwd ¶ms, cudaStream_t stream); 9 | -------------------------------------------------------------------------------- /kernels/selective_scan/csrc/selective_scan/cusnrow/selective_scan_core_bwd4.cu: -------------------------------------------------------------------------------- 1 | /****************************************************************************** 2 | * Copyright (c) 2023, Tri Dao. 3 | ******************************************************************************/ 4 | #include "selective_scan_bwd_kernel_nrow.cuh" 5 | 6 | template void selective_scan_bwd_cuda<4, float, float>(SSMParamsBwd ¶ms, cudaStream_t stream); 7 | template void selective_scan_bwd_cuda<4, at::Half, float>(SSMParamsBwd ¶ms, cudaStream_t stream); 8 | template void selective_scan_bwd_cuda<4, at::BFloat16, float>(SSMParamsBwd ¶ms, cudaStream_t stream); 9 | -------------------------------------------------------------------------------- /kernels/selective_scan/csrc/selective_scan/cusnrow/selective_scan_core_fwd.cu: -------------------------------------------------------------------------------- 1 | /****************************************************************************** 2 | * Copyright (c) 2023, Tri Dao. 3 | ******************************************************************************/ 4 | #include "selective_scan_fwd_kernel_nrow.cuh" 5 | 6 | template void selective_scan_fwd_cuda<1, float, float>(SSMParamsBase ¶ms, cudaStream_t stream); 7 | template void selective_scan_fwd_cuda<1, at::Half, float>(SSMParamsBase ¶ms, cudaStream_t stream); 8 | template void selective_scan_fwd_cuda<1, at::BFloat16, float>(SSMParamsBase ¶ms, cudaStream_t stream); 9 | 10 | -------------------------------------------------------------------------------- /kernels/selective_scan/csrc/selective_scan/cusnrow/selective_scan_core_fwd2.cu: -------------------------------------------------------------------------------- 1 | /****************************************************************************** 2 | * Copyright (c) 2023, Tri Dao. 
3 | ******************************************************************************/ 4 | #include "selective_scan_fwd_kernel_nrow.cuh" 5 | 6 | template void selective_scan_fwd_cuda<2, float, float>(SSMParamsBase ¶ms, cudaStream_t stream); 7 | template void selective_scan_fwd_cuda<2, at::Half, float>(SSMParamsBase ¶ms, cudaStream_t stream); 8 | template void selective_scan_fwd_cuda<2, at::BFloat16, float>(SSMParamsBase ¶ms, cudaStream_t stream); 9 | 10 | -------------------------------------------------------------------------------- /kernels/selective_scan/csrc/selective_scan/cusnrow/selective_scan_core_fwd3.cu: -------------------------------------------------------------------------------- 1 | /****************************************************************************** 2 | * Copyright (c) 2023, Tri Dao. 3 | ******************************************************************************/ 4 | #include "selective_scan_fwd_kernel_nrow.cuh" 5 | 6 | template void selective_scan_fwd_cuda<3, float, float>(SSMParamsBase ¶ms, cudaStream_t stream); 7 | template void selective_scan_fwd_cuda<3, at::Half, float>(SSMParamsBase ¶ms, cudaStream_t stream); 8 | template void selective_scan_fwd_cuda<3, at::BFloat16, float>(SSMParamsBase ¶ms, cudaStream_t stream); 9 | 10 | -------------------------------------------------------------------------------- /kernels/selective_scan/csrc/selective_scan/cusnrow/selective_scan_core_fwd4.cu: -------------------------------------------------------------------------------- 1 | /****************************************************************************** 2 | * Copyright (c) 2023, Tri Dao. 3 | ******************************************************************************/ 4 | #include "selective_scan_fwd_kernel_nrow.cuh" 5 | 6 | template void selective_scan_fwd_cuda<4, float, float>(SSMParamsBase ¶ms, cudaStream_t stream); 7 | template void selective_scan_fwd_cuda<4, at::Half, float>(SSMParamsBase ¶ms, cudaStream_t stream); 8 | template void selective_scan_fwd_cuda<4, at::BFloat16, float>(SSMParamsBase ¶ms, cudaStream_t stream); 9 | 10 | -------------------------------------------------------------------------------- /kernels/selective_scan/csrc/selective_scan/cusoflex/selective_scan_core_bwd.cu: -------------------------------------------------------------------------------- 1 | /****************************************************************************** 2 | * Copyright (c) 2023, Tri Dao. 3 | ******************************************************************************/ 4 | #include "selective_scan_bwd_kernel_oflex.cuh" 5 | 6 | template void selective_scan_bwd_cuda<1, float, float, float>(SSMParamsBwd ¶ms, cudaStream_t stream); 7 | template void selective_scan_bwd_cuda<1, at::Half, float, float>(SSMParamsBwd ¶ms, cudaStream_t stream); 8 | template void selective_scan_bwd_cuda<1, at::BFloat16, float, float>(SSMParamsBwd ¶ms, cudaStream_t stream); 9 | template void selective_scan_bwd_cuda<1, at::Half, float, at::Half>(SSMParamsBwd ¶ms, cudaStream_t stream); 10 | template void selective_scan_bwd_cuda<1, at::BFloat16, float, at::BFloat16>(SSMParamsBwd ¶ms, cudaStream_t stream); 11 | 12 | -------------------------------------------------------------------------------- /kernels/selective_scan/csrc/selective_scan/cusoflex/selective_scan_core_fwd.cu: -------------------------------------------------------------------------------- 1 | /****************************************************************************** 2 | * Copyright (c) 2023, Tri Dao. 
3 | ******************************************************************************/ 4 | #include "selective_scan_fwd_kernel_oflex.cuh" 5 | 6 | template void selective_scan_fwd_cuda<1, float, float, float>(SSMParamsBase ¶ms, cudaStream_t stream); 7 | template void selective_scan_fwd_cuda<1, at::Half, float, float>(SSMParamsBase ¶ms, cudaStream_t stream); 8 | template void selective_scan_fwd_cuda<1, at::BFloat16, float, float>(SSMParamsBase ¶ms, cudaStream_t stream); 9 | template void selective_scan_fwd_cuda<1, at::Half, float, at::Half>(SSMParamsBase ¶ms, cudaStream_t stream); 10 | template void selective_scan_fwd_cuda<1, at::BFloat16, float, at::BFloat16>(SSMParamsBase ¶ms, cudaStream_t stream); 11 | 12 | -------------------------------------------------------------------------------- /kernels/selective_scan/csrc/selective_scan/static_switch.h: -------------------------------------------------------------------------------- 1 | // Inspired by https://github.com/NVIDIA/DALI/blob/main/include/dali/core/static_switch.h 2 | // and https://github.com/pytorch/pytorch/blob/master/aten/src/ATen/Dispatch.h 3 | 4 | #pragma once 5 | 6 | /// @param COND - a boolean expression to switch by 7 | /// @param CONST_NAME - a name given for the constexpr bool variable. 8 | /// @param ... - code to execute for true and false 9 | /// 10 | /// Usage: 11 | /// ``` 12 | /// BOOL_SWITCH(flag, BoolConst, [&] { 13 | /// some_function(...); 14 | /// }); 15 | /// ``` 16 | #define BOOL_SWITCH(COND, CONST_NAME, ...) \ 17 | [&] { \ 18 | if (COND) { \ 19 | constexpr bool CONST_NAME = true; \ 20 | return __VA_ARGS__(); \ 21 | } else { \ 22 | constexpr bool CONST_NAME = false; \ 23 | return __VA_ARGS__(); \ 24 | } \ 25 | }() 26 | -------------------------------------------------------------------------------- /pretrained_weights/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HashmatShadab/MambaRobustness/e69a676bbbd072b6bc9b2c806f257a675682f6cd/pretrained_weights/.gitkeep -------------------------------------------------------------------------------- /req.txt: -------------------------------------------------------------------------------- 1 | packaging 2 | timm==0.4.12 3 | pytest 4 | chardet 5 | yacs 6 | termcolor 7 | submitit 8 | tensorboardX 9 | fvcore 10 | seaborn 11 | scipy 12 | einops 13 | torch_dct==0.1.6 14 | numba 15 | scikit-image -------------------------------------------------------------------------------- /segmentation/__init__.py: -------------------------------------------------------------------------------- 1 | # configs/ and tools/ is copied from https://github.com/open-mmlab/mmsegmentation: version 1.2.2 2 | # tools/train.py#13 is added with "import model" 3 | # tools/test.py#8 is added with "import model" 4 | 5 | -------------------------------------------------------------------------------- /segmentation/configs/_base_/datasets/cityscapes_1024x1024.py: -------------------------------------------------------------------------------- 1 | _base_ = './cityscapes.py' 2 | crop_size = (1024, 1024) 3 | train_pipeline = [ 4 | dict(type='LoadImageFromFile'), 5 | dict(type='LoadAnnotations'), 6 | dict( 7 | type='RandomResize', 8 | scale=(2048, 1024), 9 | ratio_range=(0.5, 2.0), 10 | keep_ratio=True), 11 | dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), 12 | dict(type='RandomFlip', prob=0.5), 13 | dict(type='PhotoMetricDistortion'), 14 | dict(type='PackSegInputs') 15 | ] 16 | test_pipeline = [ 17 | 
dict(type='LoadImageFromFile'), 18 | dict(type='Resize', scale=(2048, 1024), keep_ratio=True), 19 | # add loading annotation after ``Resize`` because ground truth 20 | # does not need to do resize data transform 21 | dict(type='LoadAnnotations'), 22 | dict(type='PackSegInputs') 23 | ] 24 | train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) 25 | val_dataloader = dict(dataset=dict(pipeline=test_pipeline)) 26 | test_dataloader = val_dataloader 27 | 28 | val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU']) 29 | test_evaluator = val_evaluator 30 | -------------------------------------------------------------------------------- /segmentation/configs/_base_/datasets/cityscapes_768x768.py: -------------------------------------------------------------------------------- 1 | _base_ = './cityscapes.py' 2 | crop_size = (768, 768) 3 | train_pipeline = [ 4 | dict(type='LoadImageFromFile'), 5 | dict(type='LoadAnnotations'), 6 | dict( 7 | type='RandomResize', 8 | scale=(2049, 1025), 9 | ratio_range=(0.5, 2.0), 10 | keep_ratio=True), 11 | dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), 12 | dict(type='RandomFlip', prob=0.5), 13 | dict(type='PhotoMetricDistortion'), 14 | dict(type='PackSegInputs') 15 | ] 16 | test_pipeline = [ 17 | dict(type='LoadImageFromFile'), 18 | dict(type='Resize', scale=(2049, 1025), keep_ratio=True), 19 | # add loading annotation after ``Resize`` because ground truth 20 | # does not need to do resize data transform 21 | dict(type='LoadAnnotations'), 22 | dict(type='PackSegInputs') 23 | ] 24 | train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) 25 | val_dataloader = dict(dataset=dict(pipeline=test_pipeline)) 26 | test_dataloader = val_dataloader 27 | 28 | val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU']) 29 | test_evaluator = val_evaluator 30 | -------------------------------------------------------------------------------- /segmentation/configs/_base_/datasets/cityscapes_769x769.py: -------------------------------------------------------------------------------- 1 | _base_ = './cityscapes.py' 2 | crop_size = (769, 769) 3 | train_pipeline = [ 4 | dict(type='LoadImageFromFile'), 5 | dict(type='LoadAnnotations'), 6 | dict( 7 | type='RandomResize', 8 | scale=(2049, 1025), 9 | ratio_range=(0.5, 2.0), 10 | keep_ratio=True), 11 | dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), 12 | dict(type='RandomFlip', prob=0.5), 13 | dict(type='PhotoMetricDistortion'), 14 | dict(type='PackSegInputs') 15 | ] 16 | test_pipeline = [ 17 | dict(type='LoadImageFromFile'), 18 | dict(type='Resize', scale=(2049, 1025), keep_ratio=True), 19 | # add loading annotation after ``Resize`` because ground truth 20 | # does not need to do resize data transform 21 | dict(type='LoadAnnotations'), 22 | dict(type='PackSegInputs') 23 | ] 24 | train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) 25 | val_dataloader = dict(dataset=dict(pipeline=test_pipeline)) 26 | test_dataloader = val_dataloader 27 | 28 | val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU']) 29 | test_evaluator = val_evaluator 30 | -------------------------------------------------------------------------------- /segmentation/configs/_base_/datasets/cityscapes_832x832.py: -------------------------------------------------------------------------------- 1 | _base_ = './cityscapes.py' 2 | crop_size = (832, 832) 3 | train_pipeline = [ 4 | dict(type='LoadImageFromFile'), 5 | dict(type='LoadAnnotations'), 6 | dict( 7 | type='RandomResize', 8 | scale=(2048, 1024), 9 | 
ratio_range=(0.5, 2.0), 10 | keep_ratio=True), 11 | dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), 12 | dict(type='RandomFlip', prob=0.5), 13 | dict(type='PhotoMetricDistortion'), 14 | dict(type='PackSegInputs') 15 | ] 16 | test_pipeline = [ 17 | dict(type='LoadImageFromFile'), 18 | dict(type='Resize', scale=(2048, 1024), keep_ratio=True), 19 | # add loading annotation after ``Resize`` because ground truth 20 | # does not need to do resize data transform 21 | dict(type='LoadAnnotations'), 22 | dict(type='PackSegInputs') 23 | ] 24 | train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) 25 | val_dataloader = dict(dataset=dict(pipeline=test_pipeline)) 26 | test_dataloader = val_dataloader 27 | 28 | val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU']) 29 | test_evaluator = val_evaluator 30 | -------------------------------------------------------------------------------- /segmentation/configs/_base_/datasets/levir_256x256.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'LEVIRCDDataset' 3 | data_root = r'data/LEVIRCD' 4 | 5 | albu_train_transforms = [ 6 | dict(type='RandomBrightnessContrast', p=0.2), 7 | dict(type='HorizontalFlip', p=0.5), 8 | dict(type='VerticalFlip', p=0.5) 9 | ] 10 | 11 | train_pipeline = [ 12 | dict(type='LoadMultipleRSImageFromFile'), 13 | dict(type='LoadAnnotations'), 14 | dict(type='Albu', transforms=albu_train_transforms), 15 | dict(type='ConcatCDInput'), 16 | dict(type='PackSegInputs') 17 | ] 18 | test_pipeline = [ 19 | dict(type='LoadMultipleRSImageFromFile'), 20 | dict(type='LoadAnnotations'), 21 | dict(type='ConcatCDInput'), 22 | dict(type='PackSegInputs') 23 | ] 24 | 25 | tta_pipeline = [ 26 | dict(type='LoadMultipleRSImageFromFile'), 27 | dict( 28 | type='TestTimeAug', 29 | transforms=[[dict(type='LoadAnnotations')], 30 | [dict(type='ConcatCDInput')], 31 | [dict(type='PackSegInputs')]]) 32 | ] 33 | train_dataloader = dict( 34 | batch_size=4, 35 | num_workers=4, 36 | persistent_workers=True, 37 | sampler=dict(type='InfiniteSampler', shuffle=True), 38 | dataset=dict( 39 | type=dataset_type, 40 | data_root=data_root, 41 | data_prefix=dict( 42 | img_path='train/A', 43 | img_path2='train/B', 44 | seg_map_path='train/label'), 45 | pipeline=train_pipeline)) 46 | val_dataloader = dict( 47 | batch_size=1, 48 | num_workers=4, 49 | persistent_workers=True, 50 | sampler=dict(type='DefaultSampler', shuffle=False), 51 | dataset=dict( 52 | type=dataset_type, 53 | data_root=data_root, 54 | data_prefix=dict( 55 | img_path='test/A', img_path2='test/B', seg_map_path='test/label'), 56 | pipeline=test_pipeline)) 57 | test_dataloader = val_dataloader 58 | val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU']) 59 | test_evaluator = val_evaluator 60 | -------------------------------------------------------------------------------- /segmentation/configs/_base_/datasets/synapse.py: -------------------------------------------------------------------------------- 1 | dataset_type = 'SynapseDataset' 2 | data_root = 'data/synapse/' 3 | img_scale = (224, 224) 4 | train_pipeline = [ 5 | dict(type='LoadImageFromFile'), 6 | dict(type='LoadAnnotations'), 7 | dict(type='Resize', scale=img_scale, keep_ratio=True), 8 | dict(type='RandomRotFlip', rotate_prob=0.5, flip_prob=0.5, degree=20), 9 | dict(type='PackSegInputs') 10 | ] 11 | test_pipeline = [ 12 | dict(type='LoadImageFromFile'), 13 | dict(type='Resize', scale=img_scale, keep_ratio=True), 14 | dict(type='LoadAnnotations'), 15 | 
dict(type='PackSegInputs') 16 | ] 17 | train_dataloader = dict( 18 | batch_size=6, 19 | num_workers=2, 20 | persistent_workers=True, 21 | sampler=dict(type='InfiniteSampler', shuffle=True), 22 | dataset=dict( 23 | type=dataset_type, 24 | data_root=data_root, 25 | data_prefix=dict( 26 | img_path='img_dir/train', seg_map_path='ann_dir/train'), 27 | pipeline=train_pipeline)) 28 | val_dataloader = dict( 29 | batch_size=1, 30 | num_workers=4, 31 | persistent_workers=True, 32 | sampler=dict(type='DefaultSampler', shuffle=False), 33 | dataset=dict( 34 | type=dataset_type, 35 | data_root=data_root, 36 | data_prefix=dict(img_path='img_dir/val', seg_map_path='ann_dir/val'), 37 | pipeline=test_pipeline)) 38 | test_dataloader = val_dataloader 39 | 40 | val_evaluator = dict(type='IoUMetric', iou_metrics=['mDice']) 41 | test_evaluator = val_evaluator 42 | -------------------------------------------------------------------------------- /segmentation/configs/_base_/default_runtime.py: -------------------------------------------------------------------------------- 1 | default_scope = 'mmseg' 2 | env_cfg = dict( 3 | cudnn_benchmark=True, 4 | mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0), 5 | dist_cfg=dict(backend='nccl'), 6 | ) 7 | vis_backends = [dict(type='LocalVisBackend')] 8 | visualizer = dict( 9 | type='SegLocalVisualizer', vis_backends=vis_backends, name='visualizer') 10 | log_processor = dict(by_epoch=False) 11 | log_level = 'INFO' 12 | load_from = None 13 | resume = False 14 | 15 | tta_model = dict(type='SegTTAModel') 16 | -------------------------------------------------------------------------------- /segmentation/configs/_base_/models/ann_r50-d8.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | data_preprocessor = dict( 4 | type='SegDataPreProcessor', 5 | mean=[123.675, 116.28, 103.53], 6 | std=[58.395, 57.12, 57.375], 7 | bgr_to_rgb=True, 8 | pad_val=0, 9 | seg_pad_val=255) 10 | model = dict( 11 | type='EncoderDecoder', 12 | data_preprocessor=data_preprocessor, 13 | pretrained='open-mmlab://resnet50_v1c', 14 | backbone=dict( 15 | type='ResNetV1c', 16 | depth=50, 17 | num_stages=4, 18 | out_indices=(0, 1, 2, 3), 19 | dilations=(1, 1, 2, 4), 20 | strides=(1, 2, 1, 1), 21 | norm_cfg=norm_cfg, 22 | norm_eval=False, 23 | style='pytorch', 24 | contract_dilation=True), 25 | decode_head=dict( 26 | type='ANNHead', 27 | in_channels=[1024, 2048], 28 | in_index=[2, 3], 29 | channels=512, 30 | project_channels=256, 31 | query_scales=(1, ), 32 | key_pool_scales=(1, 3, 6, 8), 33 | dropout_ratio=0.1, 34 | num_classes=19, 35 | norm_cfg=norm_cfg, 36 | align_corners=False, 37 | loss_decode=dict( 38 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 39 | auxiliary_head=dict( 40 | type='FCNHead', 41 | in_channels=1024, 42 | in_index=2, 43 | channels=256, 44 | num_convs=1, 45 | concat_input=False, 46 | dropout_ratio=0.1, 47 | num_classes=19, 48 | norm_cfg=norm_cfg, 49 | align_corners=False, 50 | loss_decode=dict( 51 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 52 | # model training and testing settings 53 | train_cfg=dict(), 54 | test_cfg=dict(mode='whole')) 55 | -------------------------------------------------------------------------------- /segmentation/configs/_base_/models/apcnet_r50-d8.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | 
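# Note: SyncBN assumes a distributed launcher; for single-GPU debugging it
# is common to substitute norm_cfg = dict(type='BN', requires_grad=True).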
data_preprocessor = dict( 4 | type='SegDataPreProcessor', 5 | mean=[123.675, 116.28, 103.53], 6 | std=[58.395, 57.12, 57.375], 7 | bgr_to_rgb=True, 8 | pad_val=0, 9 | seg_pad_val=255) 10 | model = dict( 11 | type='EncoderDecoder', 12 | data_preprocessor=data_preprocessor, 13 | pretrained='open-mmlab://resnet50_v1c', 14 | backbone=dict( 15 | type='ResNetV1c', 16 | depth=50, 17 | num_stages=4, 18 | out_indices=(0, 1, 2, 3), 19 | dilations=(1, 1, 2, 4), 20 | strides=(1, 2, 1, 1), 21 | norm_cfg=norm_cfg, 22 | norm_eval=False, 23 | style='pytorch', 24 | contract_dilation=True), 25 | decode_head=dict( 26 | type='APCHead', 27 | in_channels=2048, 28 | in_index=3, 29 | channels=512, 30 | pool_scales=(1, 2, 3, 6), 31 | dropout_ratio=0.1, 32 | num_classes=19, 33 | norm_cfg=dict(type='SyncBN', requires_grad=True), 34 | align_corners=False, 35 | loss_decode=dict( 36 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 37 | auxiliary_head=dict( 38 | type='FCNHead', 39 | in_channels=1024, 40 | in_index=2, 41 | channels=256, 42 | num_convs=1, 43 | concat_input=False, 44 | dropout_ratio=0.1, 45 | num_classes=19, 46 | norm_cfg=norm_cfg, 47 | align_corners=False, 48 | loss_decode=dict( 49 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 50 | # model training and testing settings 51 | train_cfg=dict(), 52 | test_cfg=dict(mode='whole')) 53 | -------------------------------------------------------------------------------- /segmentation/configs/_base_/models/ccnet_r50-d8.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | data_preprocessor = dict( 4 | type='SegDataPreProcessor', 5 | mean=[123.675, 116.28, 103.53], 6 | std=[58.395, 57.12, 57.375], 7 | bgr_to_rgb=True, 8 | pad_val=0, 9 | seg_pad_val=255) 10 | model = dict( 11 | type='EncoderDecoder', 12 | data_preprocessor=data_preprocessor, 13 | pretrained='open-mmlab://resnet50_v1c', 14 | backbone=dict( 15 | type='ResNetV1c', 16 | depth=50, 17 | num_stages=4, 18 | out_indices=(0, 1, 2, 3), 19 | dilations=(1, 1, 2, 4), 20 | strides=(1, 2, 1, 1), 21 | norm_cfg=norm_cfg, 22 | norm_eval=False, 23 | style='pytorch', 24 | contract_dilation=True), 25 | decode_head=dict( 26 | type='CCHead', 27 | in_channels=2048, 28 | in_index=3, 29 | channels=512, 30 | recurrence=2, 31 | dropout_ratio=0.1, 32 | num_classes=19, 33 | norm_cfg=norm_cfg, 34 | align_corners=False, 35 | loss_decode=dict( 36 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 37 | auxiliary_head=dict( 38 | type='FCNHead', 39 | in_channels=1024, 40 | in_index=2, 41 | channels=256, 42 | num_convs=1, 43 | concat_input=False, 44 | dropout_ratio=0.1, 45 | num_classes=19, 46 | norm_cfg=norm_cfg, 47 | align_corners=False, 48 | loss_decode=dict( 49 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 50 | # model training and testing settings 51 | train_cfg=dict(), 52 | test_cfg=dict(mode='whole')) 53 | -------------------------------------------------------------------------------- /segmentation/configs/_base_/models/cgnet.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', eps=1e-03, requires_grad=True) 3 | data_preprocessor = dict( 4 | type='SegDataPreProcessor', 5 | mean=[72.39239876, 82.90891754, 73.15835921], 6 | std=[1, 1, 1], 7 | bgr_to_rgb=True, 8 | pad_val=0, 9 | seg_pad_val=255) 10 | model = dict( 11 | type='EncoderDecoder', 12 | 
data_preprocessor=data_preprocessor, 13 | backbone=dict( 14 | type='CGNet', 15 | norm_cfg=norm_cfg, 16 | in_channels=3, 17 | num_channels=(32, 64, 128), 18 | num_blocks=(3, 21), 19 | dilations=(2, 4), 20 | reductions=(8, 16)), 21 | decode_head=dict( 22 | type='FCNHead', 23 | in_channels=256, 24 | in_index=2, 25 | channels=256, 26 | num_convs=0, 27 | concat_input=False, 28 | dropout_ratio=0, 29 | num_classes=19, 30 | norm_cfg=norm_cfg, 31 | loss_decode=dict( 32 | type='CrossEntropyLoss', 33 | use_sigmoid=False, 34 | loss_weight=1.0, 35 | class_weight=[ 36 | 2.5959933, 6.7415504, 3.5354059, 9.8663225, 9.690899, 9.369352, 37 | 10.289121, 9.953208, 4.3097677, 9.490387, 7.674431, 9.396905, 38 | 10.347791, 6.3927646, 10.226669, 10.241062, 10.280587, 39 | 10.396974, 10.055647 40 | ])), 41 | # model training and testing settings 42 | train_cfg=dict(sampler=None), 43 | test_cfg=dict(mode='whole')) 44 | -------------------------------------------------------------------------------- /segmentation/configs/_base_/models/danet_r50-d8.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | data_preprocessor = dict( 4 | type='SegDataPreProcessor', 5 | mean=[123.675, 116.28, 103.53], 6 | std=[58.395, 57.12, 57.375], 7 | bgr_to_rgb=True, 8 | pad_val=0, 9 | seg_pad_val=255) 10 | model = dict( 11 | type='EncoderDecoder', 12 | data_preprocessor=data_preprocessor, 13 | pretrained='open-mmlab://resnet50_v1c', 14 | backbone=dict( 15 | type='ResNetV1c', 16 | depth=50, 17 | num_stages=4, 18 | out_indices=(0, 1, 2, 3), 19 | dilations=(1, 1, 2, 4), 20 | strides=(1, 2, 1, 1), 21 | norm_cfg=norm_cfg, 22 | norm_eval=False, 23 | style='pytorch', 24 | contract_dilation=True), 25 | decode_head=dict( 26 | type='DAHead', 27 | in_channels=2048, 28 | in_index=3, 29 | channels=512, 30 | pam_channels=64, 31 | dropout_ratio=0.1, 32 | num_classes=19, 33 | norm_cfg=norm_cfg, 34 | align_corners=False, 35 | loss_decode=dict( 36 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 37 | auxiliary_head=dict( 38 | type='FCNHead', 39 | in_channels=1024, 40 | in_index=2, 41 | channels=256, 42 | num_convs=1, 43 | concat_input=False, 44 | dropout_ratio=0.1, 45 | num_classes=19, 46 | norm_cfg=norm_cfg, 47 | align_corners=False, 48 | loss_decode=dict( 49 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 50 | # model training and testing settings 51 | train_cfg=dict(), 52 | test_cfg=dict(mode='whole')) 53 | -------------------------------------------------------------------------------- /segmentation/configs/_base_/models/deeplabv3_r50-d8.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | data_preprocessor = dict( 4 | type='SegDataPreProcessor', 5 | mean=[123.675, 116.28, 103.53], 6 | std=[58.395, 57.12, 57.375], 7 | bgr_to_rgb=True, 8 | pad_val=0, 9 | seg_pad_val=255) 10 | model = dict( 11 | type='EncoderDecoder', 12 | data_preprocessor=data_preprocessor, 13 | pretrained='open-mmlab://resnet50_v1c', 14 | backbone=dict( 15 | type='ResNetV1c', 16 | depth=50, 17 | num_stages=4, 18 | out_indices=(0, 1, 2, 3), 19 | dilations=(1, 1, 2, 4), 20 | strides=(1, 2, 1, 1), 21 | norm_cfg=norm_cfg, 22 | norm_eval=False, 23 | style='pytorch', 24 | contract_dilation=True), 25 | decode_head=dict( 26 | type='ASPPHead', 27 | in_channels=2048, 28 | in_index=3, 29 | channels=512, 30 | dilations=(1, 12, 24, 36), 31 | 
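# parallel atrous rates of the ASPP module (DeepLabV3); the rate-1 branch is a plain 1x1 conv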
dropout_ratio=0.1, 32 | num_classes=19, 33 | norm_cfg=norm_cfg, 34 | align_corners=False, 35 | loss_decode=dict( 36 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 37 | auxiliary_head=dict( 38 | type='FCNHead', 39 | in_channels=1024, 40 | in_index=2, 41 | channels=256, 42 | num_convs=1, 43 | concat_input=False, 44 | dropout_ratio=0.1, 45 | num_classes=19, 46 | norm_cfg=norm_cfg, 47 | align_corners=False, 48 | loss_decode=dict( 49 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 50 | # model training and testing settings 51 | train_cfg=dict(), 52 | test_cfg=dict(mode='whole')) 53 | -------------------------------------------------------------------------------- /segmentation/configs/_base_/models/deeplabv3_unet_s5-d16.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | data_preprocessor = dict( 4 | type='SegDataPreProcessor', 5 | mean=[123.675, 116.28, 103.53], 6 | std=[58.395, 57.12, 57.375], 7 | bgr_to_rgb=True, 8 | pad_val=0, 9 | seg_pad_val=255) 10 | model = dict( 11 | type='EncoderDecoder', 12 | data_preprocessor=data_preprocessor, 13 | pretrained=None, 14 | backbone=dict( 15 | type='UNet', 16 | in_channels=3, 17 | base_channels=64, 18 | num_stages=5, 19 | strides=(1, 1, 1, 1, 1), 20 | enc_num_convs=(2, 2, 2, 2, 2), 21 | dec_num_convs=(2, 2, 2, 2), 22 | downsamples=(True, True, True, True), 23 | enc_dilations=(1, 1, 1, 1, 1), 24 | dec_dilations=(1, 1, 1, 1), 25 | with_cp=False, 26 | conv_cfg=None, 27 | norm_cfg=norm_cfg, 28 | act_cfg=dict(type='ReLU'), 29 | upsample_cfg=dict(type='InterpConv'), 30 | norm_eval=False), 31 | decode_head=dict( 32 | type='ASPPHead', 33 | in_channels=64, 34 | in_index=4, 35 | channels=16, 36 | dilations=(1, 12, 24, 36), 37 | dropout_ratio=0.1, 38 | num_classes=2, 39 | norm_cfg=norm_cfg, 40 | align_corners=False, 41 | loss_decode=dict( 42 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 43 | auxiliary_head=dict( 44 | type='FCNHead', 45 | in_channels=128, 46 | in_index=3, 47 | channels=64, 48 | num_convs=1, 49 | concat_input=False, 50 | dropout_ratio=0.1, 51 | num_classes=2, 52 | norm_cfg=norm_cfg, 53 | align_corners=False, 54 | loss_decode=dict( 55 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 56 | # model training and testing settings 57 | train_cfg=dict(), 58 | test_cfg=dict(mode='slide', crop_size=256, stride=170)) 59 | -------------------------------------------------------------------------------- /segmentation/configs/_base_/models/deeplabv3plus_r50-d8.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | data_preprocessor = dict( 4 | type='SegDataPreProcessor', 5 | mean=[123.675, 116.28, 103.53], 6 | std=[58.395, 57.12, 57.375], 7 | bgr_to_rgb=True, 8 | pad_val=0, 9 | seg_pad_val=255) 10 | model = dict( 11 | type='EncoderDecoder', 12 | data_preprocessor=data_preprocessor, 13 | pretrained='open-mmlab://resnet50_v1c', 14 | backbone=dict( 15 | type='ResNetV1c', 16 | depth=50, 17 | num_stages=4, 18 | out_indices=(0, 1, 2, 3), 19 | dilations=(1, 1, 2, 4), 20 | strides=(1, 2, 1, 1), 21 | norm_cfg=norm_cfg, 22 | norm_eval=False, 23 | style='pytorch', 24 | contract_dilation=True), 25 | decode_head=dict( 26 | type='DepthwiseSeparableASPPHead', 27 | in_channels=2048, 28 | in_index=3, 29 | channels=512, 30 | dilations=(1, 12, 24, 36), 31 | c1_in_channels=256, 32 | 
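# DeepLabV3+ decoder: low-level stage-1 backbone features (256 channels) are projected to c1_channels below and fused with the upsampled ASPP output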
c1_channels=48, 33 | dropout_ratio=0.1, 34 | num_classes=19, 35 | norm_cfg=norm_cfg, 36 | align_corners=False, 37 | loss_decode=dict( 38 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 39 | auxiliary_head=dict( 40 | type='FCNHead', 41 | in_channels=1024, 42 | in_index=2, 43 | channels=256, 44 | num_convs=1, 45 | concat_input=False, 46 | dropout_ratio=0.1, 47 | num_classes=19, 48 | norm_cfg=norm_cfg, 49 | align_corners=False, 50 | loss_decode=dict( 51 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 52 | # model training and testing settings 53 | train_cfg=dict(), 54 | test_cfg=dict(mode='whole')) 55 | -------------------------------------------------------------------------------- /segmentation/configs/_base_/models/dmnet_r50-d8.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | data_preprocessor = dict( 4 | type='SegDataPreProcessor', 5 | mean=[123.675, 116.28, 103.53], 6 | std=[58.395, 57.12, 57.375], 7 | bgr_to_rgb=True, 8 | pad_val=0, 9 | seg_pad_val=255) 10 | model = dict( 11 | type='EncoderDecoder', 12 | data_preprocessor=data_preprocessor, 13 | pretrained='open-mmlab://resnet50_v1c', 14 | backbone=dict( 15 | type='ResNetV1c', 16 | depth=50, 17 | num_stages=4, 18 | out_indices=(0, 1, 2, 3), 19 | dilations=(1, 1, 2, 4), 20 | strides=(1, 2, 1, 1), 21 | norm_cfg=norm_cfg, 22 | norm_eval=False, 23 | style='pytorch', 24 | contract_dilation=True), 25 | decode_head=dict( 26 | type='DMHead', 27 | in_channels=2048, 28 | in_index=3, 29 | channels=512, 30 | filter_sizes=(1, 3, 5, 7), 31 | dropout_ratio=0.1, 32 | num_classes=19, 33 | norm_cfg=dict(type='SyncBN', requires_grad=True), 34 | align_corners=False, 35 | loss_decode=dict( 36 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 37 | auxiliary_head=dict( 38 | type='FCNHead', 39 | in_channels=1024, 40 | in_index=2, 41 | channels=256, 42 | num_convs=1, 43 | concat_input=False, 44 | dropout_ratio=0.1, 45 | num_classes=19, 46 | norm_cfg=norm_cfg, 47 | align_corners=False, 48 | loss_decode=dict( 49 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 50 | # model training and testing settings 51 | train_cfg=dict(), 52 | test_cfg=dict(mode='whole')) 53 | -------------------------------------------------------------------------------- /segmentation/configs/_base_/models/dnl_r50-d8.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | data_preprocessor = dict( 4 | type='SegDataPreProcessor', 5 | mean=[123.675, 116.28, 103.53], 6 | std=[58.395, 57.12, 57.375], 7 | bgr_to_rgb=True, 8 | pad_val=0, 9 | seg_pad_val=255) 10 | model = dict( 11 | type='EncoderDecoder', 12 | data_preprocessor=data_preprocessor, 13 | pretrained='open-mmlab://resnet50_v1c', 14 | backbone=dict( 15 | type='ResNetV1c', 16 | depth=50, 17 | num_stages=4, 18 | out_indices=(0, 1, 2, 3), 19 | dilations=(1, 1, 2, 4), 20 | strides=(1, 2, 1, 1), 21 | norm_cfg=norm_cfg, 22 | norm_eval=False, 23 | style='pytorch', 24 | contract_dilation=True), 25 | decode_head=dict( 26 | type='DNLHead', 27 | in_channels=2048, 28 | in_index=3, 29 | channels=512, 30 | dropout_ratio=0.1, 31 | reduction=2, 32 | use_scale=True, 33 | mode='embedded_gaussian', 34 | num_classes=19, 35 | norm_cfg=norm_cfg, 36 | align_corners=False, 37 | loss_decode=dict( 38 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 39 | 
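# DNLHead: disentangled non-local attention, learning the whitened pairwise term and the unary term separately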
auxiliary_head=dict( 40 | type='FCNHead', 41 | in_channels=1024, 42 | in_index=2, 43 | channels=256, 44 | num_convs=1, 45 | concat_input=False, 46 | dropout_ratio=0.1, 47 | num_classes=19, 48 | norm_cfg=norm_cfg, 49 | align_corners=False, 50 | loss_decode=dict( 51 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 52 | # model training and testing settings 53 | train_cfg=dict(), 54 | test_cfg=dict(mode='whole')) 55 | -------------------------------------------------------------------------------- /segmentation/configs/_base_/models/dpt_vit-b16.py: -------------------------------------------------------------------------------- 1 | norm_cfg = dict(type='SyncBN', requires_grad=True) 2 | data_preprocessor = dict( 3 | type='SegDataPreProcessor', 4 | mean=[123.675, 116.28, 103.53], 5 | std=[58.395, 57.12, 57.375], 6 | bgr_to_rgb=True, 7 | pad_val=0, 8 | seg_pad_val=255) 9 | model = dict( 10 | type='EncoderDecoder', 11 | data_preprocessor=data_preprocessor, 12 | pretrained='pretrain/vit-b16_p16_224-80ecf9dd.pth', # noqa 13 | backbone=dict( 14 | type='VisionTransformer', 15 | img_size=224, 16 | embed_dims=768, 17 | num_layers=12, 18 | num_heads=12, 19 | out_indices=(2, 5, 8, 11), 20 | final_norm=False, 21 | with_cls_token=True, 22 | output_cls_token=True), 23 | decode_head=dict( 24 | type='DPTHead', 25 | in_channels=(768, 768, 768, 768), 26 | channels=256, 27 | embed_dims=768, 28 | post_process_channels=[96, 192, 384, 768], 29 | num_classes=150, 30 | readout_type='project', 31 | input_transform='multiple_select', 32 | in_index=(0, 1, 2, 3), 33 | norm_cfg=norm_cfg, 34 | loss_decode=dict( 35 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 36 | auxiliary_head=None, 37 | # model training and testing settings 38 | train_cfg=dict(), 39 | test_cfg=dict(mode='whole')) # yapf: disable 40 | -------------------------------------------------------------------------------- /segmentation/configs/_base_/models/emanet_r50-d8.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | data_preprocessor = dict( 4 | type='SegDataPreProcessor', 5 | mean=[123.675, 116.28, 103.53], 6 | std=[58.395, 57.12, 57.375], 7 | bgr_to_rgb=True, 8 | pad_val=0, 9 | seg_pad_val=255) 10 | model = dict( 11 | type='EncoderDecoder', 12 | data_preprocessor=data_preprocessor, 13 | pretrained='open-mmlab://resnet50_v1c', 14 | backbone=dict( 15 | type='ResNetV1c', 16 | depth=50, 17 | num_stages=4, 18 | out_indices=(0, 1, 2, 3), 19 | dilations=(1, 1, 2, 4), 20 | strides=(1, 2, 1, 1), 21 | norm_cfg=norm_cfg, 22 | norm_eval=False, 23 | style='pytorch', 24 | contract_dilation=True), 25 | decode_head=dict( 26 | type='EMAHead', 27 | in_channels=2048, 28 | in_index=3, 29 | channels=256, 30 | ema_channels=512, 31 | num_bases=64, 32 | num_stages=3, 33 | momentum=0.1, 34 | dropout_ratio=0.1, 35 | num_classes=19, 36 | norm_cfg=norm_cfg, 37 | align_corners=False, 38 | loss_decode=dict( 39 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 40 | auxiliary_head=dict( 41 | type='FCNHead', 42 | in_channels=1024, 43 | in_index=2, 44 | channels=256, 45 | num_convs=1, 46 | concat_input=False, 47 | dropout_ratio=0.1, 48 | num_classes=19, 49 | norm_cfg=norm_cfg, 50 | align_corners=False, 51 | loss_decode=dict( 52 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 53 | # model training and testing settings 54 | train_cfg=dict(), 55 | test_cfg=dict(mode='whole')) 56 | 
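A pattern shared by the model configs above and below: the decode head carries the full loss (loss_weight=1.0) while an auxiliary FCNHead on an earlier backbone stage adds deep supervision at loss_weight=0.4. A minimal sketch of how the two weighted losses combine, assuming both heads' logits are already upsampled to label resolution; the helper name is illustrative, not part of this repo:

import torch.nn.functional as F

def combined_seg_loss(decode_logits, aux_logits, target):
    # 255 is seg_pad_val in the data preprocessors above and is
    # treated as the ignored label index by the losses
    loss_decode = F.cross_entropy(decode_logits, target, ignore_index=255)
    loss_aux = F.cross_entropy(aux_logits, target, ignore_index=255)
    # loss_weight=1.0 and loss_weight=0.4, as in the configs
    return 1.0 * loss_decode + 0.4 * loss_aux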
-------------------------------------------------------------------------------- /segmentation/configs/_base_/models/encnet_r50-d8.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | data_preprocessor = dict( 4 | type='SegDataPreProcessor', 5 | mean=[123.675, 116.28, 103.53], 6 | std=[58.395, 57.12, 57.375], 7 | bgr_to_rgb=True, 8 | pad_val=0, 9 | seg_pad_val=255) 10 | model = dict( 11 | type='EncoderDecoder', 12 | data_preprocessor=data_preprocessor, 13 | pretrained='open-mmlab://resnet50_v1c', 14 | backbone=dict( 15 | type='ResNetV1c', 16 | depth=50, 17 | num_stages=4, 18 | out_indices=(0, 1, 2, 3), 19 | dilations=(1, 1, 2, 4), 20 | strides=(1, 2, 1, 1), 21 | norm_cfg=norm_cfg, 22 | norm_eval=False, 23 | style='pytorch', 24 | contract_dilation=True), 25 | decode_head=dict( 26 | type='EncHead', 27 | in_channels=[512, 1024, 2048], 28 | in_index=(1, 2, 3), 29 | channels=512, 30 | num_codes=32, 31 | use_se_loss=True, 32 | add_lateral=False, 33 | dropout_ratio=0.1, 34 | num_classes=19, 35 | norm_cfg=norm_cfg, 36 | align_corners=False, 37 | loss_decode=dict( 38 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), 39 | loss_se_decode=dict( 40 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=0.2)), 41 | auxiliary_head=dict( 42 | type='FCNHead', 43 | in_channels=1024, 44 | in_index=2, 45 | channels=256, 46 | num_convs=1, 47 | concat_input=False, 48 | dropout_ratio=0.1, 49 | num_classes=19, 50 | norm_cfg=norm_cfg, 51 | align_corners=False, 52 | loss_decode=dict( 53 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 54 | # model training and testing settings 55 | train_cfg=dict(), 56 | test_cfg=dict(mode='whole')) 57 | -------------------------------------------------------------------------------- /segmentation/configs/_base_/models/erfnet_fcn.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | data_preprocessor = dict( 4 | type='SegDataPreProcessor', 5 | mean=[123.675, 116.28, 103.53], 6 | std=[58.395, 57.12, 57.375], 7 | bgr_to_rgb=True, 8 | pad_val=0, 9 | seg_pad_val=255) 10 | model = dict( 11 | type='EncoderDecoder', 12 | data_preprocessor=data_preprocessor, 13 | pretrained=None, 14 | backbone=dict( 15 | type='ERFNet', 16 | in_channels=3, 17 | enc_downsample_channels=(16, 64, 128), 18 | enc_stage_non_bottlenecks=(5, 8), 19 | enc_non_bottleneck_dilations=(2, 4, 8, 16), 20 | enc_non_bottleneck_channels=(64, 128), 21 | dec_upsample_channels=(64, 16), 22 | dec_stages_non_bottleneck=(2, 2), 23 | dec_non_bottleneck_channels=(64, 16), 24 | dropout_ratio=0.1, 25 | init_cfg=None), 26 | decode_head=dict( 27 | type='FCNHead', 28 | in_channels=16, 29 | channels=128, 30 | num_convs=1, 31 | concat_input=False, 32 | dropout_ratio=0.1, 33 | num_classes=19, 34 | norm_cfg=norm_cfg, 35 | align_corners=False, 36 | loss_decode=dict( 37 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 38 | # model training and testing settings 39 | train_cfg=dict(), 40 | test_cfg=dict(mode='whole')) 41 | -------------------------------------------------------------------------------- /segmentation/configs/_base_/models/fastfcn_r50-d32_jpu_psp.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | data_preprocessor = dict( 4 | type='SegDataPreProcessor', 
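# ImageNet RGB mean/std shared by most configs here; seg_pad_val=255 marks padded label pixels, which the losses ignore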
5 | mean=[123.675, 116.28, 103.53], 6 | std=[58.395, 57.12, 57.375], 7 | bgr_to_rgb=True, 8 | pad_val=0, 9 | seg_pad_val=255) 10 | model = dict( 11 | type='EncoderDecoder', 12 | data_preprocessor=data_preprocessor, 13 | pretrained='open-mmlab://resnet50_v1c', 14 | backbone=dict( 15 | type='ResNetV1c', 16 | depth=50, 17 | num_stages=4, 18 | dilations=(1, 1, 2, 4), 19 | strides=(1, 2, 2, 2), 20 | out_indices=(1, 2, 3), 21 | norm_cfg=norm_cfg, 22 | norm_eval=False, 23 | style='pytorch', 24 | contract_dilation=True), 25 | neck=dict( 26 | type='JPU', 27 | in_channels=(512, 1024, 2048), 28 | mid_channels=512, 29 | start_level=0, 30 | end_level=-1, 31 | dilations=(1, 2, 4, 8), 32 | align_corners=False, 33 | norm_cfg=norm_cfg), 34 | decode_head=dict( 35 | type='PSPHead', 36 | in_channels=2048, 37 | in_index=2, 38 | channels=512, 39 | pool_scales=(1, 2, 3, 6), 40 | dropout_ratio=0.1, 41 | num_classes=19, 42 | norm_cfg=norm_cfg, 43 | align_corners=False, 44 | loss_decode=dict( 45 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 46 | auxiliary_head=dict( 47 | type='FCNHead', 48 | in_channels=1024, 49 | in_index=1, 50 | channels=256, 51 | num_convs=1, 52 | concat_input=False, 53 | dropout_ratio=0.1, 54 | num_classes=19, 55 | norm_cfg=norm_cfg, 56 | align_corners=False, 57 | loss_decode=dict( 58 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 59 | # model training and testing settings 60 | train_cfg=dict(), 61 | test_cfg=dict(mode='whole')) 62 | -------------------------------------------------------------------------------- /segmentation/configs/_base_/models/fcn_r50-d8.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | data_preprocessor = dict( 4 | type='SegDataPreProcessor', 5 | mean=[123.675, 116.28, 103.53], 6 | std=[58.395, 57.12, 57.375], 7 | bgr_to_rgb=True, 8 | pad_val=0, 9 | seg_pad_val=255) 10 | model = dict( 11 | type='EncoderDecoder', 12 | data_preprocessor=data_preprocessor, 13 | pretrained='open-mmlab://resnet50_v1c', 14 | backbone=dict( 15 | type='ResNetV1c', 16 | depth=50, 17 | num_stages=4, 18 | out_indices=(0, 1, 2, 3), 19 | dilations=(1, 1, 2, 4), 20 | strides=(1, 2, 1, 1), 21 | norm_cfg=norm_cfg, 22 | norm_eval=False, 23 | style='pytorch', 24 | contract_dilation=True), 25 | decode_head=dict( 26 | type='FCNHead', 27 | in_channels=2048, 28 | in_index=3, 29 | channels=512, 30 | num_convs=2, 31 | concat_input=True, 32 | dropout_ratio=0.1, 33 | num_classes=19, 34 | norm_cfg=norm_cfg, 35 | align_corners=False, 36 | loss_decode=dict( 37 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 38 | auxiliary_head=dict( 39 | type='FCNHead', 40 | in_channels=1024, 41 | in_index=2, 42 | channels=256, 43 | num_convs=1, 44 | concat_input=False, 45 | dropout_ratio=0.1, 46 | num_classes=19, 47 | norm_cfg=norm_cfg, 48 | align_corners=False, 49 | loss_decode=dict( 50 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 51 | # model training and testing settings 52 | train_cfg=dict(), 53 | test_cfg=dict(mode='whole')) 54 | -------------------------------------------------------------------------------- /segmentation/configs/_base_/models/fcn_unet_s5-d16.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | data_preprocessor = dict( 4 | type='SegDataPreProcessor', 5 | mean=[123.675, 116.28, 103.53], 6 | std=[58.395, 57.12, 
57.375], 7 | bgr_to_rgb=True, 8 | pad_val=0, 9 | seg_pad_val=255) 10 | model = dict( 11 | type='EncoderDecoder', 12 | data_preprocessor=data_preprocessor, 13 | pretrained=None, 14 | backbone=dict( 15 | type='UNet', 16 | in_channels=3, 17 | base_channels=64, 18 | num_stages=5, 19 | strides=(1, 1, 1, 1, 1), 20 | enc_num_convs=(2, 2, 2, 2, 2), 21 | dec_num_convs=(2, 2, 2, 2), 22 | downsamples=(True, True, True, True), 23 | enc_dilations=(1, 1, 1, 1, 1), 24 | dec_dilations=(1, 1, 1, 1), 25 | with_cp=False, 26 | conv_cfg=None, 27 | norm_cfg=norm_cfg, 28 | act_cfg=dict(type='ReLU'), 29 | upsample_cfg=dict(type='InterpConv'), 30 | norm_eval=False), 31 | decode_head=dict( 32 | type='FCNHead', 33 | in_channels=64, 34 | in_index=4, 35 | channels=64, 36 | num_convs=1, 37 | concat_input=False, 38 | dropout_ratio=0.1, 39 | num_classes=2, 40 | norm_cfg=norm_cfg, 41 | align_corners=False, 42 | loss_decode=dict( 43 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 44 | auxiliary_head=dict( 45 | type='FCNHead', 46 | in_channels=128, 47 | in_index=3, 48 | channels=64, 49 | num_convs=1, 50 | concat_input=False, 51 | dropout_ratio=0.1, 52 | num_classes=2, 53 | norm_cfg=norm_cfg, 54 | align_corners=False, 55 | loss_decode=dict( 56 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 57 | # model training and testing settings 58 | train_cfg=dict(), 59 | test_cfg=dict(mode='slide', crop_size=256, stride=170)) 60 | -------------------------------------------------------------------------------- /segmentation/configs/_base_/models/fpn_poolformer_s12.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | checkpoint_file = 'https://download.openmmlab.com/mmclassification/v0/poolformer/poolformer-s12_3rdparty_32xb128_in1k_20220414-f8d83051.pth' # noqa 4 | # TODO: delete custom_imports after mmpretrain supports auto import 5 | # please install mmpretrain >= 1.0.0rc7 6 | # import mmpretrain.models to trigger register_module in mmpretrain 7 | custom_imports = dict( 8 | imports=['mmpretrain.models'], allow_failed_imports=False) 9 | data_preprocessor = dict( 10 | type='SegDataPreProcessor', 11 | mean=[123.675, 116.28, 103.53], 12 | std=[58.395, 57.12, 57.375], 13 | bgr_to_rgb=True, 14 | pad_val=0, 15 | seg_pad_val=255) 16 | model = dict( 17 | type='EncoderDecoder', 18 | data_preprocessor=data_preprocessor, 19 | backbone=dict( 20 | type='mmpretrain.PoolFormer', 21 | arch='s12', 22 | init_cfg=dict( 23 | type='Pretrained', checkpoint=checkpoint_file, prefix='backbone.'), 24 | in_patch_size=7, 25 | in_stride=4, 26 | in_pad=2, 27 | down_patch_size=3, 28 | down_stride=2, 29 | down_pad=1, 30 | drop_rate=0., 31 | drop_path_rate=0., 32 | out_indices=(0, 2, 4, 6), 33 | frozen_stages=0, 34 | ), 35 | neck=dict( 36 | type='FPN', 37 | in_channels=[256, 512, 1024, 2048], 38 | out_channels=256, 39 | num_outs=4), 40 | decode_head=dict( 41 | type='FPNHead', 42 | in_channels=[256, 256, 256, 256], 43 | in_index=[0, 1, 2, 3], 44 | feature_strides=[4, 8, 16, 32], 45 | channels=128, 46 | dropout_ratio=0.1, 47 | num_classes=19, 48 | norm_cfg=norm_cfg, 49 | align_corners=False, 50 | loss_decode=dict( 51 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 52 | # model training and testing settings 53 | train_cfg=dict(), 54 | test_cfg=dict(mode='whole')) 55 | -------------------------------------------------------------------------------- /segmentation/configs/_base_/models/fpn_r50.py: 
-------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | data_preprocessor = dict( 4 | type='SegDataPreProcessor', 5 | mean=[123.675, 116.28, 103.53], 6 | std=[58.395, 57.12, 57.375], 7 | bgr_to_rgb=True, 8 | pad_val=0, 9 | seg_pad_val=255) 10 | model = dict( 11 | type='EncoderDecoder', 12 | data_preprocessor=data_preprocessor, 13 | pretrained='open-mmlab://resnet50_v1c', 14 | backbone=dict( 15 | type='ResNetV1c', 16 | depth=50, 17 | num_stages=4, 18 | out_indices=(0, 1, 2, 3), 19 | dilations=(1, 1, 1, 1), 20 | strides=(1, 2, 2, 2), 21 | norm_cfg=norm_cfg, 22 | norm_eval=False, 23 | style='pytorch', 24 | contract_dilation=True), 25 | neck=dict( 26 | type='FPN', 27 | in_channels=[256, 512, 1024, 2048], 28 | out_channels=256, 29 | num_outs=4), 30 | decode_head=dict( 31 | type='FPNHead', 32 | in_channels=[256, 256, 256, 256], 33 | in_index=[0, 1, 2, 3], 34 | feature_strides=[4, 8, 16, 32], 35 | channels=128, 36 | dropout_ratio=0.1, 37 | num_classes=19, 38 | norm_cfg=norm_cfg, 39 | align_corners=False, 40 | loss_decode=dict( 41 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 42 | # model training and testing settings 43 | train_cfg=dict(), 44 | test_cfg=dict(mode='whole')) 45 | -------------------------------------------------------------------------------- /segmentation/configs/_base_/models/gcnet_r50-d8.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | data_preprocessor = dict( 4 | type='SegDataPreProcessor', 5 | mean=[123.675, 116.28, 103.53], 6 | std=[58.395, 57.12, 57.375], 7 | bgr_to_rgb=True, 8 | pad_val=0, 9 | seg_pad_val=255) 10 | model = dict( 11 | type='EncoderDecoder', 12 | data_preprocessor=data_preprocessor, 13 | pretrained='open-mmlab://resnet50_v1c', 14 | backbone=dict( 15 | type='ResNetV1c', 16 | depth=50, 17 | num_stages=4, 18 | out_indices=(0, 1, 2, 3), 19 | dilations=(1, 1, 2, 4), 20 | strides=(1, 2, 1, 1), 21 | norm_cfg=norm_cfg, 22 | norm_eval=False, 23 | style='pytorch', 24 | contract_dilation=True), 25 | decode_head=dict( 26 | type='GCHead', 27 | in_channels=2048, 28 | in_index=3, 29 | channels=512, 30 | ratio=1 / 4., 31 | pooling_type='att', 32 | fusion_types=('channel_add', ), 33 | dropout_ratio=0.1, 34 | num_classes=19, 35 | norm_cfg=norm_cfg, 36 | align_corners=False, 37 | loss_decode=dict( 38 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 39 | auxiliary_head=dict( 40 | type='FCNHead', 41 | in_channels=1024, 42 | in_index=2, 43 | channels=256, 44 | num_convs=1, 45 | concat_input=False, 46 | dropout_ratio=0.1, 47 | num_classes=19, 48 | norm_cfg=norm_cfg, 49 | align_corners=False, 50 | loss_decode=dict( 51 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 52 | # model training and testing settings 53 | train_cfg=dict(), 54 | test_cfg=dict(mode='whole')) 55 | -------------------------------------------------------------------------------- /segmentation/configs/_base_/models/isanet_r50-d8.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | data_preprocessor = dict( 4 | type='SegDataPreProcessor', 5 | mean=[123.675, 116.28, 103.53], 6 | std=[58.395, 57.12, 57.375], 7 | bgr_to_rgb=True, 8 | pad_val=0, 9 | seg_pad_val=255) 10 | model = dict( 11 | type='EncoderDecoder', 12 | 
data_preprocessor=data_preprocessor, 13 | pretrained='open-mmlab://resnet50_v1c', 14 | backbone=dict( 15 | type='ResNetV1c', 16 | depth=50, 17 | num_stages=4, 18 | out_indices=(0, 1, 2, 3), 19 | dilations=(1, 1, 2, 4), 20 | strides=(1, 2, 1, 1), 21 | norm_cfg=norm_cfg, 22 | norm_eval=False, 23 | style='pytorch', 24 | contract_dilation=True), 25 | decode_head=dict( 26 | type='ISAHead', 27 | in_channels=2048, 28 | in_index=3, 29 | channels=512, 30 | isa_channels=256, 31 | down_factor=(8, 8), 32 | dropout_ratio=0.1, 33 | num_classes=19, 34 | norm_cfg=norm_cfg, 35 | align_corners=False, 36 | loss_decode=dict( 37 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 38 | auxiliary_head=dict( 39 | type='FCNHead', 40 | in_channels=1024, 41 | in_index=2, 42 | channels=256, 43 | num_convs=1, 44 | concat_input=False, 45 | dropout_ratio=0.1, 46 | num_classes=19, 47 | norm_cfg=norm_cfg, 48 | align_corners=False, 49 | loss_decode=dict( 50 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 51 | # model training and testing settings 52 | train_cfg=dict(), 53 | test_cfg=dict(mode='whole')) 54 | -------------------------------------------------------------------------------- /segmentation/configs/_base_/models/lraspp_m-v3-d8.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', eps=0.001, requires_grad=True) 3 | data_preprocessor = dict( 4 | type='SegDataPreProcessor', 5 | mean=[123.675, 116.28, 103.53], 6 | std=[58.395, 57.12, 57.375], 7 | bgr_to_rgb=True, 8 | pad_val=0, 9 | seg_pad_val=255) 10 | model = dict( 11 | type='EncoderDecoder', 12 | data_preprocessor=data_preprocessor, 13 | backbone=dict( 14 | type='MobileNetV3', 15 | arch='large', 16 | out_indices=(1, 3, 16), 17 | norm_cfg=norm_cfg), 18 | decode_head=dict( 19 | type='LRASPPHead', 20 | in_channels=(16, 24, 960), 21 | in_index=(0, 1, 2), 22 | channels=128, 23 | input_transform='multiple_select', 24 | dropout_ratio=0.1, 25 | num_classes=19, 26 | norm_cfg=norm_cfg, 27 | act_cfg=dict(type='ReLU'), 28 | align_corners=False, 29 | loss_decode=dict( 30 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 31 | # model training and testing settings 32 | train_cfg=dict(), 33 | test_cfg=dict(mode='whole')) 34 | -------------------------------------------------------------------------------- /segmentation/configs/_base_/models/nonlocal_r50-d8.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | data_preprocessor = dict( 4 | type='SegDataPreProcessor', 5 | mean=[123.675, 116.28, 103.53], 6 | std=[58.395, 57.12, 57.375], 7 | bgr_to_rgb=True, 8 | pad_val=0, 9 | seg_pad_val=255) 10 | model = dict( 11 | type='EncoderDecoder', 12 | data_preprocessor=data_preprocessor, 13 | pretrained='open-mmlab://resnet50_v1c', 14 | backbone=dict( 15 | type='ResNetV1c', 16 | depth=50, 17 | num_stages=4, 18 | out_indices=(0, 1, 2, 3), 19 | dilations=(1, 1, 2, 4), 20 | strides=(1, 2, 1, 1), 21 | norm_cfg=norm_cfg, 22 | norm_eval=False, 23 | style='pytorch', 24 | contract_dilation=True), 25 | decode_head=dict( 26 | type='NLHead', 27 | in_channels=2048, 28 | in_index=3, 29 | channels=512, 30 | dropout_ratio=0.1, 31 | reduction=2, 32 | use_scale=True, 33 | mode='embedded_gaussian', 34 | num_classes=19, 35 | norm_cfg=norm_cfg, 36 | align_corners=False, 37 | loss_decode=dict( 38 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 39 | 
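# NLHead: non-local self-attention over all spatial positions; 'embedded_gaussian' scores pairs as softmax(theta(x_i)^T phi(x_j))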
auxiliary_head=dict( 40 | type='FCNHead', 41 | in_channels=1024, 42 | in_index=2, 43 | channels=256, 44 | num_convs=1, 45 | concat_input=False, 46 | dropout_ratio=0.1, 47 | num_classes=19, 48 | norm_cfg=norm_cfg, 49 | align_corners=False, 50 | loss_decode=dict( 51 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 52 | # model training and testing settings 53 | train_cfg=dict(), 54 | test_cfg=dict(mode='whole')) 55 | -------------------------------------------------------------------------------- /segmentation/configs/_base_/models/ocrnet_r50-d8.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | data_preprocessor = dict( 4 | type='SegDataPreProcessor', 5 | mean=[123.675, 116.28, 103.53], 6 | std=[58.395, 57.12, 57.375], 7 | bgr_to_rgb=True, 8 | pad_val=0, 9 | seg_pad_val=255) 10 | model = dict( 11 | type='CascadeEncoderDecoder', 12 | data_preprocessor=data_preprocessor, 13 | num_stages=2, 14 | pretrained='open-mmlab://resnet50_v1c', 15 | backbone=dict( 16 | type='ResNetV1c', 17 | depth=50, 18 | num_stages=4, 19 | out_indices=(0, 1, 2, 3), 20 | dilations=(1, 1, 2, 4), 21 | strides=(1, 2, 1, 1), 22 | norm_cfg=norm_cfg, 23 | norm_eval=False, 24 | style='pytorch', 25 | contract_dilation=True), 26 | decode_head=[ 27 | dict( 28 | type='FCNHead', 29 | in_channels=1024, 30 | in_index=2, 31 | channels=256, 32 | num_convs=1, 33 | concat_input=False, 34 | dropout_ratio=0.1, 35 | num_classes=19, 36 | norm_cfg=norm_cfg, 37 | align_corners=False, 38 | loss_decode=dict( 39 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 40 | dict( 41 | type='OCRHead', 42 | in_channels=2048, 43 | in_index=3, 44 | channels=512, 45 | ocr_channels=256, 46 | dropout_ratio=0.1, 47 | num_classes=19, 48 | norm_cfg=norm_cfg, 49 | align_corners=False, 50 | loss_decode=dict( 51 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)) 52 | ], 53 | # model training and testing settings 54 | train_cfg=dict(), 55 | test_cfg=dict(mode='whole')) 56 | -------------------------------------------------------------------------------- /segmentation/configs/_base_/models/psanet_r50-d8.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | data_preprocessor = dict( 4 | type='SegDataPreProcessor', 5 | mean=[123.675, 116.28, 103.53], 6 | std=[58.395, 57.12, 57.375], 7 | bgr_to_rgb=True, 8 | pad_val=0, 9 | seg_pad_val=255) 10 | model = dict( 11 | type='EncoderDecoder', 12 | data_preprocessor=data_preprocessor, 13 | pretrained='open-mmlab://resnet50_v1c', 14 | backbone=dict( 15 | type='ResNetV1c', 16 | depth=50, 17 | num_stages=4, 18 | out_indices=(0, 1, 2, 3), 19 | dilations=(1, 1, 2, 4), 20 | strides=(1, 2, 1, 1), 21 | norm_cfg=norm_cfg, 22 | norm_eval=False, 23 | style='pytorch', 24 | contract_dilation=True), 25 | decode_head=dict( 26 | type='PSAHead', 27 | in_channels=2048, 28 | in_index=3, 29 | channels=512, 30 | mask_size=(97, 97), 31 | psa_type='bi-direction', 32 | compact=False, 33 | shrink_factor=2, 34 | normalization_factor=1.0, 35 | psa_softmax=True, 36 | dropout_ratio=0.1, 37 | num_classes=19, 38 | norm_cfg=norm_cfg, 39 | align_corners=False, 40 | loss_decode=dict( 41 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 42 | auxiliary_head=dict( 43 | type='FCNHead', 44 | in_channels=1024, 45 | in_index=2, 46 | channels=256, 47 | num_convs=1, 48 | concat_input=False, 
49 | dropout_ratio=0.1, 50 | num_classes=19, 51 | norm_cfg=norm_cfg, 52 | align_corners=False, 53 | loss_decode=dict( 54 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 55 | # model training and testing settings 56 | train_cfg=dict(), 57 | test_cfg=dict(mode='whole')) 58 | -------------------------------------------------------------------------------- /segmentation/configs/_base_/models/pspnet_r50-d8.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | data_preprocessor = dict( 4 | type='SegDataPreProcessor', 5 | mean=[123.675, 116.28, 103.53], 6 | std=[58.395, 57.12, 57.375], 7 | bgr_to_rgb=True, 8 | pad_val=0, 9 | seg_pad_val=255) 10 | model = dict( 11 | type='EncoderDecoder', 12 | data_preprocessor=data_preprocessor, 13 | pretrained='open-mmlab://resnet50_v1c', 14 | backbone=dict( 15 | type='ResNetV1c', 16 | depth=50, 17 | num_stages=4, 18 | out_indices=(0, 1, 2, 3), 19 | dilations=(1, 1, 2, 4), 20 | strides=(1, 2, 1, 1), 21 | norm_cfg=norm_cfg, 22 | norm_eval=False, 23 | style='pytorch', 24 | contract_dilation=True), 25 | decode_head=dict( 26 | type='PSPHead', 27 | in_channels=2048, 28 | in_index=3, 29 | channels=512, 30 | pool_scales=(1, 2, 3, 6), 31 | dropout_ratio=0.1, 32 | num_classes=19, 33 | norm_cfg=norm_cfg, 34 | align_corners=False, 35 | loss_decode=dict( 36 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 37 | auxiliary_head=dict( 38 | type='FCNHead', 39 | in_channels=1024, 40 | in_index=2, 41 | channels=256, 42 | num_convs=1, 43 | concat_input=False, 44 | dropout_ratio=0.1, 45 | num_classes=19, 46 | norm_cfg=norm_cfg, 47 | align_corners=False, 48 | loss_decode=dict( 49 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 50 | # model training and testing settings 51 | train_cfg=dict(), 52 | test_cfg=dict(mode='whole')) 53 | -------------------------------------------------------------------------------- /segmentation/configs/_base_/models/pspnet_unet_s5-d16.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | data_preprocessor = dict( 4 | type='SegDataPreProcessor', 5 | mean=[123.675, 116.28, 103.53], 6 | std=[58.395, 57.12, 57.375], 7 | bgr_to_rgb=True, 8 | pad_val=0, 9 | seg_pad_val=255) 10 | model = dict( 11 | type='EncoderDecoder', 12 | data_preprocessor=data_preprocessor, 13 | pretrained=None, 14 | backbone=dict( 15 | type='UNet', 16 | in_channels=3, 17 | base_channels=64, 18 | num_stages=5, 19 | strides=(1, 1, 1, 1, 1), 20 | enc_num_convs=(2, 2, 2, 2, 2), 21 | dec_num_convs=(2, 2, 2, 2), 22 | downsamples=(True, True, True, True), 23 | enc_dilations=(1, 1, 1, 1, 1), 24 | dec_dilations=(1, 1, 1, 1), 25 | with_cp=False, 26 | conv_cfg=None, 27 | norm_cfg=norm_cfg, 28 | act_cfg=dict(type='ReLU'), 29 | upsample_cfg=dict(type='InterpConv'), 30 | norm_eval=False), 31 | decode_head=dict( 32 | type='PSPHead', 33 | in_channels=64, 34 | in_index=4, 35 | channels=16, 36 | pool_scales=(1, 2, 3, 6), 37 | dropout_ratio=0.1, 38 | num_classes=2, 39 | norm_cfg=norm_cfg, 40 | align_corners=False, 41 | loss_decode=dict( 42 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 43 | auxiliary_head=dict( 44 | type='FCNHead', 45 | in_channels=128, 46 | in_index=3, 47 | channels=64, 48 | num_convs=1, 49 | concat_input=False, 50 | dropout_ratio=0.1, 51 | num_classes=2, 52 | norm_cfg=norm_cfg, 53 | align_corners=False, 
54 | loss_decode=dict( 55 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 56 | # model training and testing settings 57 | train_cfg=dict(), 58 | test_cfg=dict(mode='slide', crop_size=256, stride=170)) 59 | -------------------------------------------------------------------------------- /segmentation/configs/_base_/models/segformer_mit-b0.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | data_preprocessor = dict( 4 | type='SegDataPreProcessor', 5 | mean=[123.675, 116.28, 103.53], 6 | std=[58.395, 57.12, 57.375], 7 | bgr_to_rgb=True, 8 | pad_val=0, 9 | seg_pad_val=255) 10 | model = dict( 11 | type='EncoderDecoder', 12 | data_preprocessor=data_preprocessor, 13 | pretrained=None, 14 | backbone=dict( 15 | type='MixVisionTransformer', 16 | in_channels=3, 17 | embed_dims=32, 18 | num_stages=4, 19 | num_layers=[2, 2, 2, 2], 20 | num_heads=[1, 2, 5, 8], 21 | patch_sizes=[7, 3, 3, 3], 22 | sr_ratios=[8, 4, 2, 1], 23 | out_indices=(0, 1, 2, 3), 24 | mlp_ratio=4, 25 | qkv_bias=True, 26 | drop_rate=0.0, 27 | attn_drop_rate=0.0, 28 | drop_path_rate=0.1), 29 | decode_head=dict( 30 | type='SegformerHead', 31 | in_channels=[32, 64, 160, 256], 32 | in_index=[0, 1, 2, 3], 33 | channels=256, 34 | dropout_ratio=0.1, 35 | num_classes=19, 36 | norm_cfg=norm_cfg, 37 | align_corners=False, 38 | loss_decode=dict( 39 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 40 | # model training and testing settings 41 | train_cfg=dict(), 42 | test_cfg=dict(mode='whole')) 43 | -------------------------------------------------------------------------------- /segmentation/configs/_base_/models/segmenter_vit-b16_mask.py: -------------------------------------------------------------------------------- 1 | checkpoint = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/segmenter/vit_base_p16_384_20220308-96dfe169.pth' # noqa 2 | # model settings 3 | backbone_norm_cfg = dict(type='LN', eps=1e-6, requires_grad=True) 4 | data_preprocessor = dict( 5 | type='SegDataPreProcessor', 6 | mean=[127.5, 127.5, 127.5], 7 | std=[127.5, 127.5, 127.5], 8 | bgr_to_rgb=True, 9 | pad_val=0, 10 | seg_pad_val=255) 11 | model = dict( 12 | type='EncoderDecoder', 13 | data_preprocessor=data_preprocessor, 14 | pretrained=checkpoint, 15 | backbone=dict( 16 | type='VisionTransformer', 17 | img_size=(512, 512), 18 | patch_size=16, 19 | in_channels=3, 20 | embed_dims=768, 21 | num_layers=12, 22 | num_heads=12, 23 | drop_path_rate=0.1, 24 | attn_drop_rate=0.0, 25 | drop_rate=0.0, 26 | final_norm=True, 27 | norm_cfg=backbone_norm_cfg, 28 | with_cls_token=True, 29 | interpolate_mode='bicubic', 30 | ), 31 | decode_head=dict( 32 | type='SegmenterMaskTransformerHead', 33 | in_channels=768, 34 | channels=768, 35 | num_classes=150, 36 | num_layers=2, 37 | num_heads=12, 38 | embed_dims=768, 39 | dropout_ratio=0.0, 40 | loss_decode=dict( 41 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), 42 | ), 43 | test_cfg=dict(mode='slide', crop_size=(512, 512), stride=(480, 480)), 44 | ) 45 | -------------------------------------------------------------------------------- /segmentation/configs/_base_/models/twins_pcpvt-s_fpn.py: -------------------------------------------------------------------------------- 1 | checkpoint = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/twins/pcpvt_small_20220308-e638c41c.pth' # noqa 2 | 3 | # model settings 4 | backbone_norm_cfg = dict(type='LN') 5 | norm_cfg 
= dict(type='SyncBN', requires_grad=True) 6 | data_preprocessor = dict( 7 | type='SegDataPreProcessor', 8 | mean=[123.675, 116.28, 103.53], 9 | std=[58.395, 57.12, 57.375], 10 | bgr_to_rgb=True, 11 | pad_val=0, 12 | seg_pad_val=255) 13 | model = dict( 14 | type='EncoderDecoder', 15 | data_preprocessor=data_preprocessor, 16 | backbone=dict( 17 | type='PCPVT', 18 | init_cfg=dict(type='Pretrained', checkpoint=checkpoint), 19 | in_channels=3, 20 | embed_dims=[64, 128, 320, 512], 21 | num_heads=[1, 2, 5, 8], 22 | patch_sizes=[4, 2, 2, 2], 23 | strides=[4, 2, 2, 2], 24 | mlp_ratios=[8, 8, 4, 4], 25 | out_indices=(0, 1, 2, 3), 26 | qkv_bias=True, 27 | norm_cfg=backbone_norm_cfg, 28 | depths=[3, 4, 6, 3], 29 | sr_ratios=[8, 4, 2, 1], 30 | norm_after_stage=False, 31 | drop_rate=0.0, 32 | attn_drop_rate=0., 33 | drop_path_rate=0.2), 34 | neck=dict( 35 | type='FPN', 36 | in_channels=[64, 128, 320, 512], 37 | out_channels=256, 38 | num_outs=4), 39 | decode_head=dict( 40 | type='FPNHead', 41 | in_channels=[256, 256, 256, 256], 42 | in_index=[0, 1, 2, 3], 43 | feature_strides=[4, 8, 16, 32], 44 | channels=128, 45 | dropout_ratio=0.1, 46 | num_classes=150, 47 | norm_cfg=norm_cfg, 48 | align_corners=False, 49 | loss_decode=dict( 50 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 51 | # model training and testing settings 52 | train_cfg=dict(), 53 | test_cfg=dict(mode='whole')) 54 | -------------------------------------------------------------------------------- /segmentation/configs/_base_/models/upernet_beit.py: -------------------------------------------------------------------------------- 1 | norm_cfg = dict(type='SyncBN', requires_grad=True) 2 | data_preprocessor = dict( 3 | type='SegDataPreProcessor', 4 | mean=[123.675, 116.28, 103.53], 5 | std=[58.395, 57.12, 57.375], 6 | bgr_to_rgb=True, 7 | pad_val=0, 8 | seg_pad_val=255) 9 | model = dict( 10 | type='EncoderDecoder', 11 | data_preprocessor=data_preprocessor, 12 | pretrained=None, 13 | backbone=dict( 14 | type='BEiT', 15 | img_size=(640, 640), 16 | patch_size=16, 17 | in_channels=3, 18 | embed_dims=768, 19 | num_layers=12, 20 | num_heads=12, 21 | mlp_ratio=4, 22 | out_indices=(3, 5, 7, 11), 23 | qv_bias=True, 24 | attn_drop_rate=0.0, 25 | drop_path_rate=0.1, 26 | norm_cfg=dict(type='LN', eps=1e-6), 27 | act_cfg=dict(type='GELU'), 28 | norm_eval=False, 29 | init_values=0.1), 30 | neck=dict(type='Feature2Pyramid', embed_dim=768, rescales=[4, 2, 1, 0.5]), 31 | decode_head=dict( 32 | type='UPerHead', 33 | in_channels=[768, 768, 768, 768], 34 | in_index=[0, 1, 2, 3], 35 | pool_scales=(1, 2, 3, 6), 36 | channels=768, 37 | dropout_ratio=0.1, 38 | num_classes=150, 39 | norm_cfg=norm_cfg, 40 | align_corners=False, 41 | loss_decode=dict( 42 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 43 | auxiliary_head=dict( 44 | type='FCNHead', 45 | in_channels=768, 46 | in_index=2, 47 | channels=256, 48 | num_convs=1, 49 | concat_input=False, 50 | dropout_ratio=0.1, 51 | num_classes=150, 52 | norm_cfg=norm_cfg, 53 | align_corners=False, 54 | loss_decode=dict( 55 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 56 | # model training and testing settings 57 | train_cfg=dict(), 58 | test_cfg=dict(mode='whole')) 59 | -------------------------------------------------------------------------------- /segmentation/configs/_base_/models/upernet_convnext.py: -------------------------------------------------------------------------------- 1 | norm_cfg = dict(type='SyncBN', requires_grad=True) 2 | custom_imports = 
dict(imports='mmpretrain.models', allow_failed_imports=False) 3 | checkpoint_file = 'https://download.openmmlab.com/mmclassification/v0/convnext/downstream/convnext-base_3rdparty_32xb128-noema_in1k_20220301-2a0ee547.pth' # noqa 4 | data_preprocessor = dict( 5 | type='SegDataPreProcessor', 6 | mean=[123.675, 116.28, 103.53], 7 | std=[58.395, 57.12, 57.375], 8 | bgr_to_rgb=True, 9 | pad_val=0, 10 | seg_pad_val=255) 11 | model = dict( 12 | type='EncoderDecoder', 13 | data_preprocessor=data_preprocessor, 14 | pretrained=None, 15 | backbone=dict( 16 | type='mmpretrain.ConvNeXt', 17 | arch='base', 18 | out_indices=[0, 1, 2, 3], 19 | drop_path_rate=0.4, 20 | layer_scale_init_value=1.0, 21 | gap_before_final_norm=False, 22 | init_cfg=dict( 23 | type='Pretrained', checkpoint=checkpoint_file, 24 | prefix='backbone.')), 25 | decode_head=dict( 26 | type='UPerHead', 27 | in_channels=[128, 256, 512, 1024], 28 | in_index=[0, 1, 2, 3], 29 | pool_scales=(1, 2, 3, 6), 30 | channels=512, 31 | dropout_ratio=0.1, 32 | num_classes=19, 33 | norm_cfg=norm_cfg, 34 | align_corners=False, 35 | loss_decode=dict( 36 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 37 | auxiliary_head=dict( 38 | type='FCNHead', 39 | in_channels=384, 40 | in_index=2, 41 | channels=256, 42 | num_convs=1, 43 | concat_input=False, 44 | dropout_ratio=0.1, 45 | num_classes=19, 46 | norm_cfg=norm_cfg, 47 | align_corners=False, 48 | loss_decode=dict( 49 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 50 | # model training and testing settings 51 | train_cfg=dict(), 52 | test_cfg=dict(mode='whole')) 53 | -------------------------------------------------------------------------------- /segmentation/configs/_base_/models/upernet_mae.py: -------------------------------------------------------------------------------- 1 | norm_cfg = dict(type='SyncBN', requires_grad=True) 2 | data_preprocessor = dict( 3 | type='SegDataPreProcessor', 4 | mean=[123.675, 116.28, 103.53], 5 | std=[58.395, 57.12, 57.375], 6 | bgr_to_rgb=True, 7 | pad_val=0, 8 | seg_pad_val=255) 9 | model = dict( 10 | type='EncoderDecoder', 11 | data_preprocessor=data_preprocessor, 12 | pretrained=None, 13 | backbone=dict( 14 | type='MAE', 15 | img_size=(640, 640), 16 | patch_size=16, 17 | in_channels=3, 18 | embed_dims=768, 19 | num_layers=12, 20 | num_heads=12, 21 | mlp_ratio=4, 22 | out_indices=(3, 5, 7, 11), 23 | attn_drop_rate=0.0, 24 | drop_path_rate=0.1, 25 | norm_cfg=dict(type='LN', eps=1e-6), 26 | act_cfg=dict(type='GELU'), 27 | norm_eval=False, 28 | init_values=0.1), 29 | neck=dict(type='Feature2Pyramid', embed_dim=768, rescales=[4, 2, 1, 0.5]), 30 | decode_head=dict( 31 | type='UPerHead', 32 | in_channels=[384, 384, 384, 384], 33 | in_index=[0, 1, 2, 3], 34 | pool_scales=(1, 2, 3, 6), 35 | channels=512, 36 | dropout_ratio=0.1, 37 | num_classes=19, 38 | norm_cfg=norm_cfg, 39 | align_corners=False, 40 | loss_decode=dict( 41 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 42 | auxiliary_head=dict( 43 | type='FCNHead', 44 | in_channels=384, 45 | in_index=2, 46 | channels=256, 47 | num_convs=1, 48 | concat_input=False, 49 | dropout_ratio=0.1, 50 | num_classes=19, 51 | norm_cfg=norm_cfg, 52 | align_corners=False, 53 | loss_decode=dict( 54 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 55 | # model training and testing settings 56 | train_cfg=dict(), 57 | test_cfg=dict(mode='whole')) 58 | -------------------------------------------------------------------------------- 
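The BEiT/MAE backbones above emit all four feature maps at a single stride, so the Feature2Pyramid neck with rescales=[4, 2, 1, 0.5] resizes them to strides 4/8/16/32 before UPerHead consumes them. A rough sketch of the idea using plain bilinear resizing; the real module uses learned deconvolutions and max-pooling rather than interpolation:

import torch.nn.functional as F

def feature2pyramid_sketch(feats, rescales=(4, 2, 1, 0.5)):
    # feats: four (N, C, H, W) maps, all at the backbone's native stride
    out = []
    for f, s in zip(feats, rescales):
        out.append(f if s == 1 else F.interpolate(
            f, scale_factor=s, mode='bilinear', align_corners=False))
    return out  # an FPN-style pyramid at strides ~4, 8, 16, 32

--------------------------------------------------------------------------------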
/segmentation/configs/_base_/models/upernet_r50.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | data_preprocessor = dict( 4 | type='SegDataPreProcessor', 5 | mean=[123.675, 116.28, 103.53], 6 | std=[58.395, 57.12, 57.375], 7 | bgr_to_rgb=True, 8 | pad_val=0, 9 | seg_pad_val=255) 10 | model = dict( 11 | type='EncoderDecoder', 12 | data_preprocessor=data_preprocessor, 13 | pretrained='open-mmlab://resnet50_v1c', 14 | backbone=dict( 15 | type='ResNetV1c', 16 | depth=50, 17 | num_stages=4, 18 | out_indices=(0, 1, 2, 3), 19 | dilations=(1, 1, 1, 1), 20 | strides=(1, 2, 2, 2), 21 | norm_cfg=norm_cfg, 22 | norm_eval=False, 23 | style='pytorch', 24 | contract_dilation=True), 25 | decode_head=dict( 26 | type='UPerHead', 27 | in_channels=[256, 512, 1024, 2048], 28 | in_index=[0, 1, 2, 3], 29 | pool_scales=(1, 2, 3, 6), 30 | channels=512, 31 | dropout_ratio=0.1, 32 | num_classes=19, 33 | norm_cfg=norm_cfg, 34 | align_corners=False, 35 | loss_decode=dict( 36 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 37 | auxiliary_head=dict( 38 | type='FCNHead', 39 | in_channels=1024, 40 | in_index=2, 41 | channels=256, 42 | num_convs=1, 43 | concat_input=False, 44 | dropout_ratio=0.1, 45 | num_classes=19, 46 | norm_cfg=norm_cfg, 47 | align_corners=False, 48 | loss_decode=dict( 49 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 50 | # model training and testing settings 51 | train_cfg=dict(), 52 | test_cfg=dict(mode='whole')) 53 | -------------------------------------------------------------------------------- /segmentation/configs/_base_/schedules/schedule_160k.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005) 3 | optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer, clip_grad=None) 4 | # learning policy 5 | param_scheduler = [ 6 | dict( 7 | type='PolyLR', 8 | eta_min=1e-4, 9 | power=0.9, 10 | begin=0, 11 | end=160000, 12 | by_epoch=False) 13 | ] 14 | # training schedule for 160k 15 | train_cfg = dict( 16 | type='IterBasedTrainLoop', max_iters=160000, val_interval=16000) 17 | val_cfg = dict(type='ValLoop') 18 | test_cfg = dict(type='TestLoop') 19 | default_hooks = dict( 20 | timer=dict(type='IterTimerHook'), 21 | logger=dict(type='LoggerHook', interval=50, log_metric_by_epoch=False), 22 | param_scheduler=dict(type='ParamSchedulerHook'), 23 | checkpoint=dict(type='CheckpointHook', by_epoch=False, interval=16000), 24 | sampler_seed=dict(type='DistSamplerSeedHook'), 25 | visualization=dict(type='SegVisualizationHook')) 26 | -------------------------------------------------------------------------------- /segmentation/configs/_base_/schedules/schedule_20k.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005) 3 | optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer, clip_grad=None) 4 | # learning policy 5 | param_scheduler = [ 6 | dict( 7 | type='PolyLR', 8 | eta_min=1e-4, 9 | power=0.9, 10 | begin=0, 11 | end=20000, 12 | by_epoch=False) 13 | ] 14 | # training schedule for 20k 15 | train_cfg = dict(type='IterBasedTrainLoop', max_iters=20000, val_interval=2000) 16 | val_cfg = dict(type='ValLoop') 17 | test_cfg = dict(type='TestLoop') 18 | default_hooks = dict( 19 | timer=dict(type='IterTimerHook'), 20 | 
logger=dict(type='LoggerHook', interval=50, log_metric_by_epoch=False), 21 | param_scheduler=dict(type='ParamSchedulerHook'), 22 | checkpoint=dict(type='CheckpointHook', by_epoch=False, interval=2000), 23 | sampler_seed=dict(type='DistSamplerSeedHook'), 24 | visualization=dict(type='SegVisualizationHook')) 25 | -------------------------------------------------------------------------------- /segmentation/configs/_base_/schedules/schedule_240k.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005) 3 | optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer, clip_grad=None) 4 | # learning policy 5 | param_scheduler = [ 6 | dict( 7 | type='PolyLR', 8 | eta_min=1e-4, 9 | power=0.9, 10 | begin=0, 11 | end=240000, 12 | by_epoch=False) 13 | ] 14 | # training schedule for 240k 15 | train_cfg = dict( 16 | type='IterBasedTrainLoop', max_iters=240000, val_interval=24000) 17 | val_cfg = dict(type='ValLoop') 18 | test_cfg = dict(type='TestLoop') 19 | default_hooks = dict( 20 | timer=dict(type='IterTimerHook'), 21 | logger=dict(type='LoggerHook', interval=50, log_metric_by_epoch=False), 22 | param_scheduler=dict(type='ParamSchedulerHook'), 23 | checkpoint=dict(type='CheckpointHook', by_epoch=False, interval=24000), 24 | sampler_seed=dict(type='DistSamplerSeedHook'), 25 | visualization=dict(type='SegVisualizationHook')) 26 | -------------------------------------------------------------------------------- /segmentation/configs/_base_/schedules/schedule_25k.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optimizer = dict(type='AdamW', lr=0.001, weight_decay=0.1) 3 | optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer, clip_grad=None) 4 | # learning policy 5 | param_scheduler = [ 6 | dict( 7 | type='LinearLR', start_factor=3e-2, begin=0, end=12000, 8 | by_epoch=False), 9 | dict( 10 | type='PolyLRRatio', 11 | eta_min_ratio=3e-2, 12 | power=0.9, 13 | begin=12000, 14 | end=24000, 15 | by_epoch=False), 16 | dict(type='ConstantLR', by_epoch=False, factor=1, begin=24000, end=25000) 17 | ] 18 | # training schedule for 25k 19 | train_cfg = dict(type='IterBasedTrainLoop', max_iters=25000, val_interval=1000) 20 | val_cfg = dict(type='ValLoop') 21 | test_cfg = dict(type='TestLoop') 22 | default_hooks = dict( 23 | timer=dict(type='IterTimerHook'), 24 | logger=dict(type='LoggerHook', interval=50, log_metric_by_epoch=False), 25 | param_scheduler=dict(type='ParamSchedulerHook'), 26 | checkpoint=dict(type='CheckpointHook', by_epoch=False, interval=2000), 27 | sampler_seed=dict(type='DistSamplerSeedHook'), 28 | visualization=dict(type='SegVisualizationHook')) 29 | -------------------------------------------------------------------------------- /segmentation/configs/_base_/schedules/schedule_320k.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005) 3 | optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer, clip_grad=None) 4 | # learning policy 5 | param_scheduler = [ 6 | dict( 7 | type='PolyLR', 8 | eta_min=1e-4, 9 | power=0.9, 10 | begin=0, 11 | end=320000, 12 | by_epoch=False) 13 | ] 14 | # training schedule for 320k 15 | train_cfg = dict( 16 | type='IterBasedTrainLoop', max_iters=320000, val_interval=32000) 17 | val_cfg = dict(type='ValLoop') 18 | test_cfg = dict(type='TestLoop') 19 | default_hooks 
= dict( 20 | timer=dict(type='IterTimerHook'), 21 | logger=dict(type='LoggerHook', interval=50, log_metric_by_epoch=False), 22 | param_scheduler=dict(type='ParamSchedulerHook'), 23 | checkpoint=dict(type='CheckpointHook', by_epoch=False, interval=32000), 24 | sampler_seed=dict(type='DistSamplerSeedHook'), 25 | visualization=dict(type='SegVisualizationHook')) 26 | -------------------------------------------------------------------------------- /segmentation/configs/_base_/schedules/schedule_40k.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005) 3 | optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer, clip_grad=None) 4 | # learning policy 5 | param_scheduler = [ 6 | dict( 7 | type='PolyLR', 8 | eta_min=1e-4, 9 | power=0.9, 10 | begin=0, 11 | end=40000, 12 | by_epoch=False) 13 | ] 14 | # training schedule for 40k 15 | train_cfg = dict(type='IterBasedTrainLoop', max_iters=40000, val_interval=4000) 16 | val_cfg = dict(type='ValLoop') 17 | test_cfg = dict(type='TestLoop') 18 | default_hooks = dict( 19 | timer=dict(type='IterTimerHook'), 20 | logger=dict(type='LoggerHook', interval=50, log_metric_by_epoch=False), 21 | param_scheduler=dict(type='ParamSchedulerHook'), 22 | checkpoint=dict(type='CheckpointHook', by_epoch=False, interval=4000), 23 | sampler_seed=dict(type='DistSamplerSeedHook'), 24 | visualization=dict(type='SegVisualizationHook')) 25 | -------------------------------------------------------------------------------- /segmentation/configs/_base_/schedules/schedule_80k.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005) 3 | optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer, clip_grad=None) 4 | # learning policy 5 | param_scheduler = [ 6 | dict( 7 | type='PolyLR', 8 | eta_min=1e-4, 9 | power=0.9, 10 | begin=0, 11 | end=80000, 12 | by_epoch=False) 13 | ] 14 | # training schedule for 80k 15 | train_cfg = dict(type='IterBasedTrainLoop', max_iters=80000, val_interval=8000) 16 | val_cfg = dict(type='ValLoop') 17 | test_cfg = dict(type='TestLoop') 18 | default_hooks = dict( 19 | timer=dict(type='IterTimerHook'), 20 | logger=dict(type='LoggerHook', interval=50, log_metric_by_epoch=False), 21 | param_scheduler=dict(type='ParamSchedulerHook'), 22 | checkpoint=dict(type='CheckpointHook', by_epoch=False, interval=8000), 23 | sampler_seed=dict(type='DistSamplerSeedHook'), 24 | visualization=dict(type='SegVisualizationHook')) 25 | -------------------------------------------------------------------------------- /segmentation/configs/convnext/convnext-base_upernet_8xb2-amp-160k_ade20k-512x512.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/models/upernet_convnext.py', '../_base_/datasets/ade20k.py', 3 | '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' 4 | ] 5 | crop_size = (512, 512) 6 | data_preprocessor = dict(size=crop_size) 7 | model = dict( 8 | data_preprocessor=data_preprocessor, 9 | decode_head=dict(in_channels=[128, 256, 512, 1024], num_classes=150), 10 | auxiliary_head=dict(in_channels=512, num_classes=150), 11 | test_cfg=dict(mode='slide', crop_size=crop_size, stride=(341, 341)), 12 | ) 13 | 14 | optim_wrapper = dict( 15 | _delete_=True, 16 | type='AmpOptimWrapper', 17 | optimizer=dict( 18 | type='AdamW', lr=0.0001, betas=(0.9, 
0.999), weight_decay=0.05), 19 | paramwise_cfg={ 20 | 'decay_rate': 0.9, 21 | 'decay_type': 'stage_wise', 22 | 'num_layers': 12 23 | }, 24 | constructor='LearningRateDecayOptimizerConstructor', 25 | loss_scale='dynamic') 26 | 27 | param_scheduler = [ 28 | dict( 29 | type='LinearLR', start_factor=1e-6, by_epoch=False, begin=0, end=1500), 30 | dict( 31 | type='PolyLR', 32 | power=1.0, 33 | begin=1500, 34 | end=160000, 35 | eta_min=0.0, 36 | by_epoch=False, 37 | ) 38 | ] 39 | 40 | # By default, models are trained on 8 GPUs with 2 images per GPU 41 | train_dataloader = dict(batch_size=2) 42 | val_dataloader = dict(batch_size=1) 43 | test_dataloader = val_dataloader 44 | -------------------------------------------------------------------------------- /segmentation/configs/swin/swin-base-patch4-window12-in1k-384x384-pre_upernet_8xb2-160k_ade20k-512x512.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | 'swin-tiny-patch4-window7-in1k-pre_upernet_8xb2-160k_ade20k-512x512.py' 3 | ] 4 | checkpoint_file = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/swin/swin_base_patch4_window12_384_20220317-55b0104a.pth' # noqa 5 | model = dict( 6 | backbone=dict( 7 | init_cfg=dict(type='Pretrained', checkpoint=checkpoint_file), 8 | pretrain_img_size=384, 9 | embed_dims=128, 10 | depths=[2, 2, 18, 2], 11 | num_heads=[4, 8, 16, 32], 12 | window_size=12), 13 | decode_head=dict(in_channels=[128, 256, 512, 1024], num_classes=150), 14 | auxiliary_head=dict(in_channels=512, num_classes=150)) 15 | -------------------------------------------------------------------------------- /segmentation/configs/swin/swin-base-patch4-window12-in22k-384x384-pre_upernet_8xb2-160k_ade20k-512x512.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | './swin-base-patch4-window12-in1k-384x384-pre_upernet_8xb2-160k_ade20k-512x512.py' # noqa 3 | ] 4 | checkpoint_file = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/swin/swin_base_patch4_window12_384_22k_20220317-e5c09f74.pth' # noqa 5 | model = dict( 6 | backbone=dict( 7 | init_cfg=dict(type='Pretrained', checkpoint=checkpoint_file))) 8 | -------------------------------------------------------------------------------- /segmentation/configs/swin/swin-base-patch4-window7-in1k-pre_upernet_8xb2-160k_ade20k-512x512.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | './swin-tiny-patch4-window7-in1k-pre_upernet_8xb2-160k_ade20k-512x512.py' 3 | ] 4 | checkpoint_file = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/swin/swin_base_patch4_window7_224_20220317-e9b98025.pth' # noqa 5 | model = dict( 6 | backbone=dict( 7 | init_cfg=dict(type='Pretrained', checkpoint=checkpoint_file), 8 | embed_dims=128, 9 | depths=[2, 2, 18, 2], 10 | num_heads=[4, 8, 16, 32]), 11 | decode_head=dict(in_channels=[128, 256, 512, 1024], num_classes=150), 12 | auxiliary_head=dict(in_channels=512, num_classes=150)) 13 | -------------------------------------------------------------------------------- /segmentation/configs/swin/swin-base-patch4-window7-in22k-pre_upernet_8xb2-160k_ade20k-512x512.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | './swin-base-patch4-window7-in1k-pre_upernet_8xb2-160k_ade20k-512x512.py' 3 | ] 4 | checkpoint_file = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/swin/swin_base_patch4_window7_224_22k_20220317-4f79f7c0.pth' # noqa 5 | model = 
dict( 6 | backbone=dict( 7 | init_cfg=dict(type='Pretrained', checkpoint=checkpoint_file))) 8 | -------------------------------------------------------------------------------- /segmentation/configs/swin/swin-large-patch4-window12-in22k-384x384-pre_upernet_8xb2-160k_ade20k-512x512.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | 'swin-large-patch4-window7-in22k-pre_upernet_' 3 | '8xb2-160k_ade20k-512x512.py' 4 | ] 5 | checkpoint_file = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/swin/swin_large_patch4_window12_384_22k_20220412-6580f57d.pth' # noqa 6 | model = dict( 7 | backbone=dict( 8 | init_cfg=dict(type='Pretrained', checkpoint=checkpoint_file), 9 | pretrain_img_size=384, 10 | window_size=12)) 11 | -------------------------------------------------------------------------------- /segmentation/configs/swin/swin-large-patch4-window7-in22k-pre_upernet_8xb2-160k_ade20k-512x512.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | 'swin-tiny-patch4-window7-in1k-pre_upernet_8xb2-160k_' 3 | 'ade20k-512x512.py' 4 | ] 5 | checkpoint_file = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/swin/swin_large_patch4_window7_224_22k_20220412-aeecf2aa.pth' # noqa 6 | model = dict( 7 | backbone=dict( 8 | init_cfg=dict(type='Pretrained', checkpoint=checkpoint_file), 9 | pretrain_img_size=224, 10 | embed_dims=192, 11 | depths=[2, 2, 18, 2], 12 | num_heads=[6, 12, 24, 48], 13 | window_size=7), 14 | decode_head=dict(in_channels=[192, 384, 768, 1536], num_classes=150), 15 | auxiliary_head=dict(in_channels=768, num_classes=150)) 16 | -------------------------------------------------------------------------------- /segmentation/configs/swin/swin-small-patch4-window7-in1k-pre_upernet_8xb2-160k_ade20k-512x512.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | './swin-tiny-patch4-window7-in1k-pre_upernet_8xb2-160k_ade20k-512x512.py' 3 | ] 4 | checkpoint_file = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/swin/swin_small_patch4_window7_224_20220317-7ba6d6dd.pth' # noqa 5 | model = dict( 6 | backbone=dict( 7 | init_cfg=dict(type='Pretrained', checkpoint=checkpoint_file), 8 | depths=[2, 2, 18, 2]), 9 | decode_head=dict(in_channels=[96, 192, 384, 768], num_classes=150), 10 | auxiliary_head=dict(in_channels=384, num_classes=150)) 11 | -------------------------------------------------------------------------------- /segmentation/configs/swin/swin-tiny-patch4-window7-in1k-pre_upernet_8xb2-160k_ade20k-512x512.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/models/upernet_swin.py', '../_base_/datasets/ade20k.py', 3 | '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' 4 | ] 5 | crop_size = (512, 512) 6 | data_preprocessor = dict(size=crop_size) 7 | checkpoint_file = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/swin/swin_tiny_patch4_window7_224_20220317-1cdeb081.pth' # noqa 8 | model = dict( 9 | data_preprocessor=data_preprocessor, 10 | backbone=dict( 11 | init_cfg=dict(type='Pretrained', checkpoint=checkpoint_file), 12 | embed_dims=96, 13 | depths=[2, 2, 6, 2], 14 | num_heads=[3, 6, 12, 24], 15 | window_size=7, 16 | use_abs_pos_embed=False, 17 | drop_path_rate=0.3, 18 | patch_norm=True), 19 | decode_head=dict(in_channels=[96, 192, 384, 768], num_classes=150), 20 | auxiliary_head=dict(in_channels=384, num_classes=150)) 21 | 
22 | # AdamW optimizer, no weight decay for position embedding & layer norm 23 | # in backbone 24 | optim_wrapper = dict( 25 | _delete_=True, 26 | type='OptimWrapper', 27 | optimizer=dict( 28 | type='AdamW', lr=0.00006, betas=(0.9, 0.999), weight_decay=0.01), 29 | paramwise_cfg=dict( 30 | custom_keys={ 31 | 'absolute_pos_embed': dict(decay_mult=0.), 32 | 'relative_position_bias_table': dict(decay_mult=0.), 33 | 'norm': dict(decay_mult=0.) 34 | })) 35 | 36 | param_scheduler = [ 37 | dict( 38 | type='LinearLR', start_factor=1e-6, by_epoch=False, begin=0, end=1500), 39 | dict( 40 | type='PolyLR', 41 | eta_min=0.0, 42 | power=1.0, 43 | begin=1500, 44 | end=160000, 45 | by_epoch=False, 46 | ) 47 | ] 48 | 49 | # By default, models are trained on 8 GPUs with 2 images per GPU 50 | train_dataloader = dict(batch_size=2) 51 | val_dataloader = dict(batch_size=1) 52 | test_dataloader = val_dataloader 53 | -------------------------------------------------------------------------------- /segmentation/configs/swin/swin-tiny-patch4-window7_upernet_1xb8-20k_levir-256x256.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/models/upernet_swin.py', '../_base_/datasets/levir_256x256.py', 3 | '../_base_/default_runtime.py', '../_base_/schedules/schedule_20k.py' 4 | ] 5 | crop_size = (256, 256) 6 | norm_cfg = dict(type='BN', requires_grad=True) 7 | data_preprocessor = dict( 8 | size=crop_size, 9 | type='SegDataPreProcessor', 10 | mean=[123.675, 116.28, 103.53, 123.675, 116.28, 103.53], 11 | std=[58.395, 57.12, 57.375, 58.395, 57.12, 57.375]) 12 | 13 | model = dict( 14 | data_preprocessor=data_preprocessor, 15 | backbone=dict( 16 | in_channels=6, 17 | embed_dims=96, 18 | depths=[2, 2, 6, 2], 19 | num_heads=[3, 6, 12, 24], 20 | window_size=7, 21 | use_abs_pos_embed=False, 22 | drop_path_rate=0.3, 23 | patch_norm=True), 24 | decode_head=dict(in_channels=[96, 192, 384, 768], num_classes=2), 25 | auxiliary_head=dict(in_channels=384, num_classes=2)) 26 | 27 | # AdamW optimizer, no weight decay for position embedding & layer norm 28 | # in backbone 29 | optim_wrapper = dict( 30 | _delete_=True, 31 | type='OptimWrapper', 32 | optimizer=dict( 33 | type='AdamW', lr=0.00006, betas=(0.9, 0.999), weight_decay=0.01), 34 | paramwise_cfg=dict( 35 | custom_keys={ 36 | 'absolute_pos_embed': dict(decay_mult=0.), 37 | 'relative_position_bias_table': dict(decay_mult=0.), 38 | 'norm': dict(decay_mult=0.) 
39 | })) 40 | 41 | param_scheduler = [ 42 | dict( 43 | type='LinearLR', start_factor=1e-6, by_epoch=False, begin=0, end=1500), 44 | dict( 45 | type='PolyLR', 46 | eta_min=0.0, 47 | power=1.0, 48 | begin=1500, 49 | end=20000, 50 | by_epoch=False, 51 | ) 52 | ] 53 | 54 | train_dataloader = dict(batch_size=4) 55 | val_dataloader = dict(batch_size=1) 56 | test_dataloader = val_dataloader 57 | -------------------------------------------------------------------------------- /segmentation/configs/upernet/upernet_r101_4xb2-40k_cityscapes-512x1024.py: -------------------------------------------------------------------------------- 1 | _base_ = './upernet_r50_4xb2-40k_cityscapes-512x1024.py' 2 | model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) 3 | -------------------------------------------------------------------------------- /segmentation/configs/upernet/upernet_r101_4xb2-40k_cityscapes-769x769.py: -------------------------------------------------------------------------------- 1 | _base_ = './upernet_r50_4xb2-40k_cityscapes-769x769.py' 2 | model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) 3 | -------------------------------------------------------------------------------- /segmentation/configs/upernet/upernet_r101_4xb2-80k_cityscapes-512x1024.py: -------------------------------------------------------------------------------- 1 | _base_ = './upernet_r50_4xb2-80k_cityscapes-512x1024.py' 2 | model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) 3 | -------------------------------------------------------------------------------- /segmentation/configs/upernet/upernet_r101_4xb2-80k_cityscapes-769x769.py: -------------------------------------------------------------------------------- 1 | _base_ = './upernet_r50_4xb2-80k_cityscapes-769x769.py' 2 | model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) 3 | -------------------------------------------------------------------------------- /segmentation/configs/upernet/upernet_r101_4xb4-160k_ade20k-512x512.py: -------------------------------------------------------------------------------- 1 | _base_ = './upernet_r50_4xb4-160k_ade20k-512x512.py' 2 | model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) 3 | -------------------------------------------------------------------------------- /segmentation/configs/upernet/upernet_r101_4xb4-20k_voc12aug-512x512.py: -------------------------------------------------------------------------------- 1 | _base_ = './upernet_r50_4xb4-20k_voc12aug-512x512.py' 2 | model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) 3 | -------------------------------------------------------------------------------- /segmentation/configs/upernet/upernet_r101_4xb4-40k_voc12aug-512x512.py: -------------------------------------------------------------------------------- 1 | _base_ = './upernet_r50_4xb4-40k_voc12aug-512x512.py' 2 | model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) 3 | -------------------------------------------------------------------------------- /segmentation/configs/upernet/upernet_r101_4xb4-80k_ade20k-512x512.py: -------------------------------------------------------------------------------- 1 | _base_ = './upernet_r50_4xb4-80k_ade20k-512x512.py' 2 | model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) 3 | -------------------------------------------------------------------------------- 
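The `upernet_r101_*` files above are one-liners because mmengine resolves the `_base_` chain recursively: each config inherits the corresponding r50 recipe and overrides only the pretrained weights and the backbone depth. A minimal sketch of how such a config resolves (assuming mmengine is installed and the path is taken relative to `segmentation/`):

```python
from mmengine.config import Config

# _base_ files are merged recursively; the child's overrides are then
# applied on top of the merged result.
cfg = Config.fromfile(
    'configs/upernet/upernet_r101_4xb4-160k_ade20k-512x512.py')
print(cfg.model.backbone.depth)  # 101, overridden by this one-line config
print(cfg.train_cfg.max_iters)   # 160000, inherited from schedule_160k.py
```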
/segmentation/configs/upernet/upernet_r18_4xb2-40k_cityscapes-512x1024.py: -------------------------------------------------------------------------------- 1 | _base_ = './upernet_r50_4xb2-40k_cityscapes-512x1024.py' 2 | model = dict( 3 | pretrained='open-mmlab://resnet18_v1c', 4 | backbone=dict(depth=18), 5 | decode_head=dict(in_channels=[64, 128, 256, 512]), 6 | auxiliary_head=dict(in_channels=256)) 7 | -------------------------------------------------------------------------------- /segmentation/configs/upernet/upernet_r18_4xb2-80k_cityscapes-512x1024.py: -------------------------------------------------------------------------------- 1 | _base_ = './upernet_r50_4xb2-80k_cityscapes-512x1024.py' 2 | model = dict( 3 | pretrained='open-mmlab://resnet18_v1c', 4 | backbone=dict(depth=18), 5 | decode_head=dict(in_channels=[64, 128, 256, 512]), 6 | auxiliary_head=dict(in_channels=256)) 7 | -------------------------------------------------------------------------------- /segmentation/configs/upernet/upernet_r18_4xb4-160k_ade20k-512x512.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/models/upernet_r50.py', '../_base_/datasets/ade20k.py', 3 | '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' 4 | ] 5 | model = dict( 6 | pretrained='open-mmlab://resnet18_v1c', 7 | backbone=dict(depth=18), 8 | decode_head=dict(in_channels=[64, 128, 256, 512], num_classes=150), 9 | auxiliary_head=dict(in_channels=256, num_classes=150)) 10 | -------------------------------------------------------------------------------- /segmentation/configs/upernet/upernet_r18_4xb4-20k_voc12aug-512x512.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/models/upernet_r50.py', 3 | '../_base_/datasets/pascal_voc12_aug.py', '../_base_/default_runtime.py', 4 | '../_base_/schedules/schedule_20k.py' 5 | ] 6 | model = dict( 7 | pretrained='open-mmlab://resnet18_v1c', 8 | backbone=dict(depth=18), 9 | decode_head=dict(in_channels=[64, 128, 256, 512], num_classes=21), 10 | auxiliary_head=dict(in_channels=256, num_classes=21)) 11 | -------------------------------------------------------------------------------- /segmentation/configs/upernet/upernet_r18_4xb4-40k_voc12aug-512x512.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/models/upernet_r50.py', 3 | '../_base_/datasets/pascal_voc12_aug.py', '../_base_/default_runtime.py', 4 | '../_base_/schedules/schedule_40k.py' 5 | ] 6 | model = dict( 7 | pretrained='open-mmlab://resnet18_v1c', 8 | backbone=dict(depth=18), 9 | decode_head=dict(in_channels=[64, 128, 256, 512], num_classes=21), 10 | auxiliary_head=dict(in_channels=256, num_classes=21)) 11 | -------------------------------------------------------------------------------- /segmentation/configs/upernet/upernet_r18_4xb4-80k_ade20k-512x512.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/models/upernet_r50.py', '../_base_/datasets/ade20k.py', 3 | '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' 4 | ] 5 | model = dict( 6 | pretrained='open-mmlab://resnet18_v1c', 7 | backbone=dict(depth=18), 8 | decode_head=dict(in_channels=[64, 128, 256, 512], num_classes=150), 9 | auxiliary_head=dict(in_channels=256, num_classes=150)) 10 | -------------------------------------------------------------------------------- 
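The r18 variants above must also shrink the `decode_head`/`auxiliary_head` `in_channels`, since ResNet-18 stages emit [64, 128, 256, 512] channels instead of ResNet-50's [256, 512, 1024, 2048]. A quick shape check of that assumption (a hedged sketch, assuming mmseg is installed; random init is fine for checking shapes):

```python
import torch
from mmseg.registry import MODELS

# Build the bare backbone the way the runner would, and confirm the
# per-stage channel widths that decode_head.in_channels must match.
backbone = MODELS.build(
    dict(type='ResNetV1c', depth=18, out_indices=(0, 1, 2, 3)))
backbone.eval()
with torch.no_grad():
    feats = backbone(torch.randn(1, 3, 512, 512))
print([f.shape[1] for f in feats])  # -> [64, 128, 256, 512]
```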
/segmentation/configs/upernet/upernet_r50_4xb2-40k_cityscapes-512x1024.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/models/upernet_r50.py', '../_base_/datasets/cityscapes.py', 3 | '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' 4 | ] 5 | crop_size = (512, 1024) 6 | data_preprocessor = dict(size=crop_size) 7 | model = dict(data_preprocessor=data_preprocessor) 8 | -------------------------------------------------------------------------------- /segmentation/configs/upernet/upernet_r50_4xb2-40k_cityscapes-769x769.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/models/upernet_r50.py', 3 | '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', 4 | '../_base_/schedules/schedule_40k.py' 5 | ] 6 | crop_size = (769, 769) 7 | data_preprocessor = dict(size=crop_size) 8 | model = dict( 9 | data_preprocessor=data_preprocessor, 10 | decode_head=dict(align_corners=True), 11 | auxiliary_head=dict(align_corners=True), 12 | test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513))) 13 | -------------------------------------------------------------------------------- /segmentation/configs/upernet/upernet_r50_4xb2-80k_cityscapes-512x1024.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/models/upernet_r50.py', '../_base_/datasets/cityscapes.py', 3 | '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' 4 | ] 5 | crop_size = (512, 1024) 6 | data_preprocessor = dict(size=crop_size) 7 | model = dict(data_preprocessor=data_preprocessor) 8 | -------------------------------------------------------------------------------- /segmentation/configs/upernet/upernet_r50_4xb2-80k_cityscapes-769x769.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/models/upernet_r50.py', 3 | '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', 4 | '../_base_/schedules/schedule_80k.py' 5 | ] 6 | crop_size = (769, 769) 7 | data_preprocessor = dict(size=crop_size) 8 | model = dict( 9 | data_preprocessor=data_preprocessor, 10 | decode_head=dict(align_corners=True), 11 | auxiliary_head=dict(align_corners=True), 12 | test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513))) 13 | -------------------------------------------------------------------------------- /segmentation/configs/upernet/upernet_r50_4xb4-160k_ade20k-512x512.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/models/upernet_r50.py', '../_base_/datasets/ade20k.py', 3 | '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' 4 | ] 5 | crop_size = (512, 512) 6 | data_preprocessor = dict(size=crop_size) 7 | model = dict( 8 | data_preprocessor=data_preprocessor, 9 | decode_head=dict(num_classes=150), 10 | auxiliary_head=dict(num_classes=150)) 11 | -------------------------------------------------------------------------------- /segmentation/configs/upernet/upernet_r50_4xb4-20k_voc12aug-512x512.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/models/upernet_r50.py', 3 | '../_base_/datasets/pascal_voc12_aug.py', '../_base_/default_runtime.py', 4 | '../_base_/schedules/schedule_20k.py' 5 | ] 6 | crop_size = (512, 512) 7 | data_preprocessor = dict(size=crop_size) 8 | model = dict( 9 
| data_preprocessor=data_preprocessor, 10 | decode_head=dict(num_classes=21), 11 | auxiliary_head=dict(num_classes=21)) 12 | -------------------------------------------------------------------------------- /segmentation/configs/upernet/upernet_r50_4xb4-40k_voc12aug-512x512.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/models/upernet_r50.py', 3 | '../_base_/datasets/pascal_voc12_aug.py', '../_base_/default_runtime.py', 4 | '../_base_/schedules/schedule_40k.py' 5 | ] 6 | crop_size = (512, 512) 7 | data_preprocessor = dict(size=crop_size) 8 | model = dict( 9 | data_preprocessor=data_preprocessor, 10 | decode_head=dict(num_classes=21), 11 | auxiliary_head=dict(num_classes=21)) 12 | -------------------------------------------------------------------------------- /segmentation/configs/upernet/upernet_r50_4xb4-80k_ade20k-512x512.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/models/upernet_r50.py', '../_base_/datasets/ade20k.py', 3 | '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' 4 | ] 5 | crop_size = (512, 512) 6 | data_preprocessor = dict(size=crop_size) 7 | model = dict( 8 | data_preprocessor=data_preprocessor, 9 | decode_head=dict(num_classes=150), 10 | auxiliary_head=dict(num_classes=150)) 11 | -------------------------------------------------------------------------------- /segmentation/configs/vit/vit_deit-b16-ln_mln_upernet_8xb2-160k_ade20k-512x512.py: -------------------------------------------------------------------------------- 1 | _base_ = './vit_vit-b16_mln_upernet_8xb2-160k_ade20k-512x512.py' 2 | 3 | model = dict( 4 | pretrained='pretrain/deit_base_patch16_224-b5f2ef4d.pth', 5 | backbone=dict(drop_path_rate=0.1, final_norm=True)) 6 | -------------------------------------------------------------------------------- /segmentation/configs/vit/vit_deit-b16_mln_upernet_8xb2-160k_ade20k-512x512.py: -------------------------------------------------------------------------------- 1 | _base_ = './vit_vit-b16_mln_upernet_8xb2-160k_ade20k-512x512.py' 2 | 3 | model = dict( 4 | pretrained='pretrain/deit_base_patch16_224-b5f2ef4d.pth', 5 | backbone=dict(drop_path_rate=0.1), 6 | ) 7 | -------------------------------------------------------------------------------- /segmentation/configs/vit/vit_deit-b16_upernet_8xb2-160k_ade20k-512x512.py: -------------------------------------------------------------------------------- 1 | _base_ = './vit_vit-b16_mln_upernet_8xb2-160k_ade20k-512x512.py' 2 | 3 | model = dict( 4 | pretrained='pretrain/deit_base_patch16_224-b5f2ef4d.pth', 5 | backbone=dict(drop_path_rate=0.1), 6 | neck=None) 7 | -------------------------------------------------------------------------------- /segmentation/configs/vit/vit_deit-b16_upernet_8xb2-80k_ade20k-512x512.py: -------------------------------------------------------------------------------- 1 | _base_ = './vit_vit-b16_mln_upernet_8xb2-80k_ade20k-512x512.py' 2 | 3 | model = dict( 4 | pretrained='pretrain/deit_base_patch16_224-b5f2ef4d.pth', 5 | backbone=dict(drop_path_rate=0.1), 6 | neck=None) 7 | -------------------------------------------------------------------------------- /segmentation/configs/vit/vit_deit-s16-ln_mln_upernet_8xb2-160k_ade20k-512x512.py: -------------------------------------------------------------------------------- 1 | _base_ = './vit_vit-b16_mln_upernet_8xb2-160k_ade20k-512x512.py' 2 | 3 | model = dict( 4 | 
pretrained='pretrain/deit_small_patch16_224-cd65a155.pth', 5 | backbone=dict( 6 | num_heads=6, embed_dims=384, drop_path_rate=0.1, final_norm=True), 7 | decode_head=dict(num_classes=150, in_channels=[384, 384, 384, 384]), 8 | neck=dict(in_channels=[384, 384, 384, 384], out_channels=384), 9 | auxiliary_head=dict(num_classes=150, in_channels=384)) 10 | -------------------------------------------------------------------------------- /segmentation/configs/vit/vit_deit-s16_mln_upernet_8xb2-160k_ade20k-512x512.py: -------------------------------------------------------------------------------- 1 | _base_ = './vit_vit-b16_mln_upernet_8xb2-160k_ade20k-512x512.py' 2 | 3 | model = dict( 4 | pretrained='pretrain/deit_small_patch16_224-cd65a155.pth', 5 | backbone=dict(num_heads=6, embed_dims=384, drop_path_rate=0.1), 6 | decode_head=dict(num_classes=150, in_channels=[384, 384, 384, 384]), 7 | neck=dict(in_channels=[384, 384, 384, 384], out_channels=384), 8 | auxiliary_head=dict(num_classes=150, in_channels=384)) 9 | -------------------------------------------------------------------------------- /segmentation/configs/vit/vit_deit-s16_upernet_8xb2-160k_ade20k-512x512.py: -------------------------------------------------------------------------------- 1 | _base_ = './vit_vit-b16_mln_upernet_8xb2-80k_ade20k-512x512.py' 2 | 3 | model = dict( 4 | pretrained='pretrain/deit_small_patch16_224-cd65a155.pth', 5 | backbone=dict(num_heads=6, embed_dims=384, drop_path_rate=0.1), 6 | decode_head=dict(num_classes=150, in_channels=[384, 384, 384, 384]), 7 | neck=None, 8 | auxiliary_head=dict(num_classes=150, in_channels=384)) 9 | -------------------------------------------------------------------------------- /segmentation/configs/vit/vit_deit-s16_upernet_8xb2-80k_ade20k-512x512.py: -------------------------------------------------------------------------------- 1 | _base_ = './vit_vit-b16_mln_upernet_8xb2-80k_ade20k-512x512.py' 2 | 3 | model = dict( 4 | pretrained='pretrain/deit_small_patch16_224-cd65a155.pth', 5 | backbone=dict(num_heads=6, embed_dims=384, drop_path_rate=0.1), 6 | decode_head=dict(num_classes=150, in_channels=[384, 384, 384, 384]), 7 | neck=None, 8 | auxiliary_head=dict(num_classes=150, in_channels=384)) 9 | -------------------------------------------------------------------------------- /segmentation/configs/vit/vit_vit-b16-ln_mln_upernet_8xb2-160k_ade20k-512x512.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/models/upernet_vit-b16_ln_mln.py', 3 | '../_base_/datasets/ade20k.py', '../_base_/default_runtime.py', 4 | '../_base_/schedules/schedule_160k.py' 5 | ] 6 | crop_size = (512, 512) 7 | data_preprocessor = dict(size=crop_size) 8 | model = dict( 9 | data_preprocessor=data_preprocessor, 10 | pretrained='pretrain/vit_base_patch16_224.pth', 11 | backbone=dict(drop_path_rate=0.1, final_norm=True), 12 | decode_head=dict(num_classes=150), 13 | auxiliary_head=dict(num_classes=150)) 14 | 15 | # AdamW optimizer, no weight decay for position embedding & layer norm 16 | # in backbone 17 | optim_wrapper = dict( 18 | _delete_=True, 19 | type='OptimWrapper', 20 | optimizer=dict( 21 | type='AdamW', lr=0.00006, betas=(0.9, 0.999), weight_decay=0.01), 22 | paramwise_cfg=dict( 23 | custom_keys={ 24 | 'pos_embed': dict(decay_mult=0.), 25 | 'cls_token': dict(decay_mult=0.), 26 | 'norm': dict(decay_mult=0.) 
27 | })) 28 | 29 | param_scheduler = [ 30 | dict( 31 | type='LinearLR', start_factor=1e-6, by_epoch=False, begin=0, end=1500), 32 | dict( 33 | type='PolyLR', 34 | eta_min=0.0, 35 | power=1.0, 36 | begin=1500, 37 | end=160000, 38 | by_epoch=False, 39 | ) 40 | ] 41 | 42 | # By default, models are trained on 8 GPUs with 2 images per GPU 43 | train_dataloader = dict(batch_size=2) 44 | val_dataloader = dict(batch_size=1) 45 | test_dataloader = val_dataloader 46 | -------------------------------------------------------------------------------- /segmentation/configs/vit/vit_vit-b16_mln_upernet_8xb2-160k_ade20k-512x512.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/models/upernet_vit-b16_ln_mln.py', 3 | '../_base_/datasets/ade20k.py', '../_base_/default_runtime.py', 4 | '../_base_/schedules/schedule_160k.py' 5 | ] 6 | crop_size = (512, 512) 7 | data_preprocessor = dict(size=crop_size) 8 | model = dict( 9 | data_preprocessor=data_preprocessor, 10 | pretrained='pretrain/vit_base_patch16_224.pth', 11 | decode_head=dict(num_classes=150), 12 | auxiliary_head=dict(num_classes=150)) 13 | 14 | # AdamW optimizer, no weight decay for position embedding & layer norm 15 | # in backbone 16 | optim_wrapper = dict( 17 | _delete_=True, 18 | type='OptimWrapper', 19 | optimizer=dict( 20 | type='AdamW', lr=0.00006, betas=(0.9, 0.999), weight_decay=0.01), 21 | paramwise_cfg=dict( 22 | custom_keys={ 23 | 'pos_embed': dict(decay_mult=0.), 24 | 'cls_token': dict(decay_mult=0.), 25 | 'norm': dict(decay_mult=0.) 26 | })) 27 | 28 | param_scheduler = [ 29 | dict( 30 | type='LinearLR', start_factor=1e-6, by_epoch=False, begin=0, end=1500), 31 | dict( 32 | type='PolyLR', 33 | eta_min=0.0, 34 | power=1.0, 35 | begin=1500, 36 | end=160000, 37 | by_epoch=False, 38 | ) 39 | ] 40 | 41 | # By default, models are trained on 8 GPUs with 2 images per GPU 42 | train_dataloader = dict(batch_size=2) 43 | val_dataloader = dict(batch_size=1) 44 | test_dataloader = val_dataloader 45 | -------------------------------------------------------------------------------- /segmentation/configs/vit/vit_vit-b16_mln_upernet_8xb2-80k_ade20k-512x512.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/models/upernet_vit-b16_ln_mln.py', 3 | '../_base_/datasets/ade20k.py', '../_base_/default_runtime.py', 4 | '../_base_/schedules/schedule_80k.py' 5 | ] 6 | crop_size = (512, 512) 7 | data_preprocessor = dict(size=crop_size) 8 | model = dict( 9 | data_preprocessor=data_preprocessor, 10 | pretrained='pretrain/vit_base_patch16_224.pth', 11 | decode_head=dict(num_classes=150), 12 | auxiliary_head=dict(num_classes=150)) 13 | 14 | # AdamW optimizer, no weight decay for position embedding & layer norm 15 | # in backbone 16 | optim_wrapper = dict( 17 | _delete_=True, 18 | type='OptimWrapper', 19 | optimizer=dict( 20 | type='AdamW', lr=0.00006, betas=(0.9, 0.999), weight_decay=0.01), 21 | paramwise_cfg=dict( 22 | custom_keys={ 23 | 'pos_embed': dict(decay_mult=0.), 24 | 'cls_token': dict(decay_mult=0.), 25 | 'norm': dict(decay_mult=0.) 
26 | })) 27 | 28 | param_scheduler = [ 29 | dict( 30 | type='LinearLR', start_factor=1e-6, by_epoch=False, begin=0, end=1500), 31 | dict( 32 | type='PolyLR', 33 | eta_min=0.0, 34 | power=1.0, 35 | begin=1500, 36 | end=80000, 37 | by_epoch=False, 38 | ) 39 | ] 40 | 41 | # By default, models are trained on 8 GPUs with 2 images per GPU 42 | train_dataloader = dict(batch_size=2) 43 | val_dataloader = dict(batch_size=1) 44 | test_dataloader = val_dataloader 45 | -------------------------------------------------------------------------------- /segmentation/configs/vmamba/upernet_vmamba_4xb4-160k_ade20k-512x512_base.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../swin/swin-base-patch4-window7-in1k-pre_upernet_8xb2-160k_ade20k-512x512.py' 3 | ] 4 | model = dict( 5 | backbone=dict( 6 | type='MMSEG_VSSM', 7 | depths=(2, 2, 27, 2), 8 | dims=128, 9 | out_indices=(0, 1, 2, 3), 10 | pretrained="../../ckpts/vssmbase/ckpt_epoch_260.pth", 11 | ),) 12 | # train_dataloader = dict(batch_size=4) # as gpus=4 13 | 14 | -------------------------------------------------------------------------------- /segmentation/configs/vmamba/upernet_vmamba_4xb4-160k_ade20k-512x512_small.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../swin/swin-small-patch4-window7-in1k-pre_upernet_8xb2-160k_ade20k-512x512.py' 3 | ] 4 | model = dict( 5 | backbone=dict( 6 | type='MMSEG_VSSM', 7 | depths=(2, 2, 27, 2), 8 | dims=96, 9 | out_indices=(0, 1, 2, 3), 10 | pretrained="../../ckpts/vssmsmall/ema_ckpt_epoch_238.pth", 11 | ),) 12 | # train_dataloader = dict(batch_size=4) # as gpus=4 13 | 14 | -------------------------------------------------------------------------------- /segmentation/configs/vmamba/upernet_vmamba_4xb4-160k_ade20k-512x512_tiny.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../swin/swin-tiny-patch4-window7-in1k-pre_upernet_8xb2-160k_ade20k-512x512.py' 3 | ] 4 | model = dict( 5 | backbone=dict( 6 | type='MMSEG_VSSM', 7 | depths=(2, 2, 9, 2), 8 | dims=96, 9 | out_indices=(0, 1, 2, 3), 10 | pretrained="../../ckpts/vssmtiny/ckpt_epoch_292.pth", 11 | ),) 12 | # train_dataloader = dict(batch_size=4) # as gpus=4 13 | 14 | -------------------------------------------------------------------------------- /segmentation/configs/vmamba/upernet_vmamba_4xb4-160k_ade20k-640x640_small.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | './upernet_swin_4xb4-160k_ade20k-640x640_small.py' 3 | ] 4 | model = dict( 5 | backbone=dict( 6 | type='MMSEG_VSSM', 7 | depths=(2, 2, 27, 2), 8 | dims=96, 9 | out_indices=(0, 1, 2, 3), 10 | pretrained="../../ckpts/vssmsmall/ckpt_epoch_238.pth", 11 | ),) 12 | # train_dataloader = dict(batch_size=4) # as gpus=4 13 | 14 | -------------------------------------------------------------------------------- /segmentation/configs/vmamba/upernet_vmamba_4xb4-160k_ade20k-896x896_small.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | './upernet_swin_4xb4-160k_ade20k-896x896_small.py' 3 | ] 4 | model = dict( 5 | backbone=dict( 6 | type='MMSEG_VSSM', 7 | depths=(2, 2, 27, 2), 8 | dims=96, 9 | out_indices=(0, 1, 2, 3), 10 | pretrained="../../ckpts/vssmsmall/ckpt_epoch_238.pth", 11 | ),) 12 | train_dataloader = dict(batch_size=4) # as gpus=4 13 | 14 | -------------------------------------------------------------------------------- 
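The `vmamba` configs above keep the Swin training recipes as `_base_` and swap only the backbone; the `vssm*` variants that follow build the `MM_VSSM` type registered in `segmentation/model.py` (shown later in this listing). A hedged sketch of what that registration enables, with constructor arguments copied from the tiny config, assuming it is run from `segmentation/` with the `classification` package importable and the selective-scan kernels built:

```python
import torch
from mmseg.registry import MODELS
import model  # noqa: F401  -- registers the MM_VSSM backbone type

# Build just the backbone from its config dict, as the runner would.
backbone = MODELS.build(
    dict(type='MM_VSSM', dims=96, depths=(2, 2, 9, 2),
         out_indices=(0, 1, 2, 3)))
with torch.no_grad():
    feats = backbone(torch.randn(1, 3, 512, 512))
# four NCHW feature maps are expected, with widths 96/192/384/768
print([tuple(f.shape) for f in feats])
```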
/segmentation/configs/vssm/upernet_convnext_4xb4-160k_ade20k-640x640_small.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../convnext/convnext-base_upernet_8xb2-amp-160k_ade20k-640x640.py' 3 | ] 4 | crop_size = (640, 640) 5 | data_preprocessor = dict(size=crop_size) 6 | checkpoint_file = 'https://download.openmmlab.com/mmclassification/v0/convnext/downstream/convnext-small_3rdparty_32xb128-noema_in1k_20220301-303e75e3.pth' # noqa 7 | model = dict( 8 | data_preprocessor=data_preprocessor, 9 | backbone=dict( 10 | type='mmpretrain.ConvNeXt', 11 | arch='small', 12 | out_indices=[0, 1, 2, 3], 13 | drop_path_rate=0.3, 14 | layer_scale_init_value=1.0, 15 | gap_before_final_norm=False, 16 | init_cfg=dict( 17 | type='Pretrained', checkpoint=checkpoint_file, 18 | prefix='backbone.')), 19 | decode_head=dict( 20 | in_channels=[96, 192, 384, 768], 21 | num_classes=150, 22 | ), 23 | auxiliary_head=dict(in_channels=384, num_classes=150), 24 | test_cfg=dict(mode='slide', crop_size=crop_size, stride=(426, 426)), 25 | ) 26 | 27 | -------------------------------------------------------------------------------- /segmentation/configs/vssm/upernet_vssm_4xb4-160k_ade20k-512x512_base.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../swin/swin-base-patch4-window7-in1k-pre_upernet_8xb2-160k_ade20k-512x512.py' 3 | ] 4 | model = dict( 5 | backbone=dict( 6 | type='MM_VSSM', 7 | out_indices=(0, 1, 2, 3), 8 | pretrained="../../ckpts/classification/outs/vssm/vssmbasedp05/vssmbase_dp05_ckpt_epoch_260.pth", 9 | # copied from classification/configs/vssm/vssm_base_224.yaml 10 | dims=128, 11 | depths=(2, 2, 27, 2), 12 | ssm_d_state=16, 13 | ssm_dt_rank="auto", 14 | ssm_ratio=2.0, 15 | mlp_ratio=0.0, 16 | downsample_version="v1", 17 | patchembed_version="v1", 18 | # forward_type="v0", # if you want exactly the same 19 | ),) 20 | # train_dataloader = dict(batch_size=4) # as gpus=4 21 | 22 | -------------------------------------------------------------------------------- /segmentation/configs/vssm/upernet_vssm_4xb4-160k_ade20k-512x512_small.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../swin/swin-small-patch4-window7-in1k-pre_upernet_8xb2-160k_ade20k-512x512.py' 3 | ] 4 | model = dict( 5 | backbone=dict( 6 | type='MM_VSSM', 7 | out_indices=(0, 1, 2, 3), 8 | pretrained="../../ckpts/classification/outs/vssm/vssmsmall/vssmsmall_dp03_ckpt_epoch_238.pth", 9 | # copied from classification/configs/vssm/vssm_small_224.yaml 10 | dims=96, 11 | depths=(2, 2, 27, 2), 12 | ssm_d_state=16, 13 | ssm_dt_rank="auto", 14 | ssm_ratio=2.0, 15 | mlp_ratio=0.0, 16 | downsample_version="v1", 17 | patchembed_version="v1", 18 | # forward_type="v0", # if you want exactly the same 19 | ),) 20 | # train_dataloader = dict(batch_size=4) # as gpus=4 21 | 22 | -------------------------------------------------------------------------------- /segmentation/configs/vssm/upernet_vssm_4xb4-160k_ade20k-512x512_tiny.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../swin/swin-tiny-patch4-window7-in1k-pre_upernet_8xb2-160k_ade20k-512x512.py' 3 | ] 4 | model = dict( 5 | backbone=dict( 6 | type='MM_VSSM', 7 | out_indices=(0, 1, 2, 3), 8 | pretrained="../../ckpts/classification/outs/vssm/vssmtiny/vssmtiny_dp01_ckpt_epoch_292.pth", 9 | # copied from classification/configs/vssm/vssm_tiny_224.yaml 10 | dims=96, 11 | depths=(2, 2, 9, 2), 12 | 
ssm_d_state=16, 13 | ssm_dt_rank="auto", 14 | ssm_ratio=2.0, 15 | mlp_ratio=0.0, 16 | downsample_version="v1", 17 | patchembed_version="v1", 18 | # forward_type="v0", # if you want exactly the same 19 | ),) 20 | # train_dataloader = dict(batch_size=4) # as gpus=4 21 | 22 | -------------------------------------------------------------------------------- /segmentation/configs/vssm/upernet_vssm_4xb4-160k_ade20k-640x640_small.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | './upernet_swin_4xb4-160k_ade20k-640x640_small.py' 3 | ] 4 | model = dict( 5 | backbone=dict( 6 | type='MM_VSSM', 7 | out_indices=(0, 1, 2, 3), 8 | pretrained="../../ckpts/classification/outs/vssm/vssmsmall/vssmsmall_dp03_ckpt_epoch_238.pth", 9 | # copied from classification/configs/vssm/vssm_small_224.yaml 10 | dims=96, 11 | depths=(2, 2, 27, 2), 12 | ssm_d_state=16, 13 | ssm_dt_rank="auto", 14 | ssm_ratio=2.0, 15 | mlp_ratio=0.0, 16 | downsample_version="v1", 17 | patchembed_version="v1", 18 | # forward_type="v0", # if you want exactly the same 19 | ),) 20 | # train_dataloader = dict(batch_size=4) # as gpus=4 21 | 22 | -------------------------------------------------------------------------------- /segmentation/configs/vssm/upernet_vssm_4xb4-160k_ade20k-896x896_small.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | './upernet_swin_4xb4-160k_ade20k-896x896_small.py' 3 | ] 4 | model = dict( 5 | backbone=dict( 6 | type='MM_VSSM', 7 | out_indices=(0, 1, 2, 3), 8 | pretrained="../../ckpts/classification/outs/vssm/vssmsmall/vssmsmall_dp03_ckpt_epoch_238.pth", 9 | # copied from classification/configs/vssm/vssm_small_224.yaml 10 | dims=96, 11 | depths=(2, 2, 27, 2), 12 | ssm_d_state=16, 13 | ssm_dt_rank="auto", 14 | ssm_ratio=2.0, 15 | mlp_ratio=0.0, 16 | downsample_version="v1", 17 | patchembed_version="v1", 18 | # forward_type="v0", # if you want exactly the same 19 | ),) 20 | train_dataloader = dict(batch_size=4) # as gpus=4 21 | 22 | -------------------------------------------------------------------------------- /segmentation/configs/vssm1/upernet_vssm_4xb4-160k_ade20k-512x512_base.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../swin/swin-base-patch4-window7-in1k-pre_upernet_8xb2-160k_ade20k-512x512.py' 3 | ] 4 | model = dict( 5 | backbone=dict( 6 | type='MM_VSSM', 7 | out_indices=(0, 1, 2, 3), 8 | pretrained="", 9 | # copied from classification/configs/vssm/vssm_base_224.yaml 10 | dims=128, 11 | depths=(2, 2, 15, 2), 12 | ssm_d_state=1, 13 | ssm_dt_rank="auto", 14 | ssm_ratio=2.0, 15 | ssm_conv=3, 16 | ssm_conv_bias=False, 17 | forward_type="v05_noz", # v3_noz, 18 | mlp_ratio=4.0, 19 | downsample_version="v3", 20 | patchembed_version="v2", 21 | drop_path_rate=0.6, 22 | norm_layer="ln2d", 23 | ),) 24 | # train_dataloader = dict(batch_size=4) # as gpus=4 25 | 26 | -------------------------------------------------------------------------------- /segmentation/configs/vssm1/upernet_vssm_4xb4-160k_ade20k-512x512_small.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../swin/swin-small-patch4-window7-in1k-pre_upernet_8xb2-160k_ade20k-512x512.py' 3 | ] 4 | model = dict( 5 | backbone=dict( 6 | type='MM_VSSM', 7 | out_indices=(0, 1, 2, 3), 8 | pretrained="", 9 | # copied from classification/configs/vssm/vssm_small_224.yaml 10 | dims=96, 11 | depths=(2, 2, 15, 2), 12 | ssm_d_state=1, 13 | ssm_dt_rank="auto", 14 
| ssm_ratio=2.0, 15 | ssm_conv=3, 16 | ssm_conv_bias=False, 17 | forward_type="v05_noz", # v3_noz, 18 | mlp_ratio=4.0, 19 | downsample_version="v3", 20 | patchembed_version="v2", 21 | drop_path_rate=0.3, 22 | norm_layer="ln2d", 23 | ),) 24 | # train_dataloader = dict(batch_size=4) # as gpus=4 25 | 26 | -------------------------------------------------------------------------------- /segmentation/configs/vssm1/upernet_vssm_4xb4-160k_ade20k-512x512_tiny.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../swin/swin-tiny-patch4-window7-in1k-pre_upernet_8xb2-160k_ade20k-512x512.py' 3 | ] 4 | model = dict( 5 | backbone=dict( 6 | type='MM_VSSM', 7 | out_indices=(0, 1, 2, 3), 8 | pretrained="", 9 | # copied from classification/configs/vssm/vssm_tiny_224.yaml 10 | dims=96, 11 | # depths=(2, 2, 5, 2), 12 | depths=(2, 2, 8, 2), 13 | ssm_d_state=1, 14 | ssm_dt_rank="auto", 15 | # ssm_ratio=2.0, 16 | ssm_ratio=1.0, 17 | ssm_conv=3, 18 | ssm_conv_bias=False, 19 | forward_type="v05_noz", # v3_noz, 20 | mlp_ratio=4.0, 21 | downsample_version="v3", 22 | patchembed_version="v2", 23 | drop_path_rate=0.2, 24 | norm_layer="ln2d", 25 | ),) 26 | # train_dataloader = dict(batch_size=4) # as gpus=4 27 | 28 | -------------------------------------------------------------------------------- /segmentation/configs/vssm1/upernet_vssm_4xb4-160k_ade20k-512x512_tiny1.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../swin/swin-tiny-patch4-window7-in1k-pre_upernet_8xb2-160k_ade20k-512x512.py' 3 | ] 4 | model = dict( 5 | backbone=dict( 6 | type='MM_VSSM', 7 | out_indices=(0, 1, 2, 3), 8 | pretrained="", 9 | # copied from classification/configs/vssm/vssm_tiny_224.yaml 10 | dims=96, 11 | depths=(2, 2, 5, 2), 12 | ssm_d_state=1, 13 | ssm_dt_rank="auto", 14 | ssm_ratio=2.0, 15 | ssm_conv=3, 16 | ssm_conv_bias=False, 17 | forward_type="v05_noz", # v3_noz, 18 | mlp_ratio=4.0, 19 | downsample_version="v3", 20 | patchembed_version="v2", 21 | drop_path_rate=0.2, 22 | norm_layer="ln2d", 23 | ),) 24 | # train_dataloader = dict(batch_size=4) # as gpus=4 25 | 26 | -------------------------------------------------------------------------------- /segmentation/configs/vssm1/upernet_vssm_4xb4-160k_ade20k-640x640_small.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../vssm/upernet_swin_4xb4-160k_ade20k-640x640_small.py' 3 | ] 4 | model = dict( 5 | backbone=dict( 6 | type='MM_VSSM', 7 | out_indices=(0, 1, 2, 3), 8 | pretrained="", 9 | # copied from classification/configs/vssm/vssm_small_224.yaml 10 | dims=96, 11 | depths=(2, 2, 15, 2), 12 | ssm_d_state=1, 13 | ssm_dt_rank="auto", 14 | ssm_ratio=2.0, 15 | ssm_conv=3, 16 | ssm_conv_bias=False, 17 | forward_type="v05_noz", # v3_noz, 18 | mlp_ratio=4.0, 19 | downsample_version="v3", 20 | patchembed_version="v2", 21 | drop_path_rate=0.3, 22 | norm_layer="ln2d", 23 | ),) 24 | # train_dataloader = dict(batch_size=4) # as gpus=4 25 | 26 | -------------------------------------------------------------------------------- /segmentation/configs/vssm1/upernet_vssm_4xb4-160k_ade20k-896x896_small.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../vssm/upernet_swin_4xb4-160k_ade20k-896x896_small.py' 3 | ] 4 | model = dict( 5 | backbone=dict( 6 | type='MM_VSSM', 7 | out_indices=(0, 1, 2, 3), 8 | pretrained="", 9 | # copied from classification/configs/vssm/vssm_small_224.yaml 10 | 
dims=96, 11 | depths=(2, 2, 15, 2), 12 | ssm_d_state=1, 13 | ssm_dt_rank="auto", 14 | ssm_ratio=2.0, 15 | ssm_conv=3, 16 | ssm_conv_bias=False, 17 | forward_type="v05_noz", # v3_noz, 18 | mlp_ratio=4.0, 19 | downsample_version="v3", 20 | patchembed_version="v2", 21 | drop_path_rate=0.3, 22 | norm_layer="ln2d", 23 | ),) 24 | train_dataloader = dict(batch_size=4) # as gpus=4 25 | 26 | -------------------------------------------------------------------------------- /segmentation/model.py: -------------------------------------------------------------------------------- 1 | import os 2 | from functools import partial 3 | from typing import Callable 4 | 5 | import torch 6 | from torch import nn 7 | from torch.utils import checkpoint 8 | 9 | from mmengine.model import BaseModule 10 | from mmdet.registry import MODELS as MODELS_MMDET 11 | from mmseg.registry import MODELS as MODELS_MMSEG 12 | 13 | def import_abspy(name="models", path="classification/"):  # import a module from an arbitrary directory by temporarily extending sys.path 14 | import sys 15 | import importlib 16 | path = os.path.abspath(path) 17 | assert os.path.isdir(path) 18 | sys.path.insert(0, path) 19 | module = importlib.import_module(name) 20 | sys.path.pop(0) 21 | return module 22 | 23 | build = import_abspy( 24 | "models", 25 | os.path.join(os.path.dirname(os.path.abspath(__file__)), "../classification/"), 26 | ) 27 | Backbone_VSSM: nn.Module = build.vmamba.Backbone_VSSM 28 | 29 | @MODELS_MMSEG.register_module() 30 | @MODELS_MMDET.register_module() 31 | class MM_VSSM(BaseModule, Backbone_VSSM): 32 | def __init__(self, *args, **kwargs): 33 | BaseModule.__init__(self) 34 | Backbone_VSSM.__init__(self, *args, **kwargs) 35 | 36 | -------------------------------------------------------------------------------- /segmentation/readme.md: -------------------------------------------------------------------------------- 1 | ## origins 2 | `configs/` and `tools/` are copied from https://github.com/open-mmlab/mmsegmentation (`version 1.2.2`) 3 | 4 | ## modifications 5 | `import model` is added at line 13 of `tools/train.py` 6 | `import model` is added at line 8 of `tools/test.py` 7 | 8 | -------------------------------------------------------------------------------- /segmentation/tools/dist_test.sh: -------------------------------------------------------------------------------- 1 | CONFIG=$1 2 | CHECKPOINT=$2 3 | GPUS=$3 4 | NNODES=${NNODES:-1} 5 | NODE_RANK=${NODE_RANK:-0} 6 | PORT=${PORT:-29500} 7 | MASTER_ADDR=${MASTER_ADDR:-"127.0.0.1"} 8 | 9 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 10 | python -m torch.distributed.launch \ 11 | --nnodes=$NNODES \ 12 | --node_rank=$NODE_RANK \ 13 | --master_addr=$MASTER_ADDR \ 14 | --nproc_per_node=$GPUS \ 15 | --master_port=$PORT \ 16 | $(dirname "$0")/test.py \ 17 | $CONFIG \ 18 | $CHECKPOINT \ 19 | --launcher pytorch \ 20 | ${@:4} 21 | -------------------------------------------------------------------------------- /segmentation/tools/dist_train.sh: -------------------------------------------------------------------------------- 1 | CONFIG=$1 2 | GPUS=$2 3 | NNODES=${NNODES:-1} 4 | NODE_RANK=${NODE_RANK:-0} 5 | PORT=${PORT:-29500} 6 | MASTER_ADDR=${MASTER_ADDR:-"127.0.0.1"} 7 | 8 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 9 | python -m torch.distributed.launch \ 10 | --nnodes=$NNODES \ 11 | --node_rank=$NODE_RANK \ 12 | --master_addr=$MASTER_ADDR \ 13 | --nproc_per_node=$GPUS \ 14 | --master_port=$PORT \ 15 | $(dirname "$0")/train.py \ 16 | $CONFIG \ 17 | --launcher pytorch ${@:3} 18 | --------------------------------------------------------------------------------
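`dist_train.sh` above is a thin wrapper around `torch.distributed.launch`; the `import model` lines noted in the readme must run before the Runner builds the model, otherwise the `MM_VSSM` type is unknown to the registry. A hedged single-GPU equivalent of what the launcher ultimately runs (config path and work_dir are illustrative):

```python
import model  # noqa: F401  -- registers MM_VSSM, as tools/train.py does
from mmengine.config import Config
from mmengine.runner import Runner

cfg = Config.fromfile(
    'configs/vssm/upernet_vssm_4xb4-160k_ade20k-512x512_tiny.py')
cfg.launcher = 'none'  # dist_train.sh passes --launcher pytorch instead
cfg.work_dir = './work_dirs/upernet_vssm_tiny'  # illustrative output dir
Runner.from_cfg(cfg).train()
```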
/segmentation/tools/misc/publish_model.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import argparse 3 | import subprocess 4 | from hashlib import sha256 5 | 6 | import torch 7 | 8 | BLOCK_SIZE = 128 * 1024 9 | 10 | 11 | def parse_args(): 12 | parser = argparse.ArgumentParser( 13 | description='Process a checkpoint to be published') 14 | parser.add_argument('in_file', help='input checkpoint filename') 15 | parser.add_argument('out_file', help='output checkpoint filename') 16 | args = parser.parse_args() 17 | return args 18 | 19 | 20 | def sha256sum(filename: str) -> str: 21 | """Compute SHA256 message digest from a file.""" 22 | hash_func = sha256() 23 | byte_array = bytearray(BLOCK_SIZE) 24 | memory_view = memoryview(byte_array) 25 | with open(filename, 'rb', buffering=0) as file: 26 | for block in iter(lambda: file.readinto(memory_view), 0): 27 | hash_func.update(memory_view[:block]) 28 | return hash_func.hexdigest() 29 | 30 | 31 | def process_checkpoint(in_file, out_file): 32 | checkpoint = torch.load(in_file, map_location='cpu') 33 | # remove optimizer for smaller file size 34 | if 'optimizer' in checkpoint: 35 | del checkpoint['optimizer'] 36 | # if it is necessary to remove some sensitive data in checkpoint['meta'], 37 | # add the code here. 38 | torch.save(checkpoint, out_file) 39 | sha = sha256sum(out_file)  # hash the stripped checkpoint so the name suffix matches the published file 40 | final_file = out_file.removesuffix('.pth') + f'-{sha[:8]}.pth'  # rstrip('.pth') strips characters, mangling names such as 'depth.pth' 41 | subprocess.Popen(['mv', out_file, final_file]) 42 | 43 | 44 | def main(): 45 | args = parse_args() 46 | process_checkpoint(args.in_file, args.out_file) 47 | 48 | 49 | if __name__ == '__main__': 50 | main() 51 | -------------------------------------------------------------------------------- /segmentation/tools/model_converters/beit2mmseg.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import argparse 3 | import os.path as osp 4 | from collections import OrderedDict 5 | 6 | import mmengine 7 | import torch 8 | from mmengine.runner import CheckpointLoader 9 | 10 | 11 | def convert_beit(ckpt): 12 | new_ckpt = OrderedDict() 13 | 14 | for k, v in ckpt.items(): 15 | if k.startswith('patch_embed'): 16 | new_key = k.replace('patch_embed.proj', 'patch_embed.projection') 17 | new_ckpt[new_key] = v 18 | elif k.startswith('blocks'):  # elif, so converted patch_embed keys are not re-added under their original names 19 | new_key = k.replace('blocks', 'layers') 20 | if 'norm' in new_key: 21 | new_key = new_key.replace('norm', 'ln') 22 | elif 'mlp.fc1' in new_key: 23 | new_key = new_key.replace('mlp.fc1', 'ffn.layers.0.0') 24 | elif 'mlp.fc2' in new_key: 25 | new_key = new_key.replace('mlp.fc2', 'ffn.layers.1') 26 | new_ckpt[new_key] = v 27 | else: 28 | new_key = k 29 | new_ckpt[new_key] = v 30 | 31 | return new_ckpt 32 | 33 | 34 | def main(): 35 | parser = argparse.ArgumentParser( 36 | description='Convert keys in official pretrained beit models to ' 37 | 'MMSegmentation style.') 38 | parser.add_argument('src', help='src model path or url') 39 | # The dst path must be a full path of the new checkpoint.
40 | parser.add_argument('dst', help='save path') 41 | args = parser.parse_args() 42 | 43 | checkpoint = CheckpointLoader.load_checkpoint(args.src, map_location='cpu') 44 | if 'state_dict' in checkpoint: 45 | state_dict = checkpoint['state_dict'] 46 | elif 'model' in checkpoint: 47 | state_dict = checkpoint['model'] 48 | else: 49 | state_dict = checkpoint 50 | weight = convert_beit(state_dict) 51 | mmengine.mkdir_or_exist(osp.dirname(args.dst)) 52 | torch.save(weight, args.dst) 53 | 54 | 55 | if __name__ == '__main__': 56 | main() 57 | -------------------------------------------------------------------------------- /segmentation/tools/slurm_test.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -x 4 | 5 | PARTITION=$1 6 | JOB_NAME=$2 7 | CONFIG=$3 8 | CHECKPOINT=$4 9 | GPUS=${GPUS:-4} 10 | GPUS_PER_NODE=${GPUS_PER_NODE:-4} 11 | CPUS_PER_TASK=${CPUS_PER_TASK:-5} 12 | PY_ARGS=${@:5} 13 | SRUN_ARGS=${SRUN_ARGS:-""} 14 | 15 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 16 | srun -p ${PARTITION} \ 17 | --job-name=${JOB_NAME} \ 18 | --gres=gpu:${GPUS_PER_NODE} \ 19 | --ntasks=${GPUS} \ 20 | --ntasks-per-node=${GPUS_PER_NODE} \ 21 | --cpus-per-task=${CPUS_PER_TASK} \ 22 | --kill-on-bad-exit=1 \ 23 | ${SRUN_ARGS} \ 24 | python -u tools/test.py ${CONFIG} ${CHECKPOINT} --launcher="slurm" ${PY_ARGS} 25 | -------------------------------------------------------------------------------- /segmentation/tools/slurm_train.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -x 4 | 5 | PARTITION=$1 6 | JOB_NAME=$2 7 | CONFIG=$3 8 | GPUS=${GPUS:-4} 9 | GPUS_PER_NODE=${GPUS_PER_NODE:-4} 10 | CPUS_PER_TASK=${CPUS_PER_TASK:-5} 11 | SRUN_ARGS=${SRUN_ARGS:-""} 12 | PY_ARGS=${@:4} 13 | 14 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 15 | srun -p ${PARTITION} \ 16 | --job-name=${JOB_NAME} \ 17 | --gres=gpu:${GPUS_PER_NODE} \ 18 | --ntasks=${GPUS} \ 19 | --ntasks-per-node=${GPUS_PER_NODE} \ 20 | --cpus-per-task=${CPUS_PER_TASK} \ 21 | --kill-on-bad-exit=1 \ 22 | ${SRUN_ARGS} \ 23 | python -u tools/train.py ${CONFIG} --launcher="slurm" ${PY_ARGS} 24 | -------------------------------------------------------------------------------- /segmentation/tools/torchserve/test_torchserve.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
2 | from argparse import ArgumentParser 3 | from io import BytesIO 4 | 5 | import matplotlib.pyplot as plt 6 | import mmcv 7 | import requests 8 | 9 | from mmseg.apis import inference_model, init_model 10 | 11 | 12 | def parse_args(): 13 | parser = ArgumentParser( 14 | description='Compare result of torchserve and pytorch, ' 15 | 'and visualize them.') 16 | parser.add_argument('img', help='Image file') 17 | parser.add_argument('config', help='Config file') 18 | parser.add_argument('checkpoint', help='Checkpoint file') 19 | parser.add_argument('model_name', help='The model name in the server') 20 | parser.add_argument( 21 | '--inference-addr', 22 | default='127.0.0.1:8080', 23 | help='Address and port of the inference server') 24 | parser.add_argument( 25 | '--result-image', 26 | type=str, 27 | default=None, 28 | help='save server output in result-image') 29 | parser.add_argument( 30 | '--device', default='cuda:0', help='Device used for inference') 31 | 32 | args = parser.parse_args() 33 | return args 34 | 35 | 36 | def main(args): 37 | url = 'http://' + args.inference_addr + '/predictions/' + args.model_name 38 | with open(args.img, 'rb') as image: 39 | tmp_res = requests.post(url, image) 40 | content = tmp_res.content 41 | if args.result_image: 42 | with open(args.result_image, 'wb') as out_image: 43 | out_image.write(content) 44 | plt.imshow(mmcv.imread(args.result_image, 'grayscale')) 45 | plt.show() 46 | else: 47 | plt.imshow(plt.imread(BytesIO(content))) 48 | plt.show() 49 | model = init_model(args.config, args.checkpoint, args.device) 50 | image = mmcv.imread(args.img) 51 | result = inference_model(model, image) 52 | plt.imshow(result.pred_sem_seg.data[0].cpu().numpy())  # inference_model returns a SegDataSample in mmseg 1.x, not a list 53 | plt.show() 54 | 55 | 56 | if __name__ == '__main__': 57 | args = parse_args() 58 | main(args) 59 | --------------------------------------------------------------------------------
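The `-{sha[:8]}` suffix written by `tools/misc/publish_model.py` above lets a downloaded checkpoint be verified against its own file name. A small sketch of that check (the file name is hypothetical):

```python
from hashlib import sha256

def verify_published(path: str) -> bool:
    """Check that the 8-hex-digit suffix in 'name-abcdef12.pth' matches the
    file's SHA-256 digest, per the publish_model.py naming convention."""
    expected = path.rsplit('-', 1)[-1].removesuffix('.pth')
    with open(path, 'rb') as f:
        return sha256(f.read()).hexdigest().startswith(expected)

print(verify_published('upernet_vssm_tiny-12345678.pth'))  # hypothetical file
```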