├── .gitignore ├── 2DMambaMIL ├── README.md ├── dataset │ ├── csv_file │ ├── csv_files │ │ ├── classification │ │ │ ├── BRACS.csv │ │ │ ├── DHMC.csv │ │ │ ├── PANDA.csv │ │ │ ├── TCGA-BRCA-label.csv │ │ │ ├── TCGA-BRCA-split.csv │ │ │ ├── TCGA-NSCLC-label.csv.zip │ │ │ └── TCGA-NSCLC-split.csv │ │ └── survival │ │ │ ├── KIRC.csv │ │ │ ├── KIRP.csv │ │ │ ├── LUAD.csv │ │ │ ├── STAD.csv │ │ │ ├── TCGA_KIRC_survival_kfold │ │ │ ├── splits_0.csv │ │ │ ├── splits_1.csv │ │ │ ├── splits_2.csv │ │ │ ├── splits_3.csv │ │ │ └── splits_4.csv │ │ │ ├── TCGA_KIRP_survival_kfold │ │ │ ├── splits_0.csv │ │ │ ├── splits_1.csv │ │ │ ├── splits_2.csv │ │ │ ├── splits_3.csv │ │ │ └── splits_4.csv │ │ │ ├── TCGA_LUAD_survival_kfold │ │ │ ├── splits_0.csv │ │ │ ├── splits_1.csv │ │ │ ├── splits_2.csv │ │ │ ├── splits_3.csv │ │ │ └── splits_4.csv │ │ │ ├── TCGA_STAD_survival_kfold │ │ │ ├── splits_0.csv │ │ │ ├── splits_1.csv │ │ │ ├── splits_2.csv │ │ │ ├── splits_3.csv │ │ │ └── splits_4.csv │ │ │ ├── TCGA_UCEC_survival_kfold │ │ │ ├── splits_0.csv │ │ │ ├── splits_1.csv │ │ │ ├── splits_2.csv │ │ │ ├── splits_3.csv │ │ │ └── splits_4.csv │ │ │ └── UCEC.csv │ ├── dataset_generic.py │ └── dataset_survival.py ├── draw_heatmap.py ├── main.py ├── mamba_ssm │ ├── __init__.py │ ├── models │ │ ├── __init__.py │ │ ├── config_mamba.py │ │ └── mixer_seq_simple.py │ ├── modules │ │ ├── __init__.py │ │ ├── bimamba.py │ │ ├── mamba_simple.py │ │ └── srmamba.py │ ├── ops │ │ ├── __init__.py │ │ ├── selective_scan_interface.py │ │ └── triton │ │ │ ├── __init__.py │ │ │ ├── layernorm.py │ │ │ └── selective_state_update.py │ └── utils │ │ ├── __init__.py │ │ ├── generation.py │ │ └── hf.py ├── models │ ├── ABMIL.py │ ├── CLAM.py │ ├── DFDT.py │ ├── DSMIL.py │ ├── MambaMIL.py │ ├── MambaMIL_2D.py │ ├── S4MIL.py │ ├── TransMIL.py │ ├── __init__.py │ ├── mamba_simple.py │ ├── pscan.py │ ├── pscan_2d.py │ └── pscan_cuda │ │ └── __init__.py └── utils │ ├── __init__.py │ ├── core_utils.py │ ├── file_utils.py │ ├── survival_utils.py │ └── utils.py ├── 2DVMamba ├── classification │ ├── config.py │ ├── configs │ │ └── vssm_2d │ │ │ ├── vmambav2_2d_small_224.yaml │ │ │ └── vmambav2v_2d_tiny_224.yaml │ ├── data │ │ ├── __init__.py │ │ ├── build.py │ │ ├── cached_image_folder.py │ │ ├── data_simmim_ft.py │ │ ├── data_simmim_pt.py │ │ ├── imagenet22k_dataset.py │ │ ├── map22kto1k.txt │ │ ├── samplers.py │ │ └── zipreader.py │ ├── debug_model.py │ ├── main.py │ ├── models │ │ ├── __init__.py │ │ ├── csm_triton.py │ │ ├── csms6s.py │ │ ├── mamba2 │ │ │ ├── __init__.py │ │ │ ├── k_activations.py │ │ │ ├── layer_norm.py │ │ │ ├── layernorm_gated.py │ │ │ ├── selective_state_update.py │ │ │ ├── ssd_bmm.py │ │ │ ├── ssd_chunk_scan.py │ │ │ ├── ssd_chunk_state.py │ │ │ ├── ssd_combined.py │ │ │ ├── ssd_minimal.py │ │ │ └── ssd_state_passing.py │ │ ├── network_utils.py │ │ ├── vmamba.py │ │ └── vmamba_checks.py │ ├── readme.md │ └── utils │ │ ├── cosine_lr.py │ │ ├── logger.py │ │ ├── lr_scheduler.py │ │ ├── optimizer.py │ │ └── utils.py └── segmentation │ ├── configs │ ├── _base_ │ │ ├── datasets │ │ │ ├── ade20k.py │ │ │ ├── ade20k_640x640.py │ │ │ ├── bdd100k.py │ │ │ ├── chase_db1.py │ │ │ ├── cityscapes.py │ │ │ ├── cityscapes_1024x1024.py │ │ │ ├── cityscapes_768x768.py │ │ │ ├── cityscapes_769x769.py │ │ │ ├── cityscapes_832x832.py │ │ │ ├── coco-stuff10k.py │ │ │ ├── coco-stuff164k.py │ │ │ ├── drive.py │ │ │ ├── hrf.py │ │ │ ├── isaid.py │ │ │ ├── levir_256x256.py │ │ │ ├── loveda.py │ │ │ ├── mapillary_v1.py │ │ │ ├── 
mapillary_v1_65.py │ │ │ ├── mapillary_v2.py │ │ │ ├── nyu.py │ │ │ ├── nyu_512x512.py │ │ │ ├── pascal_context.py │ │ │ ├── pascal_context_59.py │ │ │ ├── pascal_voc12.py │ │ │ ├── pascal_voc12_aug.py │ │ │ ├── potsdam.py │ │ │ ├── refuge.py │ │ │ ├── stare.py │ │ │ ├── synapse.py │ │ │ └── vaihingen.py │ │ ├── default_runtime.py │ │ ├── models │ │ │ ├── ann_r50-d8.py │ │ │ ├── apcnet_r50-d8.py │ │ │ ├── bisenetv1_r18-d32.py │ │ │ ├── bisenetv2.py │ │ │ ├── ccnet_r50-d8.py │ │ │ ├── cgnet.py │ │ │ ├── danet_r50-d8.py │ │ │ ├── deeplabv3_r50-d8.py │ │ │ ├── deeplabv3_unet_s5-d16.py │ │ │ ├── deeplabv3plus_r50-d8.py │ │ │ ├── dmnet_r50-d8.py │ │ │ ├── dnl_r50-d8.py │ │ │ ├── dpt_vit-b16.py │ │ │ ├── emanet_r50-d8.py │ │ │ ├── encnet_r50-d8.py │ │ │ ├── erfnet_fcn.py │ │ │ ├── fast_scnn.py │ │ │ ├── fastfcn_r50-d32_jpu_psp.py │ │ │ ├── fcn_hr18.py │ │ │ ├── fcn_r50-d8.py │ │ │ ├── fcn_unet_s5-d16.py │ │ │ ├── fpn_poolformer_s12.py │ │ │ ├── fpn_r50.py │ │ │ ├── gcnet_r50-d8.py │ │ │ ├── icnet_r50-d8.py │ │ │ ├── isanet_r50-d8.py │ │ │ ├── lraspp_m-v3-d8.py │ │ │ ├── nonlocal_r50-d8.py │ │ │ ├── ocrnet_hr18.py │ │ │ ├── ocrnet_r50-d8.py │ │ │ ├── pointrend_r50.py │ │ │ ├── psanet_r50-d8.py │ │ │ ├── pspnet_r50-d8.py │ │ │ ├── pspnet_unet_s5-d16.py │ │ │ ├── san_vit-b16.py │ │ │ ├── segformer_mit-b0.py │ │ │ ├── segmenter_vit-b16_mask.py │ │ │ ├── setr_mla.py │ │ │ ├── setr_naive.py │ │ │ ├── setr_pup.py │ │ │ ├── stdc.py │ │ │ ├── twins_pcpvt-s_fpn.py │ │ │ ├── twins_pcpvt-s_upernet.py │ │ │ ├── upernet_beit.py │ │ │ ├── upernet_convnext.py │ │ │ ├── upernet_mae.py │ │ │ ├── upernet_r50.py │ │ │ ├── upernet_swin.py │ │ │ ├── upernet_vit-b16_ln_mln.py │ │ │ └── vpd_sd.py │ │ └── schedules │ │ │ ├── schedule_160k.py │ │ │ ├── schedule_20k.py │ │ │ ├── schedule_240k.py │ │ │ ├── schedule_25k.py │ │ │ ├── schedule_320k.py │ │ │ ├── schedule_40k.py │ │ │ └── schedule_80k.py │ ├── convnext │ │ ├── README.md │ │ ├── convnext-base_upernet_8xb2-amp-160k_ade20k-512x512.py │ │ ├── convnext-base_upernet_8xb2-amp-160k_ade20k-640x640.py │ │ ├── convnext-large_upernet_8xb2-amp-160k_ade20k-640x640.py │ │ ├── convnext-small_upernet_8xb2-amp-160k_ade20k-512x512.py │ │ ├── convnext-tiny_upernet_8xb2-amp-160k_ade20k-512x512.py │ │ ├── convnext-xlarge_upernet_8xb2-amp-160k_ade20k-640x640.py │ │ └── metafile.yaml │ ├── swin │ │ ├── README.md │ │ ├── metafile.yaml │ │ ├── swin-base-patch4-window12-in1k-384x384-pre_upernet_8xb2-160k_ade20k-512x512.py │ │ ├── swin-base-patch4-window12-in22k-384x384-pre_upernet_8xb2-160k_ade20k-512x512.py │ │ ├── swin-base-patch4-window7-in1k-pre_upernet_8xb2-160k_ade20k-512x512.py │ │ ├── swin-base-patch4-window7-in22k-pre_upernet_8xb2-160k_ade20k-512x512.py │ │ ├── swin-large-patch4-window12-in22k-384x384-pre_upernet_8xb2-160k_ade20k-512x512.py │ │ ├── swin-large-patch4-window7-in22k-pre_upernet_8xb2-160k_ade20k-512x512.py │ │ ├── swin-small-patch4-window7-in1k-pre_upernet_8xb2-160k_ade20k-512x512.py │ │ ├── swin-tiny-patch4-window7-in1k-pre_upernet_8xb2-160k_ade20k-512x512.py │ │ └── swin-tiny-patch4-window7_upernet_1xb8-20k_levir-256x256.py │ ├── upernet │ │ ├── README.md │ │ ├── metafile.yaml │ │ ├── upernet_r101_4xb2-40k_cityscapes-512x1024.py │ │ ├── upernet_r101_4xb2-40k_cityscapes-769x769.py │ │ ├── upernet_r101_4xb2-80k_cityscapes-512x1024.py │ │ ├── upernet_r101_4xb2-80k_cityscapes-769x769.py │ │ ├── upernet_r101_4xb4-160k_ade20k-512x512.py │ │ ├── upernet_r101_4xb4-20k_voc12aug-512x512.py │ │ ├── upernet_r101_4xb4-40k_voc12aug-512x512.py │ │ ├── 
upernet_r101_4xb4-80k_ade20k-512x512.py │ │ ├── upernet_r18_4xb2-40k_cityscapes-512x1024.py │ │ ├── upernet_r18_4xb2-80k_cityscapes-512x1024.py │ │ ├── upernet_r18_4xb4-160k_ade20k-512x512.py │ │ ├── upernet_r18_4xb4-20k_voc12aug-512x512.py │ │ ├── upernet_r18_4xb4-40k_voc12aug-512x512.py │ │ ├── upernet_r18_4xb4-80k_ade20k-512x512.py │ │ ├── upernet_r50_4xb2-40k_cityscapes-512x1024.py │ │ ├── upernet_r50_4xb2-40k_cityscapes-769x769.py │ │ ├── upernet_r50_4xb2-80k_cityscapes-512x1024.py │ │ ├── upernet_r50_4xb2-80k_cityscapes-769x769.py │ │ ├── upernet_r50_4xb4-160k_ade20k-512x512.py │ │ ├── upernet_r50_4xb4-20k_voc12aug-512x512.py │ │ ├── upernet_r50_4xb4-40k_voc12aug-512x512.py │ │ └── upernet_r50_4xb4-80k_ade20k-512x512.py │ ├── vit │ │ ├── README.md │ │ ├── metafile.yaml │ │ ├── vit_deit-b16-ln_mln_upernet_8xb2-160k_ade20k-512x512.py │ │ ├── vit_deit-b16_mln_upernet_8xb2-160k_ade20k-512x512.py │ │ ├── vit_deit-b16_upernet_8xb2-160k_ade20k-512x512.py │ │ ├── vit_deit-b16_upernet_8xb2-80k_ade20k-512x512.py │ │ ├── vit_deit-s16-ln_mln_upernet_8xb2-160k_ade20k-512x512.py │ │ ├── vit_deit-s16_mln_upernet_8xb2-160k_ade20k-512x512.py │ │ ├── vit_deit-s16_upernet_8xb2-160k_ade20k-512x512.py │ │ ├── vit_deit-s16_upernet_8xb2-80k_ade20k-512x512.py │ │ ├── vit_vit-b16-ln_mln_upernet_8xb2-160k_ade20k-512x512.py │ │ ├── vit_vit-b16_mln_upernet_8xb2-160k_ade20k-512x512.py │ │ └── vit_vit-b16_mln_upernet_8xb2-80k_ade20k-512x512.py │ └── vssm_2d │ │ └── upernet_vssm_2d_4xb4-160k_ade20k-512x512_tiny.py │ ├── model.py │ ├── readme.md │ ├── tools │ ├── analysis_tools │ │ ├── analyze_logs.py │ │ ├── benchmark.py │ │ ├── browse_dataset.py │ │ ├── confusion_matrix.py │ │ ├── get_flops.py │ │ └── visualization_cam.py │ ├── dataset_converters │ │ ├── chase_db1.py │ │ ├── cityscapes.py │ │ ├── coco_stuff10k.py │ │ ├── coco_stuff164k.py │ │ ├── drive.py │ │ ├── hrf.py │ │ ├── isaid.py │ │ ├── levircd.py │ │ ├── loveda.py │ │ ├── nyu.py │ │ ├── pascal_context.py │ │ ├── potsdam.py │ │ ├── refuge.py │ │ ├── stare.py │ │ ├── synapse.py │ │ ├── vaihingen.py │ │ └── voc_aug.py │ ├── deployment │ │ └── pytorch2torchscript.py │ ├── dist_test.sh │ ├── dist_train.sh │ ├── misc │ │ ├── browse_dataset.py │ │ ├── print_config.py │ │ └── publish_model.py │ ├── model_converters │ │ ├── beit2mmseg.py │ │ ├── clip2mmseg.py │ │ ├── mit2mmseg.py │ │ ├── san2mmseg.py │ │ ├── stdc2mmseg.py │ │ ├── swin2mmseg.py │ │ ├── twins2mmseg.py │ │ ├── vit2mmseg.py │ │ └── vitjax2mmseg.py │ ├── slurm_test.sh │ ├── slurm_train.sh │ ├── test.py │ ├── torchserve │ │ ├── mmseg2torchserve.py │ │ ├── mmseg_handler.py │ │ └── test_torchserve.py │ └── train.py │ └── vis │ └── vis_seg.py ├── README.md ├── cuda_kernel ├── CMakeLists.txt ├── build.sh ├── include │ ├── scan │ │ ├── block_scan.cuh │ │ ├── block_scan_warp_scans.cuh │ │ ├── commons.h │ │ ├── thread_reduce.cuh │ │ ├── thread_scan.cuh │ │ ├── warp_scan.cuh │ │ └── warp_scan_shfl.cuh │ ├── selective_scan │ │ ├── global.cuh │ │ ├── selective_scan.cuh │ │ ├── selective_scan_bwd_kernel.cuh │ │ ├── selective_scan_common.cuh │ │ ├── selective_scan_fwd_kernel.cuh │ │ └── static_switch.cuh │ └── utils │ │ └── cuda_utils.h └── src │ ├── pscan.cu │ ├── repo │ ├── scan.cuh │ ├── test_001.cu │ ├── test_002_warp.cu │ ├── test_003_warp_hw.cu │ ├── test_005_block_prefix_callback_op.cu │ ├── test_014_block.cu │ ├── test_015_block_with_prefix_callback.cu │ ├── test_016_block_exclusive_scan.cu │ ├── test_017_block_scan_inclusive_array.cu │ ├── test_018_blk_scan_arr_prefix_callback.cu │ ├── 
test_019_rev_blk_scan_scalar_prefix_callback.cu │ ├── test_020_rev_blk_scan_arr_no_preix_callback.cu │ ├── test_021_rev_blk_scan_arr_prefix_callback.cu │ └── uninitialized_copy.cuh │ ├── selective_scan │ ├── selective_scan_bwd.cu │ ├── selective_scan_bwd_kernel_fp16.cu │ ├── selective_scan_bwd_kernel_fp32.cu │ ├── selective_scan_fwd.cu │ ├── selective_scan_fwd_kernel_fp16.cu │ └── selective_scan_fwd_kernel_fp32.cu │ ├── test_arr.cu │ └── test_non_arr.cu ├── misc ├── compare.jpg ├── cuda.jpg ├── overview.jpg └── overview_github.jpg └── v2dmamba_scan └── __init__.py /2DMambaMIL/README.md: -------------------------------------------------------------------------------- 1 | # 2DMambaMIL 2 | 3 | We prepared the extracted features in h5 files in the same format as the [CLAM library](https://github.com/mahmoodlab/CLAM). 4 | After preparation, please point the argument `--h5_path` to the corresponding h5 directory. For the CUDA scan, please use the flag `--cuda_pscan`. 5 | 6 | Sample scripts to run experiments: 7 | 8 | ``` 9 | cd 2DMambaMIL/2DMambaMIL 10 | 11 | CUDA_VISIBLE_DEVICES=0 python main.py --task BRACS --model_type 2DMambaMIL --seed 0 --cuda_pscan --h5_path BRACS_uni/h5_files 12 | CUDA_VISIBLE_DEVICES=0 python main.py --task BRCA --model_type 2DMambaMIL --seed 0 --cuda_pscan --h5_path BRCA_uni/h5_files 13 | CUDA_VISIBLE_DEVICES=0 python main.py --task DHMC --model_type 2DMambaMIL --seed 0 --cuda_pscan --h5_path DHMC_uni/h5_files 14 | CUDA_VISIBLE_DEVICES=0 python main.py --task NSCLC --model_type 2DMambaMIL --seed 0 --cuda_pscan --h5_path NSCLC_uni/h5_files 15 | CUDA_VISIBLE_DEVICES=0 python main.py --task PANDA --model_type 2DMambaMIL --seed 0 --cuda_pscan --h5_path PANDA_uni/h5_files 16 | 17 | CUDA_VISIBLE_DEVICES=0 python main.py --task KIRC --survival --model_type 2DMambaMIL --seed 0 --cuda_pscan --h5_path KIRC_uni/h5_files 18 | CUDA_VISIBLE_DEVICES=0 python main.py --task KIRP --survival --model_type 2DMambaMIL --seed 0 --cuda_pscan --h5_path KIRP_uni/h5_files 19 | CUDA_VISIBLE_DEVICES=0 python main.py --task LUAD --survival --model_type 2DMambaMIL --seed 0 --cuda_pscan --h5_path LUAD_uni/h5_files 20 | CUDA_VISIBLE_DEVICES=0 python main.py --task STAD --survival --model_type 2DMambaMIL --seed 0 --cuda_pscan --h5_path STAD_uni/h5_files 21 | CUDA_VISIBLE_DEVICES=0 python main.py --task UCEC --survival --model_type 2DMambaMIL --seed 0 --cuda_pscan --h5_path UCEC_uni/h5_files 22 | ``` 23 |
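24 | Each h5 file is expected to hold two datasets, `features` (patch embeddings) and `coords` (patch coordinates), matching what `draw_heatmap.py` reads. A minimal sketch for inspecting one file (the path below is illustrative): 25 | 26 | ``` 27 | import h5py 28 | 29 | with h5py.File('BRACS_uni/h5_files/slide_0.h5', 'r') as f:  # illustrative path 30 |     features = f['features'][:]  # (num_patches, feature_dim) patch embeddings 31 |     coords = f['coords'][:]      # (num_patches, 2) patch coordinates 32 |     print(features.shape, coords.shape) 33 | ```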
-------------------------------------------------------------------------------- /2DMambaMIL/dataset/csv_files/classification/TCGA-NSCLC-label.csv.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AtlasAnalyticsLab/2DMamba/803a7c16ce72d93888543de79afecb158e7a67a3/2DMambaMIL/dataset/csv_files/classification/TCGA-NSCLC-label.csv.zip -------------------------------------------------------------------------------- /2DMambaMIL/draw_heatmap.py: -------------------------------------------------------------------------------- 1 | from utils.utils import WholeSlideImage 2 | import torch 3 | import glob 4 | import h5py 5 | import yaml 6 | import argparse 7 | import os 8 | 9 | parser = argparse.ArgumentParser() 10 | parser.add_argument('--model_path', required=True) 11 | parser.add_argument('--survival', action='store_true', default=False) 12 | parser.add_argument('--slide_folder', required=True) 13 | parser.add_argument('--h5_folder', required=True) 14 | parser.add_argument('--heatmap_save_dir', required=True) 15 | args = parser.parse_args() 16 | 17 | device = torch.device('cuda') 18 | model_path = args.model_path 19 | model = torch.load(model_path).to(device) 20 | model.survival = args.survival 21 | 22 | for path in glob.glob(args.slide_folder): 23 | slide_id = path.split('/')[-1][:-4] # strip the 4-character extension, e.g. '.svs' 24 | count_relevance = 0 25 | print(slide_id) 26 | try: 27 | data = h5py.File(f'{args.h5_folder}/{slide_id}.h5') 28 | except OSError: 29 | print(f'Cannot find h5 file for: {slide_id}') 30 | continue 31 | slide_feats = torch.tensor(data['features'][:]).to(device) 32 | coords = torch.tensor(data['coords'][:]).to(device) 33 | 34 | _, _, prediction, attention, _ = model(slide_feats) 35 | attention = attention.cpu().detach().numpy() 36 | 37 | wsi = WholeSlideImage(path) 38 | if len(wsi.level_dim) > 3: 39 | vis_level = 3 40 | else: 41 | vis_level = 2 42 | 43 | heatmap = wsi.visHeatmap( 44 | scores=attention, 45 | coords=data['coords'][:], 46 | patch_size = (512,512), 47 | blur = True, 48 | overlap=0.0, 49 | cmap = 'jet', 50 | convert_to_percentiles = True, 51 | vis_level = vis_level 52 | ) 53 | os.makedirs(f'{args.heatmap_save_dir}/', exist_ok=True) 54 | heatmap.save(f'{args.heatmap_save_dir}/{slide_id}.png') 55 | -------------------------------------------------------------------------------- /2DMambaMIL/mamba_ssm/__init__.py: -------------------------------------------------------------------------------- 1 | # __version__ = "1.1.2" 2 | 3 | # from mamba.mamba_ssm.ops.selective_scan_interface import selective_scan_fn, mamba_inner_fn 4 | # from mamba.mamba_ssm.modules.mamba_simple import Mamba 5 | # from mamba.mamba_ssm.models.mixer_seq_simple import MambaLMHeadModel 6 | # from mamba.mamba_ssm.modules.srmamba import SRMamba 7 | # from mamba.mamba_ssm.modules.bimamba import BiMamba 8 | # from mamba.mamba_ssm.mamba_simple.mamba import Mamba, MambaConfig 9 | 10 | 11 | __version__ = "1.1.2" 12 | 13 | from mamba.mamba_ssm.ops.selective_scan_interface import selective_scan_fn, mamba_inner_fn 14 | from mamba.mamba_ssm.modules.mamba_simple import Mamba 15 | from mamba.mamba_ssm.models.mixer_seq_simple import MambaLMHeadModel 16 | from mamba.mamba_ssm.modules.srmamba import SRMamba 17 | from mamba.mamba_ssm.modules.bimamba import BiMamba -------------------------------------------------------------------------------- /2DMambaMIL/mamba_ssm/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AtlasAnalyticsLab/2DMamba/803a7c16ce72d93888543de79afecb158e7a67a3/2DMambaMIL/mamba_ssm/models/__init__.py -------------------------------------------------------------------------------- /2DMambaMIL/mamba_ssm/models/config_mamba.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass, field 2 | 3 | 4 | @dataclass 5 | class MambaConfig: 6 | 7 | d_model: int = 2560 8 | n_layer: int = 64 9 | vocab_size: int = 50277 10 | ssm_cfg: dict = field(default_factory=dict) 11 | rms_norm: bool = True 12 | residual_in_fp32: bool = True 13 | fused_add_norm: bool = True 14 | pad_vocab_size_multiple: int = 8 15 | -------------------------------------------------------------------------------- /2DMambaMIL/mamba_ssm/modules/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AtlasAnalyticsLab/2DMamba/803a7c16ce72d93888543de79afecb158e7a67a3/2DMambaMIL/mamba_ssm/modules/__init__.py
-------------------------------------------------------------------------------- /2DMambaMIL/mamba_ssm/ops/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AtlasAnalyticsLab/2DMamba/803a7c16ce72d93888543de79afecb158e7a67a3/2DMambaMIL/mamba_ssm/ops/__init__.py -------------------------------------------------------------------------------- /2DMambaMIL/mamba_ssm/ops/triton/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AtlasAnalyticsLab/2DMamba/803a7c16ce72d93888543de79afecb158e7a67a3/2DMambaMIL/mamba_ssm/ops/triton/__init__.py -------------------------------------------------------------------------------- /2DMambaMIL/mamba_ssm/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AtlasAnalyticsLab/2DMamba/803a7c16ce72d93888543de79afecb158e7a67a3/2DMambaMIL/mamba_ssm/utils/__init__.py -------------------------------------------------------------------------------- /2DMambaMIL/mamba_ssm/utils/hf.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | import torch 4 | 5 | from transformers.utils import WEIGHTS_NAME, CONFIG_NAME 6 | from transformers.utils.hub import cached_file 7 | 8 | 9 | def load_config_hf(model_name): 10 | resolved_archive_file = cached_file(model_name, CONFIG_NAME, _raise_exceptions_for_missing_entries=False) 11 | return json.load(open(resolved_archive_file)) 12 | 13 | 14 | def load_state_dict_hf(model_name, device=None, dtype=None): 15 | # If not fp32, then we don't want to load directly to the GPU 16 | mapped_device = "cpu" if dtype not in [torch.float32, None] else device 17 | resolved_archive_file = cached_file(model_name, WEIGHTS_NAME, _raise_exceptions_for_missing_entries=False) 18 | state_dict = torch.load(resolved_archive_file, map_location=mapped_device) 19 | # Convert dtype before moving to GPU to save memory 20 | if dtype is not None: 21 | state_dict = {k: v.to(dtype=dtype) for k, v in state_dict.items()} 22 | state_dict = {k: v.to(device=device) for k, v in state_dict.items()} 23 | return state_dict 24 | -------------------------------------------------------------------------------- /2DMambaMIL/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AtlasAnalyticsLab/2DMamba/803a7c16ce72d93888543de79afecb158e7a67a3/2DMambaMIL/models/__init__.py -------------------------------------------------------------------------------- /2DMambaMIL/models/pscan_cuda/__init__.py: -------------------------------------------------------------------------------- 1 | from .pscan import * 2 | -------------------------------------------------------------------------------- /2DMambaMIL/utils/__init__.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.append('/home/yihui/VscodeFiles/Multimodel_Pretrain/Fearture_extractor/utils') -------------------------------------------------------------------------------- /2DMambaMIL/utils/file_utils.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | 3 | def save_pkl(filename, save_object): 4 | writer = open(filename,'wb') 5 | pickle.dump(save_object, writer) 6 | writer.close() 7 | 8 | def load_pkl(filename): 9 | loader = open(filename,'rb') 10 | file =
pickle.load(loader) 11 | loader.close() 12 | return file 13 | 14 | 15 | -------------------------------------------------------------------------------- /2DVMamba/classification/configs/vssm_2d/vmambav2_2d_small_224.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: vssm 3 | NAME: vssm1_small_0229 4 | DROP_PATH_RATE: 0.3 5 | VSSM: 6 | EMBED_DIM: 96 7 | DEPTHS: [ 2, 2, 15, 2 ] 8 | SSM_D_STATE: 1 9 | SSM_DT_RANK: "auto" 10 | SSM_RATIO: 2.0 11 | SSM_CONV: 3 12 | SSM_CONV_BIAS: false 13 | SSM_FORWARDTYPE: "v05_noz" # v3_noz 14 | MLP_RATIO: 4.0 15 | DOWNSAMPLE: "v3" 16 | PATCHEMBED: "v2" 17 | NORM_LAYER: "ln2d" 18 | USE_V2D: True 19 | -------------------------------------------------------------------------------- /2DVMamba/classification/configs/vssm_2d/vmambav2v_2d_tiny_224.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: vssm 3 | NAME: vssm1_tiny_0230s 4 | DROP_PATH_RATE: 0.2 5 | VSSM: 6 | EMBED_DIM: 96 7 | DEPTHS: [ 2, 2, 8, 2 ] 8 | SSM_D_STATE: 1 9 | SSM_DT_RANK: "auto" 10 | SSM_RATIO: 1.0 11 | SSM_CONV: 3 12 | SSM_CONV_BIAS: false 13 | SSM_FORWARDTYPE: "v05_noz" # v3_noz 14 | MLP_RATIO: 4.0 15 | DOWNSAMPLE: "v3" 16 | PATCHEMBED: "v2" 17 | NORM_LAYER: "ln2d" 18 | USE_V2D: True 19 | -------------------------------------------------------------------------------- /2DVMamba/classification/data/__init__.py: -------------------------------------------------------------------------------- 1 | from .build import build_loader as _build_loader 2 | from .data_simmim_pt import build_loader_simmim 3 | from .data_simmim_ft import build_loader_finetune 4 | 5 | 6 | def build_loader(config, simmim=False, is_pretrain=False): 7 | if not simmim: 8 | return _build_loader(config) 9 | if is_pretrain: 10 | return build_loader_simmim(config) 11 | else: 12 | return build_loader_finetune(config) 13 | -------------------------------------------------------------------------------- /2DVMamba/classification/data/imagenet22k_dataset.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import torch.utils.data as data 4 | import numpy as np 5 | from PIL import Image 6 | 7 | import warnings 8 | 9 | warnings.filterwarnings("ignore", "(Possibly )?corrupt EXIF data", UserWarning) 10 | 11 | 12 | class IN22KDATASET(data.Dataset): 13 | def __init__(self, root, ann_file='', transform=None, target_transform=None): 14 | super(IN22KDATASET, self).__init__() 15 | 16 | self.data_path = root 17 | self.ann_path = os.path.join(self.data_path, ann_file) 18 | self.transform = transform 19 | self.target_transform = target_transform 20 | # id & label: https://github.com/google-research/big_transfer/issues/7 21 | # total: 21843; only 21841 classes have images: map 21841->9205; 21842->15027 22 | self.database = json.load(open(self.ann_path)) 23 | 24 | def _load_image(self, path): 25 | try: 26 | im = Image.open(path) 27 | except Exception: 28 | print("ERROR IMG LOADED: ", path) 29 | random_img = np.random.rand(224, 224, 3) * 255 30 | im = Image.fromarray(np.uint8(random_img)) 31 | return im 32 | 33 | def __getitem__(self, index): 34 | """ 35 | Args: 36 | index (int): Index 37 | Returns: 38 | tuple: (image, target) where target is class_index of the target class.
39 | """ 40 | idb = self.database[index] 41 | 42 | # images 43 | images = self._load_image(self.data_path + '/' + idb[0]).convert('RGB') 44 | if self.transform is not None: 45 | images = self.transform(images) 46 | 47 | # target 48 | target = int(idb[1]) 49 | if self.target_transform is not None: 50 | target = self.target_transform(target) 51 | 52 | return images, target 53 | 54 | def __len__(self): 55 | return len(self.database) 56 | -------------------------------------------------------------------------------- /2DVMamba/classification/data/samplers.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Swin Transformer 3 | # Copyright (c) 2021 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ze Liu 6 | # -------------------------------------------------------- 7 | 8 | import torch 9 | 10 | 11 | class SubsetRandomSampler(torch.utils.data.Sampler): 12 | r"""Samples elements randomly from a given list of indices, without replacement. 13 | 14 | Arguments: 15 | indices (sequence): a sequence of indices 16 | """ 17 | 18 | def __init__(self, indices): 19 | self.epoch = 0 20 | self.indices = indices 21 | 22 | def __iter__(self): 23 | return (self.indices[i] for i in torch.randperm(len(self.indices))) 24 | 25 | def __len__(self): 26 | return len(self.indices) 27 | 28 | def set_epoch(self, epoch): 29 | self.epoch = epoch 30 | -------------------------------------------------------------------------------- /2DVMamba/classification/debug_model.py: -------------------------------------------------------------------------------- 1 | import torch 2 | def permute(x, hw_shape, B, direction_Bs, permute_B=False): 3 | # x: B, L, D(E) 4 | # B: B, d_state, L 5 | # direction_Bs: 4, dstate 6 | H, W = hw_shape 7 | BB, L, D = x.shape 8 | x = x.reshape(BB, H, W, D) 9 | 10 | x_1 = x.permute(0, 3, 1, 2) # [B, L, H, W] 11 | HW_1 = (H, W) 12 | 13 | x_2 = x_1.permute(0, 1, 3, 2) # [B, L, W, H] 14 | HW_2 = (W, H) 15 | 16 | x_1 = x_1.flatten(2) 17 | x_2 = x_2.flatten(2) 18 | 19 | x_3 = x_1.flip(-1) 20 | HW_3 = HW_1 21 | x_4 = x_2.flip(-1) 22 | HW_4 = HW_2 23 | 24 | if permute_B: 25 | B = B.reshape(B.shape[0], B.shape[1], H, W) 26 | B1 = B.flatten(2) 27 | B2 = B.permute(0, 1, 3, 2).flatten(2) 28 | B3 = B1.flip(-1) 29 | B4 = B2.flip(-1) 30 | Bs = [B1, B2, B3, B4] 31 | else: 32 | Bs = [B, B, B, B] 33 | 34 | dBs = [db[None, :, None] for db in direction_Bs] 35 | 36 | return [x_1, x_2, x_3, x_4], [HW_1, HW_2, HW_3, HW_4], Bs, dBs 37 | 38 | def unpermute_and_sum(ys, H, W): 39 | # ys list of 4 [B, D, L] 40 | ys0 = ys[0] 41 | ys1 = ys[1] 42 | ys2 = ys[2].flip(-1) 43 | ys3 = ys[3].flip(-1) 44 | 45 | ys02 = ys0 + ys2 46 | ys13 = ys1 + ys3 47 | ys13 = ys13.reshape(ys13.shape[0], ys13.shape[1], W, H) 48 | ys13 = ys13.permute(0, 1, 3, 2).flatten(2) 49 | ys_out = ys02 + ys13 50 | ys_out = ys_out.permute(0, 2, 1) 51 | return ys_out 52 | 53 | 54 | if __name__ == '__main__': 55 | x = torch.range(0, 5).reshape(1, 6, 1) 56 | hw_shape = (2, 3) 57 | B = torch.range(0, 5).reshape(1, 1, 6) 58 | direction_Bs = torch.range(0, 3).reshape(4, 1) 59 | 60 | xs, HWs, Bs, dBs = permute(x, hw_shape, B, direction_Bs, permute_B=True) 61 | 62 | ys_out = unpermute_and_sum(xs, *hw_shape) 63 | 64 | print(xs) 65 | print(HWs) 66 | print(Bs) 67 | print(dBs) 68 | print(ys_out) -------------------------------------------------------------------------------- /2DVMamba/classification/models/mamba2/__init__.py: 
-------------------------------------------------------------------------------- 1 | # all the code in this folder is copied from https://github.com/state-spaces/mamba/blob/main/mamba_ssm/ops/triton/ 2 | 3 | -------------------------------------------------------------------------------- /2DVMamba/classification/models/network_utils.py: -------------------------------------------------------------------------------- 1 | import numbers 2 | from typing import Tuple 3 | 4 | import torch 5 | from torch import nn, Tensor 6 | from torch.nn import Parameter, init 7 | 8 | import torch.nn.functional as F 9 | 10 | 11 | class AffineFirstLayerNorm(nn.Module): 12 | __constants__ = ['affine_shape', 'normalized_shape', 'eps'] 13 | normalized_shape: Tuple[int, ...] 14 | eps: float 15 | elementwise_affine: bool 16 | 17 | def __init__(self, affine_shape, normalized_shape, eps: float = 1e-5, 18 | bias: bool = True, device=None, dtype=None) -> None: 19 | factory_kwargs = {'device': device, 'dtype': dtype} 20 | super().__init__() 21 | if isinstance(normalized_shape, numbers.Integral): 22 | # mypy error: incompatible types in assignment 23 | normalized_shape = (normalized_shape,) # type: ignore[assignment] 24 | self.normalized_shape = tuple(normalized_shape) # type: ignore[arg-type] 25 | 26 | if isinstance(affine_shape, numbers.Integral): 27 | # mypy error: incompatible types in assignment 28 | affine_shape = (affine_shape,) # type: ignore[assignment] 29 | self.affine_shape = tuple(affine_shape) # type: ignore[arg-type] 30 | 31 | self.eps = eps 32 | 33 | self.weight = Parameter(torch.empty(self.affine_shape, **factory_kwargs)) 34 | if bias: 35 | self.bias = Parameter(torch.empty(self.affine_shape, **factory_kwargs)) 36 | else: 37 | self.register_parameter('bias', None) 38 | 39 | 40 | self.reset_parameters() 41 | 42 | def reset_parameters(self) -> None: 43 | init.ones_(self.weight) 44 | if self.bias is not None: 45 | init.zeros_(self.bias) 46 | 47 | def forward(self, input: Tensor) -> Tensor: 48 | input = input * self.weight + (self.bias if self.bias is not None else 0) # self.bias is None when constructed with bias=False 49 | return F.layer_norm( 50 | input, self.normalized_shape, None, None, self.eps) 51 | 52 | def extra_repr(self) -> str: 53 | return '{affine_shape}, {normalized_shape}, eps={eps}'.format(**self.__dict__) 54 | -------------------------------------------------------------------------------- /2DVMamba/classification/readme.md: -------------------------------------------------------------------------------- 1 | ## origins 2 | 3 | based on https://github.com/microsoft/Swin-Transformer#20240103 4 | 5 | `main.py` and `utils/utils_ema.py` are modified from https://github.com/microsoft/Swin-Transformer#20240103, based on https://github.com/facebookresearch/ConvNeXt#20240103 6 | 7 | -------------------------------------------------------------------------------- /2DVMamba/classification/utils/logger.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Swin Transformer 3 | # Copyright (c) 2021 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ze Liu 6 | # -------------------------------------------------------- 7 | 8 | import os 9 | import sys 10 | import logging 11 | import functools 12 | from termcolor import colored 13 | 14 | 15 | @functools.lru_cache() 16 | def create_logger(output_dir, dist_rank=0, name=''): 17 | # create logger 18 | logger = logging.getLogger(name) 19 | logger.setLevel(logging.DEBUG) 20 | logger.propagate = False 21 | 22 | # 
create formatter 23 | fmt = '[%(asctime)s %(name)s] (%(filename)s %(lineno)d): %(levelname)s %(message)s' 24 | color_fmt = colored('[%(asctime)s %(name)s]', 'green') + \ 25 | colored('(%(filename)s %(lineno)d)', 'yellow') + ': %(levelname)s %(message)s' 26 | 27 | # create console handlers for master process 28 | if dist_rank == 0: 29 | console_handler = logging.StreamHandler(sys.stdout) 30 | console_handler.setLevel(logging.DEBUG) 31 | console_handler.setFormatter( 32 | logging.Formatter(fmt=color_fmt, datefmt='%Y-%m-%d %H:%M:%S')) 33 | logger.addHandler(console_handler) 34 | 35 | # create file handlers 36 | file_handler = logging.FileHandler(os.path.join(output_dir, f'log_rank{dist_rank}.txt'), mode='a') 37 | file_handler.setLevel(logging.DEBUG) 38 | file_handler.setFormatter(logging.Formatter(fmt=fmt, datefmt='%Y-%m-%d %H:%M:%S')) 39 | logger.addHandler(file_handler) 40 | 41 | return logger 42 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/_base_/datasets/ade20k.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'ADE20KDataset' 3 | data_root = 'data/ade/ADEChallengeData2016' 4 | crop_size = (512, 512) 5 | train_pipeline = [ 6 | dict(type='LoadImageFromFile'), 7 | dict(type='LoadAnnotations', reduce_zero_label=True), 8 | dict( 9 | type='RandomResize', 10 | scale=(2048, 512), 11 | ratio_range=(0.5, 2.0), 12 | keep_ratio=True), 13 | dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), 14 | dict(type='RandomFlip', prob=0.5), 15 | dict(type='PhotoMetricDistortion'), 16 | dict(type='PackSegInputs') 17 | ] 18 | test_pipeline = [ 19 | dict(type='LoadImageFromFile'), 20 | dict(type='Resize', scale=(2048, 512), keep_ratio=True), 21 | # add loading annotation after ``Resize`` because ground truth 22 | # does not need to do resize data transform 23 | dict(type='LoadAnnotations', reduce_zero_label=True), 24 | dict(type='PackSegInputs') 25 | ] 26 | img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75] 27 | tta_pipeline = [ 28 | dict(type='LoadImageFromFile', backend_args=None), 29 | dict( 30 | type='TestTimeAug', 31 | transforms=[ 32 | [ 33 | dict(type='Resize', scale_factor=r, keep_ratio=True) 34 | for r in img_ratios 35 | ], 36 | [ 37 | dict(type='RandomFlip', prob=0., direction='horizontal'), 38 | dict(type='RandomFlip', prob=1., direction='horizontal') 39 | ], [dict(type='LoadAnnotations')], [dict(type='PackSegInputs')] 40 | ]) 41 | ] 42 | train_dataloader = dict( 43 | batch_size=4, 44 | num_workers=4, 45 | persistent_workers=True, 46 | sampler=dict(type='InfiniteSampler', shuffle=True), 47 | dataset=dict( 48 | type=dataset_type, 49 | data_root=data_root, 50 | data_prefix=dict( 51 | img_path='images/training', seg_map_path='annotations/training'), 52 | pipeline=train_pipeline)) 53 | val_dataloader = dict( 54 | batch_size=1, 55 | num_workers=4, 56 | persistent_workers=True, 57 | sampler=dict(type='DefaultSampler', shuffle=False), 58 | dataset=dict( 59 | type=dataset_type, 60 | data_root=data_root, 61 | data_prefix=dict( 62 | img_path='images/validation', 63 | seg_map_path='annotations/validation'), 64 | pipeline=test_pipeline)) 65 | test_dataloader = val_dataloader 66 | 67 | val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU']) 68 | test_evaluator = val_evaluator 69 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/_base_/datasets/ade20k_640x640.py: 
-------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'ADE20KDataset' 3 | data_root = 'data/ade/ADEChallengeData2016' 4 | crop_size = (640, 640) 5 | train_pipeline = [ 6 | dict(type='LoadImageFromFile'), 7 | dict(type='LoadAnnotations', reduce_zero_label=True), 8 | dict( 9 | type='RandomResize', 10 | scale=(2560, 640), 11 | ratio_range=(0.5, 2.0), 12 | keep_ratio=True), 13 | dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), 14 | dict(type='RandomFlip', prob=0.5), 15 | dict(type='PhotoMetricDistortion'), 16 | dict(type='PackSegInputs') 17 | ] 18 | test_pipeline = [ 19 | dict(type='LoadImageFromFile'), 20 | dict(type='Resize', scale=(2560, 640), keep_ratio=True), 21 | # add loading annotation after ``Resize`` because ground truth 22 | # does not need to do resize data transform 23 | dict(type='LoadAnnotations', reduce_zero_label=True), 24 | dict(type='PackSegInputs') 25 | ] 26 | img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75] 27 | tta_pipeline = [ 28 | dict(type='LoadImageFromFile', backend_args=None), 29 | dict( 30 | type='TestTimeAug', 31 | transforms=[ 32 | [ 33 | dict(type='Resize', scale_factor=r, keep_ratio=True) 34 | for r in img_ratios 35 | ], 36 | [ 37 | dict(type='RandomFlip', prob=0., direction='horizontal'), 38 | dict(type='RandomFlip', prob=1., direction='horizontal') 39 | ], [dict(type='LoadAnnotations')], [dict(type='PackSegInputs')] 40 | ]) 41 | ] 42 | train_dataloader = dict( 43 | batch_size=4, 44 | num_workers=4, 45 | persistent_workers=True, 46 | sampler=dict(type='InfiniteSampler', shuffle=True), 47 | dataset=dict( 48 | type=dataset_type, 49 | data_root=data_root, 50 | data_prefix=dict( 51 | img_path='images/training', seg_map_path='annotations/training'), 52 | pipeline=train_pipeline)) 53 | val_dataloader = dict( 54 | batch_size=1, 55 | num_workers=4, 56 | persistent_workers=True, 57 | sampler=dict(type='DefaultSampler', shuffle=False), 58 | dataset=dict( 59 | type=dataset_type, 60 | data_root=data_root, 61 | data_prefix=dict( 62 | img_path='images/validation', 63 | seg_map_path='annotations/validation'), 64 | pipeline=test_pipeline)) 65 | test_dataloader = val_dataloader 66 | 67 | val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU']) 68 | test_evaluator = val_evaluator 69 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/_base_/datasets/bdd100k.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'BDD100KDataset' 3 | data_root = 'data/bdd100k/' 4 | 5 | crop_size = (512, 1024) 6 | train_pipeline = [ 7 | dict(type='LoadImageFromFile'), 8 | dict(type='LoadAnnotations'), 9 | dict( 10 | type='RandomResize', 11 | scale=(2048, 1024), 12 | ratio_range=(0.5, 2.0), 13 | keep_ratio=True), 14 | dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), 15 | dict(type='RandomFlip', prob=0.5), 16 | dict(type='PhotoMetricDistortion'), 17 | dict(type='PackSegInputs') 18 | ] 19 | test_pipeline = [ 20 | dict(type='LoadImageFromFile'), 21 | dict(type='Resize', scale=(2048, 1024), keep_ratio=True), 22 | # add loading annotation after ``Resize`` because ground truth 23 | # does not need to do resize data transform 24 | dict(type='LoadAnnotations'), 25 | dict(type='PackSegInputs') 26 | ] 27 | img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75] 28 | tta_pipeline = [ 29 | dict(type='LoadImageFromFile', backend_args=None), 30 | dict( 31 | type='TestTimeAug', 32 | 
transforms=[ 33 | [ 34 | dict(type='Resize', scale_factor=r, keep_ratio=True) 35 | for r in img_ratios 36 | ], 37 | [ 38 | dict(type='RandomFlip', prob=0., direction='horizontal'), 39 | dict(type='RandomFlip', prob=1., direction='horizontal') 40 | ], [dict(type='LoadAnnotations')], [dict(type='PackSegInputs')] 41 | ]) 42 | ] 43 | train_dataloader = dict( 44 | batch_size=2, 45 | num_workers=2, 46 | persistent_workers=True, 47 | sampler=dict(type='InfiniteSampler', shuffle=True), 48 | dataset=dict( 49 | type=dataset_type, 50 | data_root=data_root, 51 | data_prefix=dict( 52 | img_path='images/10k/train', 53 | seg_map_path='labels/sem_seg/masks/train'), 54 | pipeline=train_pipeline)) 55 | val_dataloader = dict( 56 | batch_size=1, 57 | num_workers=4, 58 | persistent_workers=True, 59 | sampler=dict(type='DefaultSampler', shuffle=False), 60 | dataset=dict( 61 | type=dataset_type, 62 | data_root=data_root, 63 | data_prefix=dict( 64 | img_path='images/10k/val', 65 | seg_map_path='labels/sem_seg/masks/val'), 66 | pipeline=test_pipeline)) 67 | test_dataloader = val_dataloader 68 | 69 | val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU']) 70 | test_evaluator = val_evaluator 71 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/_base_/datasets/cityscapes.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'CityscapesDataset' 3 | data_root = 'data/cityscapes/' 4 | crop_size = (512, 1024) 5 | train_pipeline = [ 6 | dict(type='LoadImageFromFile'), 7 | dict(type='LoadAnnotations'), 8 | dict( 9 | type='RandomResize', 10 | scale=(2048, 1024), 11 | ratio_range=(0.5, 2.0), 12 | keep_ratio=True), 13 | dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), 14 | dict(type='RandomFlip', prob=0.5), 15 | dict(type='PhotoMetricDistortion'), 16 | dict(type='PackSegInputs') 17 | ] 18 | test_pipeline = [ 19 | dict(type='LoadImageFromFile'), 20 | dict(type='Resize', scale=(2048, 1024), keep_ratio=True), 21 | # add loading annotation after ``Resize`` because ground truth 22 | # does not need to do resize data transform 23 | dict(type='LoadAnnotations'), 24 | dict(type='PackSegInputs') 25 | ] 26 | img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75] 27 | tta_pipeline = [ 28 | dict(type='LoadImageFromFile', backend_args=None), 29 | dict( 30 | type='TestTimeAug', 31 | transforms=[ 32 | [ 33 | dict(type='Resize', scale_factor=r, keep_ratio=True) 34 | for r in img_ratios 35 | ], 36 | [ 37 | dict(type='RandomFlip', prob=0., direction='horizontal'), 38 | dict(type='RandomFlip', prob=1., direction='horizontal') 39 | ], [dict(type='LoadAnnotations')], [dict(type='PackSegInputs')] 40 | ]) 41 | ] 42 | train_dataloader = dict( 43 | batch_size=2, 44 | num_workers=2, 45 | persistent_workers=True, 46 | sampler=dict(type='InfiniteSampler', shuffle=True), 47 | dataset=dict( 48 | type=dataset_type, 49 | data_root=data_root, 50 | data_prefix=dict( 51 | img_path='leftImg8bit/train', seg_map_path='gtFine/train'), 52 | pipeline=train_pipeline)) 53 | val_dataloader = dict( 54 | batch_size=1, 55 | num_workers=4, 56 | persistent_workers=True, 57 | sampler=dict(type='DefaultSampler', shuffle=False), 58 | dataset=dict( 59 | type=dataset_type, 60 | data_root=data_root, 61 | data_prefix=dict( 62 | img_path='leftImg8bit/val', seg_map_path='gtFine/val'), 63 | pipeline=test_pipeline)) 64 | test_dataloader = val_dataloader 65 | 66 | val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU']) 67 | 
test_evaluator = val_evaluator 68 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/_base_/datasets/cityscapes_1024x1024.py: -------------------------------------------------------------------------------- 1 | _base_ = './cityscapes.py' 2 | crop_size = (1024, 1024) 3 | train_pipeline = [ 4 | dict(type='LoadImageFromFile'), 5 | dict(type='LoadAnnotations'), 6 | dict( 7 | type='RandomResize', 8 | scale=(2048, 1024), 9 | ratio_range=(0.5, 2.0), 10 | keep_ratio=True), 11 | dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), 12 | dict(type='RandomFlip', prob=0.5), 13 | dict(type='PhotoMetricDistortion'), 14 | dict(type='PackSegInputs') 15 | ] 16 | test_pipeline = [ 17 | dict(type='LoadImageFromFile'), 18 | dict(type='Resize', scale=(2048, 1024), keep_ratio=True), 19 | # add loading annotation after ``Resize`` because ground truth 20 | # does not need to do resize data transform 21 | dict(type='LoadAnnotations'), 22 | dict(type='PackSegInputs') 23 | ] 24 | train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) 25 | val_dataloader = dict(dataset=dict(pipeline=test_pipeline)) 26 | test_dataloader = val_dataloader 27 | 28 | val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU']) 29 | test_evaluator = val_evaluator 30 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/_base_/datasets/cityscapes_768x768.py: -------------------------------------------------------------------------------- 1 | _base_ = './cityscapes.py' 2 | crop_size = (768, 768) 3 | train_pipeline = [ 4 | dict(type='LoadImageFromFile'), 5 | dict(type='LoadAnnotations'), 6 | dict( 7 | type='RandomResize', 8 | scale=(2049, 1025), 9 | ratio_range=(0.5, 2.0), 10 | keep_ratio=True), 11 | dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), 12 | dict(type='RandomFlip', prob=0.5), 13 | dict(type='PhotoMetricDistortion'), 14 | dict(type='PackSegInputs') 15 | ] 16 | test_pipeline = [ 17 | dict(type='LoadImageFromFile'), 18 | dict(type='Resize', scale=(2049, 1025), keep_ratio=True), 19 | # add loading annotation after ``Resize`` because ground truth 20 | # does not need to do resize data transform 21 | dict(type='LoadAnnotations'), 22 | dict(type='PackSegInputs') 23 | ] 24 | train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) 25 | val_dataloader = dict(dataset=dict(pipeline=test_pipeline)) 26 | test_dataloader = val_dataloader 27 | 28 | val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU']) 29 | test_evaluator = val_evaluator 30 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/_base_/datasets/cityscapes_769x769.py: -------------------------------------------------------------------------------- 1 | _base_ = './cityscapes.py' 2 | crop_size = (769, 769) 3 | train_pipeline = [ 4 | dict(type='LoadImageFromFile'), 5 | dict(type='LoadAnnotations'), 6 | dict( 7 | type='RandomResize', 8 | scale=(2049, 1025), 9 | ratio_range=(0.5, 2.0), 10 | keep_ratio=True), 11 | dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), 12 | dict(type='RandomFlip', prob=0.5), 13 | dict(type='PhotoMetricDistortion'), 14 | dict(type='PackSegInputs') 15 | ] 16 | test_pipeline = [ 17 | dict(type='LoadImageFromFile'), 18 | dict(type='Resize', scale=(2049, 1025), keep_ratio=True), 19 | # add loading annotation after ``Resize`` because ground truth 20 | # does not need to do resize data transform 21 | 
dict(type='LoadAnnotations'), 22 | dict(type='PackSegInputs') 23 | ] 24 | train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) 25 | val_dataloader = dict(dataset=dict(pipeline=test_pipeline)) 26 | test_dataloader = val_dataloader 27 | 28 | val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU']) 29 | test_evaluator = val_evaluator 30 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/_base_/datasets/cityscapes_832x832.py: -------------------------------------------------------------------------------- 1 | _base_ = './cityscapes.py' 2 | crop_size = (832, 832) 3 | train_pipeline = [ 4 | dict(type='LoadImageFromFile'), 5 | dict(type='LoadAnnotations'), 6 | dict( 7 | type='RandomResize', 8 | scale=(2048, 1024), 9 | ratio_range=(0.5, 2.0), 10 | keep_ratio=True), 11 | dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), 12 | dict(type='RandomFlip', prob=0.5), 13 | dict(type='PhotoMetricDistortion'), 14 | dict(type='PackSegInputs') 15 | ] 16 | test_pipeline = [ 17 | dict(type='LoadImageFromFile'), 18 | dict(type='Resize', scale=(2048, 1024), keep_ratio=True), 19 | # add loading annotation after ``Resize`` because ground truth 20 | # does not need to do resize data transform 21 | dict(type='LoadAnnotations'), 22 | dict(type='PackSegInputs') 23 | ] 24 | train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) 25 | val_dataloader = dict(dataset=dict(pipeline=test_pipeline)) 26 | test_dataloader = val_dataloader 27 | 28 | val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU']) 29 | test_evaluator = val_evaluator 30 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/_base_/datasets/coco-stuff164k.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'COCOStuffDataset' 3 | data_root = 'data/coco_stuff164k' 4 | crop_size = (512, 512) 5 | train_pipeline = [ 6 | dict(type='LoadImageFromFile'), 7 | dict(type='LoadAnnotations'), 8 | dict( 9 | type='RandomResize', 10 | scale=(2048, 512), 11 | ratio_range=(0.5, 2.0), 12 | keep_ratio=True), 13 | dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), 14 | dict(type='RandomFlip', prob=0.5), 15 | dict(type='PhotoMetricDistortion'), 16 | dict(type='PackSegInputs') 17 | ] 18 | test_pipeline = [ 19 | dict(type='LoadImageFromFile'), 20 | dict(type='Resize', scale=(2048, 512), keep_ratio=True), 21 | # add loading annotation after ``Resize`` because ground truth 22 | # does not need to do resize data transform 23 | dict(type='LoadAnnotations'), 24 | dict(type='PackSegInputs') 25 | ] 26 | img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75] 27 | tta_pipeline = [ 28 | dict(type='LoadImageFromFile', backend_args=None), 29 | dict( 30 | type='TestTimeAug', 31 | transforms=[ 32 | [ 33 | dict(type='Resize', scale_factor=r, keep_ratio=True) 34 | for r in img_ratios 35 | ], 36 | [ 37 | dict(type='RandomFlip', prob=0., direction='horizontal'), 38 | dict(type='RandomFlip', prob=1., direction='horizontal') 39 | ], [dict(type='LoadAnnotations')], [dict(type='PackSegInputs')] 40 | ]) 41 | ] 42 | train_dataloader = dict( 43 | batch_size=4, 44 | num_workers=4, 45 | persistent_workers=True, 46 | sampler=dict(type='InfiniteSampler', shuffle=True), 47 | dataset=dict( 48 | type=dataset_type, 49 | data_root=data_root, 50 | data_prefix=dict( 51 | img_path='images/train2017', seg_map_path='annotations/train2017'), 52 | pipeline=train_pipeline)) 53 | 
val_dataloader = dict( 54 | batch_size=1, 55 | num_workers=4, 56 | persistent_workers=True, 57 | sampler=dict(type='DefaultSampler', shuffle=False), 58 | dataset=dict( 59 | type=dataset_type, 60 | data_root=data_root, 61 | data_prefix=dict( 62 | img_path='images/val2017', seg_map_path='annotations/val2017'), 63 | pipeline=test_pipeline)) 64 | test_dataloader = val_dataloader 65 | 66 | val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU']) 67 | test_evaluator = val_evaluator 68 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/_base_/datasets/levir_256x256.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'LEVIRCDDataset' 3 | data_root = r'data/LEVIRCD' 4 | 5 | albu_train_transforms = [ 6 | dict(type='RandomBrightnessContrast', p=0.2), 7 | dict(type='HorizontalFlip', p=0.5), 8 | dict(type='VerticalFlip', p=0.5) 9 | ] 10 | 11 | train_pipeline = [ 12 | dict(type='LoadMultipleRSImageFromFile'), 13 | dict(type='LoadAnnotations'), 14 | dict(type='Albu', transforms=albu_train_transforms), 15 | dict(type='ConcatCDInput'), 16 | dict(type='PackSegInputs') 17 | ] 18 | test_pipeline = [ 19 | dict(type='LoadMultipleRSImageFromFile'), 20 | dict(type='LoadAnnotations'), 21 | dict(type='ConcatCDInput'), 22 | dict(type='PackSegInputs') 23 | ] 24 | 25 | tta_pipeline = [ 26 | dict(type='LoadMultipleRSImageFromFile'), 27 | dict( 28 | type='TestTimeAug', 29 | transforms=[[dict(type='LoadAnnotations')], 30 | [dict(type='ConcatCDInput')], 31 | [dict(type='PackSegInputs')]]) 32 | ] 33 | train_dataloader = dict( 34 | batch_size=4, 35 | num_workers=4, 36 | persistent_workers=True, 37 | sampler=dict(type='InfiniteSampler', shuffle=True), 38 | dataset=dict( 39 | type=dataset_type, 40 | data_root=data_root, 41 | data_prefix=dict( 42 | img_path='train/A', 43 | img_path2='train/B', 44 | seg_map_path='train/label'), 45 | pipeline=train_pipeline)) 46 | val_dataloader = dict( 47 | batch_size=1, 48 | num_workers=4, 49 | persistent_workers=True, 50 | sampler=dict(type='DefaultSampler', shuffle=False), 51 | dataset=dict( 52 | type=dataset_type, 53 | data_root=data_root, 54 | data_prefix=dict( 55 | img_path='test/A', img_path2='test/B', seg_map_path='test/label'), 56 | pipeline=test_pipeline)) 57 | test_dataloader = val_dataloader 58 | val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU']) 59 | test_evaluator = val_evaluator 60 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/_base_/datasets/loveda.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'LoveDADataset' 3 | data_root = 'data/loveDA' 4 | crop_size = (512, 512) 5 | train_pipeline = [ 6 | dict(type='LoadImageFromFile'), 7 | dict(type='LoadAnnotations', reduce_zero_label=True), 8 | dict( 9 | type='RandomResize', 10 | scale=(2048, 512), 11 | ratio_range=(0.5, 2.0), 12 | keep_ratio=True), 13 | dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), 14 | dict(type='RandomFlip', prob=0.5), 15 | dict(type='PhotoMetricDistortion'), 16 | dict(type='PackSegInputs') 17 | ] 18 | test_pipeline = [ 19 | dict(type='LoadImageFromFile'), 20 | dict(type='Resize', scale=(1024, 1024), keep_ratio=True), 21 | # add loading annotation after ``Resize`` because ground truth 22 | # does not need to do resize data transform 23 | dict(type='LoadAnnotations', reduce_zero_label=True), 24 | 
dict(type='PackSegInputs') 25 | ] 26 | img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75] 27 | tta_pipeline = [ 28 | dict(type='LoadImageFromFile', backend_args=None), 29 | dict( 30 | type='TestTimeAug', 31 | transforms=[ 32 | [ 33 | dict(type='Resize', scale_factor=r, keep_ratio=True) 34 | for r in img_ratios 35 | ], 36 | [ 37 | dict(type='RandomFlip', prob=0., direction='horizontal'), 38 | dict(type='RandomFlip', prob=1., direction='horizontal') 39 | ], [dict(type='LoadAnnotations')], [dict(type='PackSegInputs')] 40 | ]) 41 | ] 42 | train_dataloader = dict( 43 | batch_size=4, 44 | num_workers=4, 45 | persistent_workers=True, 46 | sampler=dict(type='InfiniteSampler', shuffle=True), 47 | dataset=dict( 48 | type=dataset_type, 49 | data_root=data_root, 50 | data_prefix=dict( 51 | img_path='img_dir/train', seg_map_path='ann_dir/train'), 52 | pipeline=train_pipeline)) 53 | val_dataloader = dict( 54 | batch_size=1, 55 | num_workers=4, 56 | persistent_workers=True, 57 | sampler=dict(type='DefaultSampler', shuffle=False), 58 | dataset=dict( 59 | type=dataset_type, 60 | data_root=data_root, 61 | data_prefix=dict(img_path='img_dir/val', seg_map_path='ann_dir/val'), 62 | pipeline=test_pipeline)) 63 | test_dataloader = val_dataloader 64 | 65 | val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU']) 66 | test_evaluator = val_evaluator 67 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/_base_/datasets/mapillary_v1.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'MapillaryDataset_v1' 3 | data_root = 'data/mapillary/' 4 | crop_size = (512, 1024) 5 | train_pipeline = [ 6 | dict(type='LoadImageFromFile'), 7 | dict(type='LoadAnnotations'), 8 | dict( 9 | type='RandomResize', 10 | scale=(2048, 1024), 11 | ratio_range=(0.5, 2.0), 12 | keep_ratio=True), 13 | dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), 14 | dict(type='RandomFlip', prob=0.5), 15 | dict(type='PhotoMetricDistortion'), 16 | dict(type='PackSegInputs') 17 | ] 18 | test_pipeline = [ 19 | dict(type='LoadImageFromFile'), 20 | dict(type='Resize', scale=(2048, 1024), keep_ratio=True), 21 | # add loading annotation after ``Resize`` because ground truth 22 | # does not need to do resize data transform 23 | dict(type='LoadAnnotations'), 24 | dict(type='PackSegInputs') 25 | ] 26 | img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75] 27 | tta_pipeline = [ 28 | dict(type='LoadImageFromFile', file_client_args=dict(backend='disk')), 29 | dict( 30 | type='TestTimeAug', 31 | transforms=[ 32 | [ 33 | dict(type='Resize', scale_factor=r, keep_ratio=True) 34 | for r in img_ratios 35 | ], 36 | [ 37 | dict(type='RandomFlip', prob=0., direction='horizontal'), 38 | dict(type='RandomFlip', prob=1., direction='horizontal') 39 | ], [dict(type='LoadAnnotations')], [dict(type='PackSegInputs')] 40 | ]) 41 | ] 42 | train_dataloader = dict( 43 | batch_size=2, 44 | num_workers=4, 45 | persistent_workers=True, 46 | sampler=dict(type='InfiniteSampler', shuffle=True), 47 | dataset=dict( 48 | type=dataset_type, 49 | data_root=data_root, 50 | data_prefix=dict( 51 | img_path='training/images', seg_map_path='training/v1.2/labels'), 52 | pipeline=train_pipeline)) 53 | val_dataloader = dict( 54 | batch_size=1, 55 | num_workers=4, 56 | persistent_workers=True, 57 | sampler=dict(type='DefaultSampler', shuffle=False), 58 | dataset=dict( 59 | type=dataset_type, 60 | data_root=data_root, 61 | data_prefix=dict( 62 | 
img_path='validation/images', 63 | seg_map_path='validation/v1.2/labels'), 64 | pipeline=test_pipeline)) 65 | test_dataloader = val_dataloader 66 | 67 | val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU']) 68 | test_evaluator = val_evaluator 69 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/_base_/datasets/mapillary_v2.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'MapillaryDataset_v2' 3 | data_root = 'data/mapillary/' 4 | crop_size = (512, 1024) 5 | train_pipeline = [ 6 | dict(type='LoadImageFromFile'), 7 | dict(type='LoadAnnotations'), 8 | dict( 9 | type='RandomResize', 10 | scale=(2048, 1024), 11 | ratio_range=(0.5, 2.0), 12 | keep_ratio=True), 13 | dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), 14 | dict(type='RandomFlip', prob=0.5), 15 | dict(type='PhotoMetricDistortion'), 16 | dict(type='PackSegInputs') 17 | ] 18 | test_pipeline = [ 19 | dict(type='LoadImageFromFile'), 20 | dict(type='Resize', scale=(2048, 1024), keep_ratio=True), 21 | # add loading annotation after ``Resize`` because ground truth 22 | # does not need to do resize data transform 23 | dict(type='LoadAnnotations'), 24 | dict(type='PackSegInputs') 25 | ] 26 | img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75] 27 | tta_pipeline = [ 28 | dict(type='LoadImageFromFile', file_client_args=dict(backend='disk')), 29 | dict( 30 | type='TestTimeAug', 31 | transforms=[ 32 | [ 33 | dict(type='Resize', scale_factor=r, keep_ratio=True) 34 | for r in img_ratios 35 | ], 36 | [ 37 | dict(type='RandomFlip', prob=0., direction='horizontal'), 38 | dict(type='RandomFlip', prob=1., direction='horizontal') 39 | ], [dict(type='LoadAnnotations')], [dict(type='PackSegInputs')] 40 | ]) 41 | ] 42 | train_dataloader = dict( 43 | batch_size=2, 44 | num_workers=4, 45 | persistent_workers=True, 46 | sampler=dict(type='InfiniteSampler', shuffle=True), 47 | dataset=dict( 48 | type=dataset_type, 49 | data_root=data_root, 50 | data_prefix=dict( 51 | img_path='training/images', seg_map_path='training/v2.0/labels'), 52 | pipeline=train_pipeline)) 53 | val_dataloader = dict( 54 | batch_size=1, 55 | num_workers=4, 56 | persistent_workers=True, 57 | sampler=dict(type='DefaultSampler', shuffle=False), 58 | dataset=dict( 59 | type=dataset_type, 60 | data_root=data_root, 61 | data_prefix=dict( 62 | img_path='validation/images', 63 | seg_map_path='validation/v2.0/labels'), 64 | pipeline=test_pipeline)) 65 | test_dataloader = val_dataloader 66 | 67 | val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU']) 68 | test_evaluator = val_evaluator 69 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/_base_/datasets/nyu.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'NYUDataset' 3 | data_root = 'data/nyu' 4 | 5 | train_pipeline = [ 6 | dict(type='LoadImageFromFile'), 7 | dict(type='LoadDepthAnnotation', depth_rescale_factor=1e-3), 8 | dict(type='RandomDepthMix', prob=0.25), 9 | dict(type='RandomFlip', prob=0.5), 10 | dict(type='RandomCrop', crop_size=(480, 480)), 11 | dict( 12 | type='Albu', 13 | transforms=[ 14 | dict(type='RandomBrightnessContrast'), 15 | dict(type='RandomGamma'), 16 | dict(type='HueSaturationValue'), 17 | ]), 18 | dict( 19 | type='PackSegInputs', 20 | meta_keys=('img_path', 'depth_map_path', 'ori_shape', 'img_shape', 21 | 'pad_shape', 
'scale_factor', 'flip', 'flip_direction', 22 | 'category_id')), 23 | ] 24 | 25 | test_pipeline = [ 26 | dict(type='LoadImageFromFile'), 27 | dict(type='Resize', scale=(2000, 480), keep_ratio=True), 28 | dict(type='LoadDepthAnnotation', depth_rescale_factor=1e-3), 29 | dict( 30 | type='PackSegInputs', 31 | meta_keys=('img_path', 'depth_map_path', 'ori_shape', 'img_shape', 32 | 'pad_shape', 'scale_factor', 'flip', 'flip_direction', 33 | 'category_id')) 34 | ] 35 | 36 | train_dataloader = dict( 37 | batch_size=8, 38 | num_workers=8, 39 | persistent_workers=True, 40 | sampler=dict(type='InfiniteSampler', shuffle=True), 41 | dataset=dict( 42 | type=dataset_type, 43 | data_root=data_root, 44 | data_prefix=dict( 45 | img_path='images/train', depth_map_path='annotations/train'), 46 | pipeline=train_pipeline)) 47 | 48 | val_dataloader = dict( 49 | batch_size=1, 50 | num_workers=4, 51 | persistent_workers=True, 52 | sampler=dict(type='DefaultSampler', shuffle=False), 53 | dataset=dict( 54 | type=dataset_type, 55 | data_root=data_root, 56 | test_mode=True, 57 | data_prefix=dict( 58 | img_path='images/test', depth_map_path='annotations/test'), 59 | pipeline=test_pipeline)) 60 | test_dataloader = val_dataloader 61 | 62 | val_evaluator = dict( 63 | type='DepthMetric', 64 | min_depth_eval=0.001, 65 | max_depth_eval=10.0, 66 | crop_type='nyu_crop') 67 | test_evaluator = val_evaluator 68 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/_base_/datasets/nyu_512x512.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'NYUDataset' 3 | data_root = 'data/nyu' 4 | 5 | train_pipeline = [ 6 | dict(type='LoadImageFromFile'), 7 | dict(type='LoadDepthAnnotation', depth_rescale_factor=1e-3), 8 | dict(type='RandomDepthMix', prob=0.25), 9 | dict(type='RandomFlip', prob=0.5), 10 | dict( 11 | type='RandomResize', 12 | scale=(768, 512), 13 | ratio_range=(0.8, 1.5), 14 | keep_ratio=True), 15 | dict(type='RandomCrop', crop_size=(512, 512)), 16 | dict( 17 | type='Albu', 18 | transforms=[ 19 | dict(type='RandomBrightnessContrast'), 20 | dict(type='RandomGamma'), 21 | dict(type='HueSaturationValue'), 22 | ]), 23 | dict( 24 | type='PackSegInputs', 25 | meta_keys=('img_path', 'depth_map_path', 'ori_shape', 'img_shape', 26 | 'pad_shape', 'scale_factor', 'flip', 'flip_direction', 27 | 'category_id')), 28 | ] 29 | 30 | test_pipeline = [ 31 | dict(type='LoadImageFromFile'), 32 | dict(type='Resize', scale=(2048, 512), keep_ratio=True), 33 | dict(type='LoadDepthAnnotation', depth_rescale_factor=1e-3), 34 | dict( 35 | type='PackSegInputs', 36 | meta_keys=('img_path', 'depth_map_path', 'ori_shape', 'img_shape', 37 | 'pad_shape', 'scale_factor', 'flip', 'flip_direction', 38 | 'category_id')) 39 | ] 40 | 41 | train_dataloader = dict( 42 | batch_size=8, 43 | num_workers=8, 44 | persistent_workers=True, 45 | sampler=dict(type='InfiniteSampler', shuffle=True), 46 | dataset=dict( 47 | type=dataset_type, 48 | data_root=data_root, 49 | data_prefix=dict( 50 | img_path='images/train', depth_map_path='annotations/train'), 51 | pipeline=train_pipeline)) 52 | 53 | val_dataloader = dict( 54 | batch_size=1, 55 | num_workers=4, 56 | persistent_workers=True, 57 | sampler=dict(type='DefaultSampler', shuffle=False), 58 | dataset=dict( 59 | type=dataset_type, 60 | data_root=data_root, 61 | test_mode=True, 62 | data_prefix=dict( 63 | img_path='images/test', depth_map_path='annotations/test'), 64 | 
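# NOTE: `test_mode=True` above lets the dataset be used for evaluation
# without training-time filtering, while the train loader's
# `InfiniteSampler` pairs with the iteration-based schedules in
# configs/_base_/schedules: it yields an endless, periodically
# re-shuffled index stream instead of stopping at epoch boundaries.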
pipeline=test_pipeline)) 65 | test_dataloader = val_dataloader 66 | 67 | val_evaluator = dict( 68 | type='DepthMetric', 69 | min_depth_eval=0.001, 70 | max_depth_eval=10.0, 71 | crop_type='nyu_crop') 72 | test_evaluator = val_evaluator 73 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/_base_/datasets/pascal_context.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'PascalContextDataset' 3 | data_root = 'data/VOCdevkit/VOC2010/' 4 | 5 | img_scale = (520, 520) 6 | crop_size = (480, 480) 7 | 8 | train_pipeline = [ 9 | dict(type='LoadImageFromFile'), 10 | dict(type='LoadAnnotations'), 11 | dict( 12 | type='RandomResize', 13 | scale=img_scale, 14 | ratio_range=(0.5, 2.0), 15 | keep_ratio=True), 16 | dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), 17 | dict(type='RandomFlip', prob=0.5), 18 | dict(type='PhotoMetricDistortion'), 19 | dict(type='PackSegInputs') 20 | ] 21 | test_pipeline = [ 22 | dict(type='LoadImageFromFile'), 23 | dict(type='Resize', scale=img_scale, keep_ratio=True), 24 | # add loading annotation after ``Resize`` because ground truth 25 | # does not need to do resize data transform 26 | dict(type='LoadAnnotations'), 27 | dict(type='PackSegInputs') 28 | ] 29 | train_dataloader = dict( 30 | batch_size=4, 31 | num_workers=4, 32 | persistent_workers=True, 33 | sampler=dict(type='InfiniteSampler', shuffle=True), 34 | dataset=dict( 35 | type=dataset_type, 36 | data_root=data_root, 37 | data_prefix=dict( 38 | img_path='JPEGImages', seg_map_path='SegmentationClassContext'), 39 | ann_file='ImageSets/SegmentationContext/train.txt', 40 | pipeline=train_pipeline)) 41 | val_dataloader = dict( 42 | batch_size=1, 43 | num_workers=4, 44 | persistent_workers=True, 45 | sampler=dict(type='DefaultSampler', shuffle=False), 46 | dataset=dict( 47 | type=dataset_type, 48 | data_root=data_root, 49 | data_prefix=dict( 50 | img_path='JPEGImages', seg_map_path='SegmentationClassContext'), 51 | ann_file='ImageSets/SegmentationContext/val.txt', 52 | pipeline=test_pipeline)) 53 | test_dataloader = val_dataloader 54 | 55 | val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU']) 56 | test_evaluator = val_evaluator 57 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/_base_/datasets/potsdam.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'PotsdamDataset' 3 | data_root = 'data/potsdam' 4 | crop_size = (512, 512) 5 | train_pipeline = [ 6 | dict(type='LoadImageFromFile'), 7 | dict(type='LoadAnnotations', reduce_zero_label=True), 8 | dict( 9 | type='RandomResize', 10 | scale=(512, 512), 11 | ratio_range=(0.5, 2.0), 12 | keep_ratio=True), 13 | dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), 14 | dict(type='RandomFlip', prob=0.5), 15 | dict(type='PhotoMetricDistortion'), 16 | dict(type='PackSegInputs') 17 | ] 18 | test_pipeline = [ 19 | dict(type='LoadImageFromFile'), 20 | dict(type='Resize', scale=(512, 512), keep_ratio=True), 21 | # add loading annotation after ``Resize`` because ground truth 22 | # does not need to do resize data transform 23 | dict(type='LoadAnnotations', reduce_zero_label=True), 24 | dict(type='PackSegInputs') 25 | ] 26 | img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75] 27 | tta_pipeline = [ 28 | dict(type='LoadImageFromFile', backend_args=None), 29 | dict( 30 | 
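# NOTE: `TestTimeAug` takes the cartesian product of its transform
# lists, so the 6 scale ratios x 2 flips below produce 12 augmented
# views per image; `SegTTAModel` (wired up in default_runtime.py) then
# merges the 12 predictions, conceptually:
#   seg_logits = mean(unflip(model(view)) for view in views)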
type='TestTimeAug', 31 | transforms=[ 32 | [ 33 | dict(type='Resize', scale_factor=r, keep_ratio=True) 34 | for r in img_ratios 35 | ], 36 | [ 37 | dict(type='RandomFlip', prob=0., direction='horizontal'), 38 | dict(type='RandomFlip', prob=1., direction='horizontal') 39 | ], [dict(type='LoadAnnotations')], [dict(type='PackSegInputs')] 40 | ]) 41 | ] 42 | train_dataloader = dict( 43 | batch_size=4, 44 | num_workers=4, 45 | persistent_workers=True, 46 | sampler=dict(type='InfiniteSampler', shuffle=True), 47 | dataset=dict( 48 | type=dataset_type, 49 | data_root=data_root, 50 | data_prefix=dict( 51 | img_path='img_dir/train', seg_map_path='ann_dir/train'), 52 | pipeline=train_pipeline)) 53 | val_dataloader = dict( 54 | batch_size=1, 55 | num_workers=4, 56 | persistent_workers=True, 57 | sampler=dict(type='DefaultSampler', shuffle=False), 58 | dataset=dict( 59 | type=dataset_type, 60 | data_root=data_root, 61 | data_prefix=dict(img_path='img_dir/val', seg_map_path='ann_dir/val'), 62 | pipeline=test_pipeline)) 63 | test_dataloader = val_dataloader 64 | 65 | val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU']) 66 | test_evaluator = val_evaluator 67 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/_base_/datasets/synapse.py: -------------------------------------------------------------------------------- 1 | dataset_type = 'SynapseDataset' 2 | data_root = 'data/synapse/' 3 | img_scale = (224, 224) 4 | train_pipeline = [ 5 | dict(type='LoadImageFromFile'), 6 | dict(type='LoadAnnotations'), 7 | dict(type='Resize', scale=img_scale, keep_ratio=True), 8 | dict(type='RandomRotFlip', rotate_prob=0.5, flip_prob=0.5, degree=20), 9 | dict(type='PackSegInputs') 10 | ] 11 | test_pipeline = [ 12 | dict(type='LoadImageFromFile'), 13 | dict(type='Resize', scale=img_scale, keep_ratio=True), 14 | dict(type='LoadAnnotations'), 15 | dict(type='PackSegInputs') 16 | ] 17 | train_dataloader = dict( 18 | batch_size=6, 19 | num_workers=2, 20 | persistent_workers=True, 21 | sampler=dict(type='InfiniteSampler', shuffle=True), 22 | dataset=dict( 23 | type=dataset_type, 24 | data_root=data_root, 25 | data_prefix=dict( 26 | img_path='img_dir/train', seg_map_path='ann_dir/train'), 27 | pipeline=train_pipeline)) 28 | val_dataloader = dict( 29 | batch_size=1, 30 | num_workers=4, 31 | persistent_workers=True, 32 | sampler=dict(type='DefaultSampler', shuffle=False), 33 | dataset=dict( 34 | type=dataset_type, 35 | data_root=data_root, 36 | data_prefix=dict(img_path='img_dir/val', seg_map_path='ann_dir/val'), 37 | pipeline=test_pipeline)) 38 | test_dataloader = val_dataloader 39 | 40 | val_evaluator = dict(type='IoUMetric', iou_metrics=['mDice']) 41 | test_evaluator = val_evaluator 42 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/_base_/datasets/vaihingen.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'ISPRSDataset' 3 | data_root = 'data/vaihingen' 4 | crop_size = (512, 512) 5 | train_pipeline = [ 6 | dict(type='LoadImageFromFile'), 7 | dict(type='LoadAnnotations', reduce_zero_label=True), 8 | dict( 9 | type='RandomResize', 10 | scale=(512, 512), 11 | ratio_range=(0.5, 2.0), 12 | keep_ratio=True), 13 | dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), 14 | dict(type='RandomFlip', prob=0.5), 15 | dict(type='PhotoMetricDistortion'), 16 | dict(type='PackSegInputs') 17 | ] 18 | test_pipeline = [ 19 | 
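# NOTE: `reduce_zero_label=True` (used for Potsdam above and Vaihingen
# here) remaps the ISPRS labels at load time: class 0 becomes the ignore
# index 255 and every other id shifts down by one, conceptually:
#   label[label == 0] = 255; label = label - 1; label[label == 254] = 255
# so those pixels never contribute to the loss or the mIoU.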
dict(type='LoadImageFromFile'), 20 | dict(type='Resize', scale=(512, 512), keep_ratio=True), 21 | # add loading annotation after ``Resize`` because ground truth 22 | # does not need to do resize data transform 23 | dict(type='LoadAnnotations', reduce_zero_label=True), 24 | dict(type='PackSegInputs') 25 | ] 26 | img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75] 27 | tta_pipeline = [ 28 | dict(type='LoadImageFromFile', backend_args=None), 29 | dict( 30 | type='TestTimeAug', 31 | transforms=[ 32 | [ 33 | dict(type='Resize', scale_factor=r, keep_ratio=True) 34 | for r in img_ratios 35 | ], 36 | [ 37 | dict(type='RandomFlip', prob=0., direction='horizontal'), 38 | dict(type='RandomFlip', prob=1., direction='horizontal') 39 | ], [dict(type='LoadAnnotations')], [dict(type='PackSegInputs')] 40 | ]) 41 | ] 42 | train_dataloader = dict( 43 | batch_size=4, 44 | num_workers=4, 45 | persistent_workers=True, 46 | sampler=dict(type='InfiniteSampler', shuffle=True), 47 | dataset=dict( 48 | type=dataset_type, 49 | data_root=data_root, 50 | data_prefix=dict( 51 | img_path='img_dir/train', seg_map_path='ann_dir/train'), 52 | pipeline=train_pipeline)) 53 | val_dataloader = dict( 54 | batch_size=1, 55 | num_workers=4, 56 | persistent_workers=True, 57 | sampler=dict(type='DefaultSampler', shuffle=False), 58 | dataset=dict( 59 | type=dataset_type, 60 | data_root=data_root, 61 | data_prefix=dict(img_path='img_dir/val', seg_map_path='ann_dir/val'), 62 | pipeline=test_pipeline)) 63 | test_dataloader = val_dataloader 64 | 65 | val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU']) 66 | test_evaluator = val_evaluator 67 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/_base_/default_runtime.py: -------------------------------------------------------------------------------- 1 | default_scope = 'mmseg' 2 | env_cfg = dict( 3 | cudnn_benchmark=True, 4 | mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0), 5 | dist_cfg=dict(backend='nccl'), 6 | ) 7 | vis_backends = [dict(type='LocalVisBackend')] 8 | visualizer = dict( 9 | type='SegLocalVisualizer', vis_backends=vis_backends, name='visualizer') 10 | log_processor = dict(by_epoch=False) 11 | log_level = 'INFO' 12 | load_from = None 13 | resume = False 14 | 15 | tta_model = dict(type='SegTTAModel') 16 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/_base_/models/ann_r50-d8.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | data_preprocessor = dict( 4 | type='SegDataPreProcessor', 5 | mean=[123.675, 116.28, 103.53], 6 | std=[58.395, 57.12, 57.375], 7 | bgr_to_rgb=True, 8 | pad_val=0, 9 | seg_pad_val=255) 10 | model = dict( 11 | type='EncoderDecoder', 12 | data_preprocessor=data_preprocessor, 13 | pretrained='open-mmlab://resnet50_v1c', 14 | backbone=dict( 15 | type='ResNetV1c', 16 | depth=50, 17 | num_stages=4, 18 | out_indices=(0, 1, 2, 3), 19 | dilations=(1, 1, 2, 4), 20 | strides=(1, 2, 1, 1), 21 | norm_cfg=norm_cfg, 22 | norm_eval=False, 23 | style='pytorch', 24 | contract_dilation=True), 25 | decode_head=dict( 26 | type='ANNHead', 27 | in_channels=[1024, 2048], 28 | in_index=[2, 3], 29 | channels=512, 30 | project_channels=256, 31 | query_scales=(1, ), 32 | key_pool_scales=(1, 3, 6, 8), 33 | dropout_ratio=0.1, 34 | num_classes=19, 35 | norm_cfg=norm_cfg, 36 | align_corners=False, 37 | loss_decode=dict( 38 | 
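# NOTE: the decode head carries the full loss weight while the auxiliary
# FCN head below taps stage-3 features (in_index=2) for deep supervision,
# i.e. total = 1.0 * CE(decode) + 0.4 * CE(aux); the auxiliary branch
# only stabilises training and is dropped at inference.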
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 39 | auxiliary_head=dict( 40 | type='FCNHead', 41 | in_channels=1024, 42 | in_index=2, 43 | channels=256, 44 | num_convs=1, 45 | concat_input=False, 46 | dropout_ratio=0.1, 47 | num_classes=19, 48 | norm_cfg=norm_cfg, 49 | align_corners=False, 50 | loss_decode=dict( 51 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 52 | # model training and testing settings 53 | train_cfg=dict(), 54 | test_cfg=dict(mode='whole')) 55 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/_base_/models/apcnet_r50-d8.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | data_preprocessor = dict( 4 | type='SegDataPreProcessor', 5 | mean=[123.675, 116.28, 103.53], 6 | std=[58.395, 57.12, 57.375], 7 | bgr_to_rgb=True, 8 | pad_val=0, 9 | seg_pad_val=255) 10 | model = dict( 11 | type='EncoderDecoder', 12 | data_preprocessor=data_preprocessor, 13 | pretrained='open-mmlab://resnet50_v1c', 14 | backbone=dict( 15 | type='ResNetV1c', 16 | depth=50, 17 | num_stages=4, 18 | out_indices=(0, 1, 2, 3), 19 | dilations=(1, 1, 2, 4), 20 | strides=(1, 2, 1, 1), 21 | norm_cfg=norm_cfg, 22 | norm_eval=False, 23 | style='pytorch', 24 | contract_dilation=True), 25 | decode_head=dict( 26 | type='APCHead', 27 | in_channels=2048, 28 | in_index=3, 29 | channels=512, 30 | pool_scales=(1, 2, 3, 6), 31 | dropout_ratio=0.1, 32 | num_classes=19, 33 | norm_cfg=dict(type='SyncBN', requires_grad=True), 34 | align_corners=False, 35 | loss_decode=dict( 36 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 37 | auxiliary_head=dict( 38 | type='FCNHead', 39 | in_channels=1024, 40 | in_index=2, 41 | channels=256, 42 | num_convs=1, 43 | concat_input=False, 44 | dropout_ratio=0.1, 45 | num_classes=19, 46 | norm_cfg=norm_cfg, 47 | align_corners=False, 48 | loss_decode=dict( 49 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 50 | # model training and testing settings 51 | train_cfg=dict(), 52 | test_cfg=dict(mode='whole')) 53 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/_base_/models/ccnet_r50-d8.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | data_preprocessor = dict( 4 | type='SegDataPreProcessor', 5 | mean=[123.675, 116.28, 103.53], 6 | std=[58.395, 57.12, 57.375], 7 | bgr_to_rgb=True, 8 | pad_val=0, 9 | seg_pad_val=255) 10 | model = dict( 11 | type='EncoderDecoder', 12 | data_preprocessor=data_preprocessor, 13 | pretrained='open-mmlab://resnet50_v1c', 14 | backbone=dict( 15 | type='ResNetV1c', 16 | depth=50, 17 | num_stages=4, 18 | out_indices=(0, 1, 2, 3), 19 | dilations=(1, 1, 2, 4), 20 | strides=(1, 2, 1, 1), 21 | norm_cfg=norm_cfg, 22 | norm_eval=False, 23 | style='pytorch', 24 | contract_dilation=True), 25 | decode_head=dict( 26 | type='CCHead', 27 | in_channels=2048, 28 | in_index=3, 29 | channels=512, 30 | recurrence=2, 31 | dropout_ratio=0.1, 32 | num_classes=19, 33 | norm_cfg=norm_cfg, 34 | align_corners=False, 35 | loss_decode=dict( 36 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 37 | auxiliary_head=dict( 38 | type='FCNHead', 39 | in_channels=1024, 40 | in_index=2, 41 | channels=256, 42 | num_convs=1, 43 | concat_input=False, 44 | dropout_ratio=0.1, 45 | 
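# NOTE: the '-d8' suffix on these model files comes from the backbone
# settings above: strides=(1, 2, 1, 1) plus dilations=(1, 1, 2, 4) keep
# stages 3 and 4 at stage-2 resolution, so the deepest features sit at
# 1/8 of the input (output stride 8) rather than the usual 1/32.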
num_classes=19, 46 | norm_cfg=norm_cfg, 47 | align_corners=False, 48 | loss_decode=dict( 49 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 50 | # model training and testing settings 51 | train_cfg=dict(), 52 | test_cfg=dict(mode='whole')) 53 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/_base_/models/cgnet.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', eps=1e-03, requires_grad=True) 3 | data_preprocessor = dict( 4 | type='SegDataPreProcessor', 5 | mean=[72.39239876, 82.90891754, 73.15835921], 6 | std=[1, 1, 1], 7 | bgr_to_rgb=True, 8 | pad_val=0, 9 | seg_pad_val=255) 10 | model = dict( 11 | type='EncoderDecoder', 12 | data_preprocessor=data_preprocessor, 13 | backbone=dict( 14 | type='CGNet', 15 | norm_cfg=norm_cfg, 16 | in_channels=3, 17 | num_channels=(32, 64, 128), 18 | num_blocks=(3, 21), 19 | dilations=(2, 4), 20 | reductions=(8, 16)), 21 | decode_head=dict( 22 | type='FCNHead', 23 | in_channels=256, 24 | in_index=2, 25 | channels=256, 26 | num_convs=0, 27 | concat_input=False, 28 | dropout_ratio=0, 29 | num_classes=19, 30 | norm_cfg=norm_cfg, 31 | loss_decode=dict( 32 | type='CrossEntropyLoss', 33 | use_sigmoid=False, 34 | loss_weight=1.0, 35 | class_weight=[ 36 | 2.5959933, 6.7415504, 3.5354059, 9.8663225, 9.690899, 9.369352, 37 | 10.289121, 9.953208, 4.3097677, 9.490387, 7.674431, 9.396905, 38 | 10.347791, 6.3927646, 10.226669, 10.241062, 10.280587, 39 | 10.396974, 10.055647 40 | ])), 41 | # model training and testing settings 42 | train_cfg=dict(sampler=None), 43 | test_cfg=dict(mode='whole')) 44 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/_base_/models/danet_r50-d8.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | data_preprocessor = dict( 4 | type='SegDataPreProcessor', 5 | mean=[123.675, 116.28, 103.53], 6 | std=[58.395, 57.12, 57.375], 7 | bgr_to_rgb=True, 8 | pad_val=0, 9 | seg_pad_val=255) 10 | model = dict( 11 | type='EncoderDecoder', 12 | data_preprocessor=data_preprocessor, 13 | pretrained='open-mmlab://resnet50_v1c', 14 | backbone=dict( 15 | type='ResNetV1c', 16 | depth=50, 17 | num_stages=4, 18 | out_indices=(0, 1, 2, 3), 19 | dilations=(1, 1, 2, 4), 20 | strides=(1, 2, 1, 1), 21 | norm_cfg=norm_cfg, 22 | norm_eval=False, 23 | style='pytorch', 24 | contract_dilation=True), 25 | decode_head=dict( 26 | type='DAHead', 27 | in_channels=2048, 28 | in_index=3, 29 | channels=512, 30 | pam_channels=64, 31 | dropout_ratio=0.1, 32 | num_classes=19, 33 | norm_cfg=norm_cfg, 34 | align_corners=False, 35 | loss_decode=dict( 36 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 37 | auxiliary_head=dict( 38 | type='FCNHead', 39 | in_channels=1024, 40 | in_index=2, 41 | channels=256, 42 | num_convs=1, 43 | concat_input=False, 44 | dropout_ratio=0.1, 45 | num_classes=19, 46 | norm_cfg=norm_cfg, 47 | align_corners=False, 48 | loss_decode=dict( 49 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 50 | # model training and testing settings 51 | train_cfg=dict(), 52 | test_cfg=dict(mode='whole')) 53 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/_base_/models/deeplabv3_r50-d8.py: 
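# NOTE: deeplabv3_r50-d8.py below pairs that dilated backbone with an
# ASPPHead whose dilations=(1, 12, 24, 36) sample context at four
# receptive-field sizes in parallel; deeplabv3plus_r50-d8.py further down
# additionally fuses the stage-1 skip (c1_in_channels=256 projected to
# c1_channels=48) before the final classifier.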
-------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | data_preprocessor = dict( 4 | type='SegDataPreProcessor', 5 | mean=[123.675, 116.28, 103.53], 6 | std=[58.395, 57.12, 57.375], 7 | bgr_to_rgb=True, 8 | pad_val=0, 9 | seg_pad_val=255) 10 | model = dict( 11 | type='EncoderDecoder', 12 | data_preprocessor=data_preprocessor, 13 | pretrained='open-mmlab://resnet50_v1c', 14 | backbone=dict( 15 | type='ResNetV1c', 16 | depth=50, 17 | num_stages=4, 18 | out_indices=(0, 1, 2, 3), 19 | dilations=(1, 1, 2, 4), 20 | strides=(1, 2, 1, 1), 21 | norm_cfg=norm_cfg, 22 | norm_eval=False, 23 | style='pytorch', 24 | contract_dilation=True), 25 | decode_head=dict( 26 | type='ASPPHead', 27 | in_channels=2048, 28 | in_index=3, 29 | channels=512, 30 | dilations=(1, 12, 24, 36), 31 | dropout_ratio=0.1, 32 | num_classes=19, 33 | norm_cfg=norm_cfg, 34 | align_corners=False, 35 | loss_decode=dict( 36 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 37 | auxiliary_head=dict( 38 | type='FCNHead', 39 | in_channels=1024, 40 | in_index=2, 41 | channels=256, 42 | num_convs=1, 43 | concat_input=False, 44 | dropout_ratio=0.1, 45 | num_classes=19, 46 | norm_cfg=norm_cfg, 47 | align_corners=False, 48 | loss_decode=dict( 49 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 50 | # model training and testing settings 51 | train_cfg=dict(), 52 | test_cfg=dict(mode='whole')) 53 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/_base_/models/deeplabv3_unet_s5-d16.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | data_preprocessor = dict( 4 | type='SegDataPreProcessor', 5 | mean=[123.675, 116.28, 103.53], 6 | std=[58.395, 57.12, 57.375], 7 | bgr_to_rgb=True, 8 | pad_val=0, 9 | seg_pad_val=255) 10 | model = dict( 11 | type='EncoderDecoder', 12 | data_preprocessor=data_preprocessor, 13 | pretrained=None, 14 | backbone=dict( 15 | type='UNet', 16 | in_channels=3, 17 | base_channels=64, 18 | num_stages=5, 19 | strides=(1, 1, 1, 1, 1), 20 | enc_num_convs=(2, 2, 2, 2, 2), 21 | dec_num_convs=(2, 2, 2, 2), 22 | downsamples=(True, True, True, True), 23 | enc_dilations=(1, 1, 1, 1, 1), 24 | dec_dilations=(1, 1, 1, 1), 25 | with_cp=False, 26 | conv_cfg=None, 27 | norm_cfg=norm_cfg, 28 | act_cfg=dict(type='ReLU'), 29 | upsample_cfg=dict(type='InterpConv'), 30 | norm_eval=False), 31 | decode_head=dict( 32 | type='ASPPHead', 33 | in_channels=64, 34 | in_index=4, 35 | channels=16, 36 | dilations=(1, 12, 24, 36), 37 | dropout_ratio=0.1, 38 | num_classes=2, 39 | norm_cfg=norm_cfg, 40 | align_corners=False, 41 | loss_decode=dict( 42 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 43 | auxiliary_head=dict( 44 | type='FCNHead', 45 | in_channels=128, 46 | in_index=3, 47 | channels=64, 48 | num_convs=1, 49 | concat_input=False, 50 | dropout_ratio=0.1, 51 | num_classes=2, 52 | norm_cfg=norm_cfg, 53 | align_corners=False, 54 | loss_decode=dict( 55 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 56 | # model training and testing settings 57 | train_cfg=dict(), 58 | test_cfg=dict(mode='slide', crop_size=256, stride=170)) 59 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/_base_/models/deeplabv3plus_r50-d8.py: 
-------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | data_preprocessor = dict( 4 | type='SegDataPreProcessor', 5 | mean=[123.675, 116.28, 103.53], 6 | std=[58.395, 57.12, 57.375], 7 | bgr_to_rgb=True, 8 | pad_val=0, 9 | seg_pad_val=255) 10 | model = dict( 11 | type='EncoderDecoder', 12 | data_preprocessor=data_preprocessor, 13 | pretrained='open-mmlab://resnet50_v1c', 14 | backbone=dict( 15 | type='ResNetV1c', 16 | depth=50, 17 | num_stages=4, 18 | out_indices=(0, 1, 2, 3), 19 | dilations=(1, 1, 2, 4), 20 | strides=(1, 2, 1, 1), 21 | norm_cfg=norm_cfg, 22 | norm_eval=False, 23 | style='pytorch', 24 | contract_dilation=True), 25 | decode_head=dict( 26 | type='DepthwiseSeparableASPPHead', 27 | in_channels=2048, 28 | in_index=3, 29 | channels=512, 30 | dilations=(1, 12, 24, 36), 31 | c1_in_channels=256, 32 | c1_channels=48, 33 | dropout_ratio=0.1, 34 | num_classes=19, 35 | norm_cfg=norm_cfg, 36 | align_corners=False, 37 | loss_decode=dict( 38 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 39 | auxiliary_head=dict( 40 | type='FCNHead', 41 | in_channels=1024, 42 | in_index=2, 43 | channels=256, 44 | num_convs=1, 45 | concat_input=False, 46 | dropout_ratio=0.1, 47 | num_classes=19, 48 | norm_cfg=norm_cfg, 49 | align_corners=False, 50 | loss_decode=dict( 51 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 52 | # model training and testing settings 53 | train_cfg=dict(), 54 | test_cfg=dict(mode='whole')) 55 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/_base_/models/dmnet_r50-d8.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | data_preprocessor = dict( 4 | type='SegDataPreProcessor', 5 | mean=[123.675, 116.28, 103.53], 6 | std=[58.395, 57.12, 57.375], 7 | bgr_to_rgb=True, 8 | pad_val=0, 9 | seg_pad_val=255) 10 | model = dict( 11 | type='EncoderDecoder', 12 | data_preprocessor=data_preprocessor, 13 | pretrained='open-mmlab://resnet50_v1c', 14 | backbone=dict( 15 | type='ResNetV1c', 16 | depth=50, 17 | num_stages=4, 18 | out_indices=(0, 1, 2, 3), 19 | dilations=(1, 1, 2, 4), 20 | strides=(1, 2, 1, 1), 21 | norm_cfg=norm_cfg, 22 | norm_eval=False, 23 | style='pytorch', 24 | contract_dilation=True), 25 | decode_head=dict( 26 | type='DMHead', 27 | in_channels=2048, 28 | in_index=3, 29 | channels=512, 30 | filter_sizes=(1, 3, 5, 7), 31 | dropout_ratio=0.1, 32 | num_classes=19, 33 | norm_cfg=dict(type='SyncBN', requires_grad=True), 34 | align_corners=False, 35 | loss_decode=dict( 36 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 37 | auxiliary_head=dict( 38 | type='FCNHead', 39 | in_channels=1024, 40 | in_index=2, 41 | channels=256, 42 | num_convs=1, 43 | concat_input=False, 44 | dropout_ratio=0.1, 45 | num_classes=19, 46 | norm_cfg=norm_cfg, 47 | align_corners=False, 48 | loss_decode=dict( 49 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 50 | # model training and testing settings 51 | train_cfg=dict(), 52 | test_cfg=dict(mode='whole')) 53 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/_base_/models/dnl_r50-d8.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | 
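# NOTE: every fragment in this models/ directory repeats the same
# data_preprocessor: mean/std are the ImageNet RGB statistics (matching
# the ImageNet-pretrained checkpoints), `bgr_to_rgb=True` converts
# cv2-loaded BGR frames, and `seg_pad_val=255` pads labels with the
# ignore index so padded pixels never reach the loss.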
data_preprocessor = dict( 4 | type='SegDataPreProcessor', 5 | mean=[123.675, 116.28, 103.53], 6 | std=[58.395, 57.12, 57.375], 7 | bgr_to_rgb=True, 8 | pad_val=0, 9 | seg_pad_val=255) 10 | model = dict( 11 | type='EncoderDecoder', 12 | data_preprocessor=data_preprocessor, 13 | pretrained='open-mmlab://resnet50_v1c', 14 | backbone=dict( 15 | type='ResNetV1c', 16 | depth=50, 17 | num_stages=4, 18 | out_indices=(0, 1, 2, 3), 19 | dilations=(1, 1, 2, 4), 20 | strides=(1, 2, 1, 1), 21 | norm_cfg=norm_cfg, 22 | norm_eval=False, 23 | style='pytorch', 24 | contract_dilation=True), 25 | decode_head=dict( 26 | type='DNLHead', 27 | in_channels=2048, 28 | in_index=3, 29 | channels=512, 30 | dropout_ratio=0.1, 31 | reduction=2, 32 | use_scale=True, 33 | mode='embedded_gaussian', 34 | num_classes=19, 35 | norm_cfg=norm_cfg, 36 | align_corners=False, 37 | loss_decode=dict( 38 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 39 | auxiliary_head=dict( 40 | type='FCNHead', 41 | in_channels=1024, 42 | in_index=2, 43 | channels=256, 44 | num_convs=1, 45 | concat_input=False, 46 | dropout_ratio=0.1, 47 | num_classes=19, 48 | norm_cfg=norm_cfg, 49 | align_corners=False, 50 | loss_decode=dict( 51 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 52 | # model training and testing settings 53 | train_cfg=dict(), 54 | test_cfg=dict(mode='whole')) 55 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/_base_/models/dpt_vit-b16.py: -------------------------------------------------------------------------------- 1 | norm_cfg = dict(type='SyncBN', requires_grad=True) 2 | data_preprocessor = dict( 3 | type='SegDataPreProcessor', 4 | mean=[123.675, 116.28, 103.53], 5 | std=[58.395, 57.12, 57.375], 6 | bgr_to_rgb=True, 7 | pad_val=0, 8 | seg_pad_val=255) 9 | model = dict( 10 | type='EncoderDecoder', 11 | data_preprocessor=data_preprocessor, 12 | pretrained='pretrain/vit-b16_p16_224-80ecf9dd.pth', # noqa 13 | backbone=dict( 14 | type='VisionTransformer', 15 | img_size=224, 16 | embed_dims=768, 17 | num_layers=12, 18 | num_heads=12, 19 | out_indices=(2, 5, 8, 11), 20 | final_norm=False, 21 | with_cls_token=True, 22 | output_cls_token=True), 23 | decode_head=dict( 24 | type='DPTHead', 25 | in_channels=(768, 768, 768, 768), 26 | channels=256, 27 | embed_dims=768, 28 | post_process_channels=[96, 192, 384, 768], 29 | num_classes=150, 30 | readout_type='project', 31 | input_transform='multiple_select', 32 | in_index=(0, 1, 2, 3), 33 | norm_cfg=norm_cfg, 34 | loss_decode=dict( 35 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 36 | auxiliary_head=None, 37 | # model training and testing settings 38 | train_cfg=dict(), 39 | test_cfg=dict(mode='whole')) # yapf: disable 40 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/_base_/models/emanet_r50-d8.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | data_preprocessor = dict( 4 | type='SegDataPreProcessor', 5 | mean=[123.675, 116.28, 103.53], 6 | std=[58.395, 57.12, 57.375], 7 | bgr_to_rgb=True, 8 | pad_val=0, 9 | seg_pad_val=255) 10 | model = dict( 11 | type='EncoderDecoder', 12 | data_preprocessor=data_preprocessor, 13 | pretrained='open-mmlab://resnet50_v1c', 14 | backbone=dict( 15 | type='ResNetV1c', 16 | depth=50, 17 | num_stages=4, 18 | out_indices=(0, 1, 2, 3), 19 | dilations=(1, 1, 2, 4), 20 | 
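# NOTE: `SyncBN` assumes a distributed launch (it synchronises batch
# statistics across GPUs); for a quick single-GPU run the usual local
# override is, e.g.:
#   norm_cfg = dict(type='BN', requires_grad=True)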
strides=(1, 2, 1, 1), 21 | norm_cfg=norm_cfg, 22 | norm_eval=False, 23 | style='pytorch', 24 | contract_dilation=True), 25 | decode_head=dict( 26 | type='EMAHead', 27 | in_channels=2048, 28 | in_index=3, 29 | channels=256, 30 | ema_channels=512, 31 | num_bases=64, 32 | num_stages=3, 33 | momentum=0.1, 34 | dropout_ratio=0.1, 35 | num_classes=19, 36 | norm_cfg=norm_cfg, 37 | align_corners=False, 38 | loss_decode=dict( 39 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 40 | auxiliary_head=dict( 41 | type='FCNHead', 42 | in_channels=1024, 43 | in_index=2, 44 | channels=256, 45 | num_convs=1, 46 | concat_input=False, 47 | dropout_ratio=0.1, 48 | num_classes=19, 49 | norm_cfg=norm_cfg, 50 | align_corners=False, 51 | loss_decode=dict( 52 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 53 | # model training and testing settings 54 | train_cfg=dict(), 55 | test_cfg=dict(mode='whole')) 56 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/_base_/models/encnet_r50-d8.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | data_preprocessor = dict( 4 | type='SegDataPreProcessor', 5 | mean=[123.675, 116.28, 103.53], 6 | std=[58.395, 57.12, 57.375], 7 | bgr_to_rgb=True, 8 | pad_val=0, 9 | seg_pad_val=255) 10 | model = dict( 11 | type='EncoderDecoder', 12 | data_preprocessor=data_preprocessor, 13 | pretrained='open-mmlab://resnet50_v1c', 14 | backbone=dict( 15 | type='ResNetV1c', 16 | depth=50, 17 | num_stages=4, 18 | out_indices=(0, 1, 2, 3), 19 | dilations=(1, 1, 2, 4), 20 | strides=(1, 2, 1, 1), 21 | norm_cfg=norm_cfg, 22 | norm_eval=False, 23 | style='pytorch', 24 | contract_dilation=True), 25 | decode_head=dict( 26 | type='EncHead', 27 | in_channels=[512, 1024, 2048], 28 | in_index=(1, 2, 3), 29 | channels=512, 30 | num_codes=32, 31 | use_se_loss=True, 32 | add_lateral=False, 33 | dropout_ratio=0.1, 34 | num_classes=19, 35 | norm_cfg=norm_cfg, 36 | align_corners=False, 37 | loss_decode=dict( 38 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), 39 | loss_se_decode=dict( 40 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=0.2)), 41 | auxiliary_head=dict( 42 | type='FCNHead', 43 | in_channels=1024, 44 | in_index=2, 45 | channels=256, 46 | num_convs=1, 47 | concat_input=False, 48 | dropout_ratio=0.1, 49 | num_classes=19, 50 | norm_cfg=norm_cfg, 51 | align_corners=False, 52 | loss_decode=dict( 53 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 54 | # model training and testing settings 55 | train_cfg=dict(), 56 | test_cfg=dict(mode='whole')) 57 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/_base_/models/erfnet_fcn.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | data_preprocessor = dict( 4 | type='SegDataPreProcessor', 5 | mean=[123.675, 116.28, 103.53], 6 | std=[58.395, 57.12, 57.375], 7 | bgr_to_rgb=True, 8 | pad_val=0, 9 | seg_pad_val=255) 10 | model = dict( 11 | type='EncoderDecoder', 12 | data_preprocessor=data_preprocessor, 13 | pretrained=None, 14 | backbone=dict( 15 | type='ERFNet', 16 | in_channels=3, 17 | enc_downsample_channels=(16, 64, 128), 18 | enc_stage_non_bottlenecks=(5, 8), 19 | enc_non_bottleneck_dilations=(2, 4, 8, 16), 20 | enc_non_bottleneck_channels=(64, 128), 
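# NOTE: ERFNet is roughly symmetric around its bottleneck: the encoder
# downsamples through 16 -> 64 -> 128 channels (above) and the decoder
# upsamples back through 64 -> 16 (below), which is why the FCNHead in
# this file reads only 16 input channels at full resolution.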
21 | dec_upsample_channels=(64, 16), 22 | dec_stages_non_bottleneck=(2, 2), 23 | dec_non_bottleneck_channels=(64, 16), 24 | dropout_ratio=0.1, 25 | init_cfg=None), 26 | decode_head=dict( 27 | type='FCNHead', 28 | in_channels=16, 29 | channels=128, 30 | num_convs=1, 31 | concat_input=False, 32 | dropout_ratio=0.1, 33 | num_classes=19, 34 | norm_cfg=norm_cfg, 35 | align_corners=False, 36 | loss_decode=dict( 37 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 38 | # model training and testing settings 39 | train_cfg=dict(), 40 | test_cfg=dict(mode='whole')) 41 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/_base_/models/fast_scnn.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True, momentum=0.01) 3 | data_preprocessor = dict( 4 | type='SegDataPreProcessor', 5 | mean=[123.675, 116.28, 103.53], 6 | std=[58.395, 57.12, 57.375], 7 | bgr_to_rgb=True, 8 | pad_val=0, 9 | seg_pad_val=255) 10 | model = dict( 11 | type='EncoderDecoder', 12 | data_preprocessor=data_preprocessor, 13 | backbone=dict( 14 | type='FastSCNN', 15 | downsample_dw_channels=(32, 48), 16 | global_in_channels=64, 17 | global_block_channels=(64, 96, 128), 18 | global_block_strides=(2, 2, 1), 19 | global_out_channels=128, 20 | higher_in_channels=64, 21 | lower_in_channels=128, 22 | fusion_out_channels=128, 23 | out_indices=(0, 1, 2), 24 | norm_cfg=norm_cfg, 25 | align_corners=False), 26 | decode_head=dict( 27 | type='DepthwiseSeparableFCNHead', 28 | in_channels=128, 29 | channels=128, 30 | concat_input=False, 31 | num_classes=19, 32 | in_index=-1, 33 | norm_cfg=norm_cfg, 34 | align_corners=False, 35 | loss_decode=dict( 36 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1)), 37 | auxiliary_head=[ 38 | dict( 39 | type='FCNHead', 40 | in_channels=128, 41 | channels=32, 42 | num_convs=1, 43 | num_classes=19, 44 | in_index=-2, 45 | norm_cfg=norm_cfg, 46 | concat_input=False, 47 | align_corners=False, 48 | loss_decode=dict( 49 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=0.4)), 50 | dict( 51 | type='FCNHead', 52 | in_channels=64, 53 | channels=32, 54 | num_convs=1, 55 | num_classes=19, 56 | in_index=-3, 57 | norm_cfg=norm_cfg, 58 | concat_input=False, 59 | align_corners=False, 60 | loss_decode=dict( 61 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=0.4)), 62 | ], 63 | # model training and testing settings 64 | train_cfg=dict(), 65 | test_cfg=dict(mode='whole')) 66 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/_base_/models/fastfcn_r50-d32_jpu_psp.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | data_preprocessor = dict( 4 | type='SegDataPreProcessor', 5 | mean=[123.675, 116.28, 103.53], 6 | std=[58.395, 57.12, 57.375], 7 | bgr_to_rgb=True, 8 | pad_val=0, 9 | seg_pad_val=255) 10 | model = dict( 11 | type='EncoderDecoder', 12 | data_preprocessor=data_preprocessor, 13 | pretrained='open-mmlab://resnet50_v1c', 14 | backbone=dict( 15 | type='ResNetV1c', 16 | depth=50, 17 | num_stages=4, 18 | dilations=(1, 1, 2, 4), 19 | strides=(1, 2, 2, 2), 20 | out_indices=(1, 2, 3), 21 | norm_cfg=norm_cfg, 22 | norm_eval=False, 23 | style='pytorch', 24 | contract_dilation=True), 25 | neck=dict( 26 | type='JPU', 27 | in_channels=(512, 1024, 2048), 28 | 
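# NOTE: FastFCN keeps a plain strided backbone (strides=(1, 2, 2, 2)
# above, no dilation blow-up) and lets this JPU neck fuse the stride
# 8/16/32 maps back into one stride-8 feature via the parallel dilated
# (1, 2, 4, 8) convs below; that is the paper's substitute for an
# expensive dilated-ResNet backbone.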
mid_channels=512, 29 | start_level=0, 30 | end_level=-1, 31 | dilations=(1, 2, 4, 8), 32 | align_corners=False, 33 | norm_cfg=norm_cfg), 34 | decode_head=dict( 35 | type='PSPHead', 36 | in_channels=2048, 37 | in_index=2, 38 | channels=512, 39 | pool_scales=(1, 2, 3, 6), 40 | dropout_ratio=0.1, 41 | num_classes=19, 42 | norm_cfg=norm_cfg, 43 | align_corners=False, 44 | loss_decode=dict( 45 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 46 | auxiliary_head=dict( 47 | type='FCNHead', 48 | in_channels=1024, 49 | in_index=1, 50 | channels=256, 51 | num_convs=1, 52 | concat_input=False, 53 | dropout_ratio=0.1, 54 | num_classes=19, 55 | norm_cfg=norm_cfg, 56 | align_corners=False, 57 | loss_decode=dict( 58 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 59 | # model training and testing settings 60 | train_cfg=dict(), 61 | test_cfg=dict(mode='whole')) 62 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/_base_/models/fcn_hr18.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | data_preprocessor = dict( 4 | type='SegDataPreProcessor', 5 | mean=[123.675, 116.28, 103.53], 6 | std=[58.395, 57.12, 57.375], 7 | bgr_to_rgb=True, 8 | pad_val=0, 9 | seg_pad_val=255) 10 | model = dict( 11 | type='EncoderDecoder', 12 | data_preprocessor=data_preprocessor, 13 | pretrained='open-mmlab://msra/hrnetv2_w18', 14 | backbone=dict( 15 | type='HRNet', 16 | norm_cfg=norm_cfg, 17 | norm_eval=False, 18 | extra=dict( 19 | stage1=dict( 20 | num_modules=1, 21 | num_branches=1, 22 | block='BOTTLENECK', 23 | num_blocks=(4, ), 24 | num_channels=(64, )), 25 | stage2=dict( 26 | num_modules=1, 27 | num_branches=2, 28 | block='BASIC', 29 | num_blocks=(4, 4), 30 | num_channels=(18, 36)), 31 | stage3=dict( 32 | num_modules=4, 33 | num_branches=3, 34 | block='BASIC', 35 | num_blocks=(4, 4, 4), 36 | num_channels=(18, 36, 72)), 37 | stage4=dict( 38 | num_modules=3, 39 | num_branches=4, 40 | block='BASIC', 41 | num_blocks=(4, 4, 4, 4), 42 | num_channels=(18, 36, 72, 144)))), 43 | decode_head=dict( 44 | type='FCNHead', 45 | in_channels=[18, 36, 72, 144], 46 | in_index=(0, 1, 2, 3), 47 | channels=sum([18, 36, 72, 144]), 48 | input_transform='resize_concat', 49 | kernel_size=1, 50 | num_convs=1, 51 | concat_input=False, 52 | dropout_ratio=-1, 53 | num_classes=19, 54 | norm_cfg=norm_cfg, 55 | align_corners=False, 56 | loss_decode=dict( 57 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 58 | # model training and testing settings 59 | train_cfg=dict(), 60 | test_cfg=dict(mode='whole')) 61 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/_base_/models/fcn_r50-d8.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | data_preprocessor = dict( 4 | type='SegDataPreProcessor', 5 | mean=[123.675, 116.28, 103.53], 6 | std=[58.395, 57.12, 57.375], 7 | bgr_to_rgb=True, 8 | pad_val=0, 9 | seg_pad_val=255) 10 | model = dict( 11 | type='EncoderDecoder', 12 | data_preprocessor=data_preprocessor, 13 | pretrained='open-mmlab://resnet50_v1c', 14 | backbone=dict( 15 | type='ResNetV1c', 16 | depth=50, 17 | num_stages=4, 18 | out_indices=(0, 1, 2, 3), 19 | dilations=(1, 1, 2, 4), 20 | strides=(1, 2, 1, 1), 21 | norm_cfg=norm_cfg, 22 | norm_eval=False, 23 | 
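# NOTE: `norm_eval=False` keeps BatchNorm running statistics updating
# while fine-tuning; flipping it to True freezes the running mean/var,
# a common choice when the per-GPU batch is very small.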
style='pytorch', 24 | contract_dilation=True), 25 | decode_head=dict( 26 | type='FCNHead', 27 | in_channels=2048, 28 | in_index=3, 29 | channels=512, 30 | num_convs=2, 31 | concat_input=True, 32 | dropout_ratio=0.1, 33 | num_classes=19, 34 | norm_cfg=norm_cfg, 35 | align_corners=False, 36 | loss_decode=dict( 37 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 38 | auxiliary_head=dict( 39 | type='FCNHead', 40 | in_channels=1024, 41 | in_index=2, 42 | channels=256, 43 | num_convs=1, 44 | concat_input=False, 45 | dropout_ratio=0.1, 46 | num_classes=19, 47 | norm_cfg=norm_cfg, 48 | align_corners=False, 49 | loss_decode=dict( 50 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 51 | # model training and testing settings 52 | train_cfg=dict(), 53 | test_cfg=dict(mode='whole')) 54 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/_base_/models/fcn_unet_s5-d16.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | data_preprocessor = dict( 4 | type='SegDataPreProcessor', 5 | mean=[123.675, 116.28, 103.53], 6 | std=[58.395, 57.12, 57.375], 7 | bgr_to_rgb=True, 8 | pad_val=0, 9 | seg_pad_val=255) 10 | model = dict( 11 | type='EncoderDecoder', 12 | data_preprocessor=data_preprocessor, 13 | pretrained=None, 14 | backbone=dict( 15 | type='UNet', 16 | in_channels=3, 17 | base_channels=64, 18 | num_stages=5, 19 | strides=(1, 1, 1, 1, 1), 20 | enc_num_convs=(2, 2, 2, 2, 2), 21 | dec_num_convs=(2, 2, 2, 2), 22 | downsamples=(True, True, True, True), 23 | enc_dilations=(1, 1, 1, 1, 1), 24 | dec_dilations=(1, 1, 1, 1), 25 | with_cp=False, 26 | conv_cfg=None, 27 | norm_cfg=norm_cfg, 28 | act_cfg=dict(type='ReLU'), 29 | upsample_cfg=dict(type='InterpConv'), 30 | norm_eval=False), 31 | decode_head=dict( 32 | type='FCNHead', 33 | in_channels=64, 34 | in_index=4, 35 | channels=64, 36 | num_convs=1, 37 | concat_input=False, 38 | dropout_ratio=0.1, 39 | num_classes=2, 40 | norm_cfg=norm_cfg, 41 | align_corners=False, 42 | loss_decode=dict( 43 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 44 | auxiliary_head=dict( 45 | type='FCNHead', 46 | in_channels=128, 47 | in_index=3, 48 | channels=64, 49 | num_convs=1, 50 | concat_input=False, 51 | dropout_ratio=0.1, 52 | num_classes=2, 53 | norm_cfg=norm_cfg, 54 | align_corners=False, 55 | loss_decode=dict( 56 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 57 | # model training and testing settings 58 | train_cfg=dict(), 59 | test_cfg=dict(mode='slide', crop_size=256, stride=170)) 60 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/_base_/models/fpn_poolformer_s12.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | checkpoint_file = 'https://download.openmmlab.com/mmclassification/v0/poolformer/poolformer-s12_3rdparty_32xb128_in1k_20220414-f8d83051.pth' # noqa 4 | # TODO: delete custom_imports after mmpretrain supports auto import 5 | # please install mmpretrain >= 1.0.0rc7 6 | # import mmpretrain.models to trigger register_module in mmpretrain 7 | custom_imports = dict( 8 | imports=['mmpretrain.models'], allow_failed_imports=False) 9 | data_preprocessor = dict( 10 | type='SegDataPreProcessor', 11 | mean=[123.675, 116.28, 103.53], 12 | std=[58.395, 57.12, 57.375], 
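# NOTE: the backbone here comes from mmpretrain (registered through
# `custom_imports` above), and `prefix='backbone.'` in its init_cfg
# keeps only the matching checkpoint keys, conceptually:
#   state = {k[len('backbone.'):]: v
#            for k, v in ckpt['state_dict'].items()
#            if k.startswith('backbone.')}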
13 | bgr_to_rgb=True, 14 | pad_val=0, 15 | seg_pad_val=255) 16 | model = dict( 17 | type='EncoderDecoder', 18 | data_preprocessor=data_preprocessor, 19 | backbone=dict( 20 | type='mmpretrain.PoolFormer', 21 | arch='s12', 22 | init_cfg=dict( 23 | type='Pretrained', checkpoint=checkpoint_file, prefix='backbone.'), 24 | in_patch_size=7, 25 | in_stride=4, 26 | in_pad=2, 27 | down_patch_size=3, 28 | down_stride=2, 29 | down_pad=1, 30 | drop_rate=0., 31 | drop_path_rate=0., 32 | out_indices=(0, 2, 4, 6), 33 | frozen_stages=0, 34 | ), 35 | neck=dict( 36 | type='FPN', 37 | in_channels=[256, 512, 1024, 2048], 38 | out_channels=256, 39 | num_outs=4), 40 | decode_head=dict( 41 | type='FPNHead', 42 | in_channels=[256, 256, 256, 256], 43 | in_index=[0, 1, 2, 3], 44 | feature_strides=[4, 8, 16, 32], 45 | channels=128, 46 | dropout_ratio=0.1, 47 | num_classes=19, 48 | norm_cfg=norm_cfg, 49 | align_corners=False, 50 | loss_decode=dict( 51 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 52 | # model training and testing settings 53 | train_cfg=dict(), 54 | test_cfg=dict(mode='whole')) 55 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/_base_/models/fpn_r50.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | data_preprocessor = dict( 4 | type='SegDataPreProcessor', 5 | mean=[123.675, 116.28, 103.53], 6 | std=[58.395, 57.12, 57.375], 7 | bgr_to_rgb=True, 8 | pad_val=0, 9 | seg_pad_val=255) 10 | model = dict( 11 | type='EncoderDecoder', 12 | data_preprocessor=data_preprocessor, 13 | pretrained='open-mmlab://resnet50_v1c', 14 | backbone=dict( 15 | type='ResNetV1c', 16 | depth=50, 17 | num_stages=4, 18 | out_indices=(0, 1, 2, 3), 19 | dilations=(1, 1, 1, 1), 20 | strides=(1, 2, 2, 2), 21 | norm_cfg=norm_cfg, 22 | norm_eval=False, 23 | style='pytorch', 24 | contract_dilation=True), 25 | neck=dict( 26 | type='FPN', 27 | in_channels=[256, 512, 1024, 2048], 28 | out_channels=256, 29 | num_outs=4), 30 | decode_head=dict( 31 | type='FPNHead', 32 | in_channels=[256, 256, 256, 256], 33 | in_index=[0, 1, 2, 3], 34 | feature_strides=[4, 8, 16, 32], 35 | channels=128, 36 | dropout_ratio=0.1, 37 | num_classes=19, 38 | norm_cfg=norm_cfg, 39 | align_corners=False, 40 | loss_decode=dict( 41 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 42 | # model training and testing settings 43 | train_cfg=dict(), 44 | test_cfg=dict(mode='whole')) 45 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/_base_/models/gcnet_r50-d8.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | data_preprocessor = dict( 4 | type='SegDataPreProcessor', 5 | mean=[123.675, 116.28, 103.53], 6 | std=[58.395, 57.12, 57.375], 7 | bgr_to_rgb=True, 8 | pad_val=0, 9 | seg_pad_val=255) 10 | model = dict( 11 | type='EncoderDecoder', 12 | data_preprocessor=data_preprocessor, 13 | pretrained='open-mmlab://resnet50_v1c', 14 | backbone=dict( 15 | type='ResNetV1c', 16 | depth=50, 17 | num_stages=4, 18 | out_indices=(0, 1, 2, 3), 19 | dilations=(1, 1, 2, 4), 20 | strides=(1, 2, 1, 1), 21 | norm_cfg=norm_cfg, 22 | norm_eval=False, 23 | style='pytorch', 24 | contract_dilation=True), 25 | decode_head=dict( 26 | type='GCHead', 27 | in_channels=2048, 28 | in_index=3, 29 | channels=512, 30 | 
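# NOTE: the GCHead below squeezes its global-context transform to
# channels * ratio = 512 * 0.25 = 128 hidden channels, the bottleneck
# design from the GCNet paper.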
ratio=1 / 4., 31 | pooling_type='att', 32 | fusion_types=('channel_add', ), 33 | dropout_ratio=0.1, 34 | num_classes=19, 35 | norm_cfg=norm_cfg, 36 | align_corners=False, 37 | loss_decode=dict( 38 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 39 | auxiliary_head=dict( 40 | type='FCNHead', 41 | in_channels=1024, 42 | in_index=2, 43 | channels=256, 44 | num_convs=1, 45 | concat_input=False, 46 | dropout_ratio=0.1, 47 | num_classes=19, 48 | norm_cfg=norm_cfg, 49 | align_corners=False, 50 | loss_decode=dict( 51 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 52 | # model training and testing settings 53 | train_cfg=dict(), 54 | test_cfg=dict(mode='whole')) 55 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/_base_/models/isanet_r50-d8.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | data_preprocessor = dict( 4 | type='SegDataPreProcessor', 5 | mean=[123.675, 116.28, 103.53], 6 | std=[58.395, 57.12, 57.375], 7 | bgr_to_rgb=True, 8 | pad_val=0, 9 | seg_pad_val=255) 10 | model = dict( 11 | type='EncoderDecoder', 12 | data_preprocessor=data_preprocessor, 13 | pretrained='open-mmlab://resnet50_v1c', 14 | backbone=dict( 15 | type='ResNetV1c', 16 | depth=50, 17 | num_stages=4, 18 | out_indices=(0, 1, 2, 3), 19 | dilations=(1, 1, 2, 4), 20 | strides=(1, 2, 1, 1), 21 | norm_cfg=norm_cfg, 22 | norm_eval=False, 23 | style='pytorch', 24 | contract_dilation=True), 25 | decode_head=dict( 26 | type='ISAHead', 27 | in_channels=2048, 28 | in_index=3, 29 | channels=512, 30 | isa_channels=256, 31 | down_factor=(8, 8), 32 | dropout_ratio=0.1, 33 | num_classes=19, 34 | norm_cfg=norm_cfg, 35 | align_corners=False, 36 | loss_decode=dict( 37 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 38 | auxiliary_head=dict( 39 | type='FCNHead', 40 | in_channels=1024, 41 | in_index=2, 42 | channels=256, 43 | num_convs=1, 44 | concat_input=False, 45 | dropout_ratio=0.1, 46 | num_classes=19, 47 | norm_cfg=norm_cfg, 48 | align_corners=False, 49 | loss_decode=dict( 50 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 51 | # model training and testing settings 52 | train_cfg=dict(), 53 | test_cfg=dict(mode='whole')) 54 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/_base_/models/lraspp_m-v3-d8.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', eps=0.001, requires_grad=True) 3 | data_preprocessor = dict( 4 | type='SegDataPreProcessor', 5 | mean=[123.675, 116.28, 103.53], 6 | std=[58.395, 57.12, 57.375], 7 | bgr_to_rgb=True, 8 | pad_val=0, 9 | seg_pad_val=255) 10 | model = dict( 11 | type='EncoderDecoder', 12 | data_preprocessor=data_preprocessor, 13 | backbone=dict( 14 | type='MobileNetV3', 15 | arch='large', 16 | out_indices=(1, 3, 16), 17 | norm_cfg=norm_cfg), 18 | decode_head=dict( 19 | type='LRASPPHead', 20 | in_channels=(16, 24, 960), 21 | in_index=(0, 1, 2), 22 | channels=128, 23 | input_transform='multiple_select', 24 | dropout_ratio=0.1, 25 | num_classes=19, 26 | norm_cfg=norm_cfg, 27 | act_cfg=dict(type='ReLU'), 28 | align_corners=False, 29 | loss_decode=dict( 30 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 31 | # model training and testing settings 32 | train_cfg=dict(), 33 | test_cfg=dict(mode='whole')) 
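None of these `_base_` fragments is trained directly: a top-level config lists one model file, one dataset file, a schedule and `default_runtime.py` under `_base_`, and mmengine assembles them. A minimal sketch of materializing two fragments by hand, assuming an mmsegmentation 1.x install and a working directory of `2DVMamba/segmentation` so the relative paths resolve (the dataset line additionally assumes `data/potsdam` exists on disk):

```python
# Hedged usage sketch, not repo code: build a model and a dataset
# straight from the _base_ fragments in this directory tree.
from mmengine.config import Config

from mmseg.registry import DATASETS, MODELS
from mmseg.utils import register_all_modules

register_all_modules()  # populate the registries behind every type='...' lookup

model_cfg = Config.fromfile('configs/_base_/models/lraspp_m-v3-d8.py')
model = MODELS.build(model_cfg.model)  # EncoderDecoder: MobileNetV3 + LRASPPHead

data_cfg = Config.fromfile('configs/_base_/datasets/potsdam.py')
dataset = DATASETS.build(data_cfg.train_dataloader.dataset)  # needs data/potsdam
print(type(model).__name__, len(dataset))
```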
34 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/_base_/models/nonlocal_r50-d8.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | data_preprocessor = dict( 4 | type='SegDataPreProcessor', 5 | mean=[123.675, 116.28, 103.53], 6 | std=[58.395, 57.12, 57.375], 7 | bgr_to_rgb=True, 8 | pad_val=0, 9 | seg_pad_val=255) 10 | model = dict( 11 | type='EncoderDecoder', 12 | data_preprocessor=data_preprocessor, 13 | pretrained='open-mmlab://resnet50_v1c', 14 | backbone=dict( 15 | type='ResNetV1c', 16 | depth=50, 17 | num_stages=4, 18 | out_indices=(0, 1, 2, 3), 19 | dilations=(1, 1, 2, 4), 20 | strides=(1, 2, 1, 1), 21 | norm_cfg=norm_cfg, 22 | norm_eval=False, 23 | style='pytorch', 24 | contract_dilation=True), 25 | decode_head=dict( 26 | type='NLHead', 27 | in_channels=2048, 28 | in_index=3, 29 | channels=512, 30 | dropout_ratio=0.1, 31 | reduction=2, 32 | use_scale=True, 33 | mode='embedded_gaussian', 34 | num_classes=19, 35 | norm_cfg=norm_cfg, 36 | align_corners=False, 37 | loss_decode=dict( 38 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 39 | auxiliary_head=dict( 40 | type='FCNHead', 41 | in_channels=1024, 42 | in_index=2, 43 | channels=256, 44 | num_convs=1, 45 | concat_input=False, 46 | dropout_ratio=0.1, 47 | num_classes=19, 48 | norm_cfg=norm_cfg, 49 | align_corners=False, 50 | loss_decode=dict( 51 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 52 | # model training and testing settings 53 | train_cfg=dict(), 54 | test_cfg=dict(mode='whole')) 55 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/_base_/models/ocrnet_r50-d8.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | data_preprocessor = dict( 4 | type='SegDataPreProcessor', 5 | mean=[123.675, 116.28, 103.53], 6 | std=[58.395, 57.12, 57.375], 7 | bgr_to_rgb=True, 8 | pad_val=0, 9 | seg_pad_val=255) 10 | model = dict( 11 | type='CascadeEncoderDecoder', 12 | data_preprocessor=data_preprocessor, 13 | num_stages=2, 14 | pretrained='open-mmlab://resnet50_v1c', 15 | backbone=dict( 16 | type='ResNetV1c', 17 | depth=50, 18 | num_stages=4, 19 | out_indices=(0, 1, 2, 3), 20 | dilations=(1, 1, 2, 4), 21 | strides=(1, 2, 1, 1), 22 | norm_cfg=norm_cfg, 23 | norm_eval=False, 24 | style='pytorch', 25 | contract_dilation=True), 26 | decode_head=[ 27 | dict( 28 | type='FCNHead', 29 | in_channels=1024, 30 | in_index=2, 31 | channels=256, 32 | num_convs=1, 33 | concat_input=False, 34 | dropout_ratio=0.1, 35 | num_classes=19, 36 | norm_cfg=norm_cfg, 37 | align_corners=False, 38 | loss_decode=dict( 39 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 40 | dict( 41 | type='OCRHead', 42 | in_channels=2048, 43 | in_index=3, 44 | channels=512, 45 | ocr_channels=256, 46 | dropout_ratio=0.1, 47 | num_classes=19, 48 | norm_cfg=norm_cfg, 49 | align_corners=False, 50 | loss_decode=dict( 51 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)) 52 | ], 53 | # model training and testing settings 54 | train_cfg=dict(), 55 | test_cfg=dict(mode='whole')) 56 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/_base_/models/pointrend_r50.py: 
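# NOTE: ocrnet_r50-d8.py (above) and pointrend_r50.py (below) both use
# type='CascadeEncoderDecoder' with num_stages=2: `decode_head` is a
# list whose stages run in order, each stage refining the previous
# stage's prediction (FCN -> OCR context pooling, FPN -> PointRend point
# refinement), and each stage adds its own weighted loss.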
-------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | data_preprocessor = dict( 4 | type='SegDataPreProcessor', 5 | mean=[123.675, 116.28, 103.53], 6 | std=[58.395, 57.12, 57.375], 7 | bgr_to_rgb=True, 8 | pad_val=0, 9 | seg_pad_val=255) 10 | model = dict( 11 | type='CascadeEncoderDecoder', 12 | data_preprocessor=data_preprocessor, 13 | num_stages=2, 14 | pretrained='open-mmlab://resnet50_v1c', 15 | backbone=dict( 16 | type='ResNetV1c', 17 | depth=50, 18 | num_stages=4, 19 | out_indices=(0, 1, 2, 3), 20 | dilations=(1, 1, 1, 1), 21 | strides=(1, 2, 2, 2), 22 | norm_cfg=norm_cfg, 23 | norm_eval=False, 24 | style='pytorch', 25 | contract_dilation=True), 26 | neck=dict( 27 | type='FPN', 28 | in_channels=[256, 512, 1024, 2048], 29 | out_channels=256, 30 | num_outs=4), 31 | decode_head=[ 32 | dict( 33 | type='FPNHead', 34 | in_channels=[256, 256, 256, 256], 35 | in_index=[0, 1, 2, 3], 36 | feature_strides=[4, 8, 16, 32], 37 | channels=128, 38 | dropout_ratio=-1, 39 | num_classes=19, 40 | norm_cfg=norm_cfg, 41 | align_corners=False, 42 | loss_decode=dict( 43 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 44 | dict( 45 | type='PointHead', 46 | in_channels=[256], 47 | in_index=[0], 48 | channels=256, 49 | num_fcs=3, 50 | coarse_pred_each_layer=True, 51 | dropout_ratio=-1, 52 | num_classes=19, 53 | align_corners=False, 54 | loss_decode=dict( 55 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)) 56 | ], 57 | # model training and testing settings 58 | train_cfg=dict( 59 | num_points=2048, oversample_ratio=3, importance_sample_ratio=0.75), 60 | test_cfg=dict( 61 | mode='whole', 62 | subdivision_steps=2, 63 | subdivision_num_points=8196, 64 | scale_factor=2)) 65 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/_base_/models/psanet_r50-d8.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | data_preprocessor = dict( 4 | type='SegDataPreProcessor', 5 | mean=[123.675, 116.28, 103.53], 6 | std=[58.395, 57.12, 57.375], 7 | bgr_to_rgb=True, 8 | pad_val=0, 9 | seg_pad_val=255) 10 | model = dict( 11 | type='EncoderDecoder', 12 | data_preprocessor=data_preprocessor, 13 | pretrained='open-mmlab://resnet50_v1c', 14 | backbone=dict( 15 | type='ResNetV1c', 16 | depth=50, 17 | num_stages=4, 18 | out_indices=(0, 1, 2, 3), 19 | dilations=(1, 1, 2, 4), 20 | strides=(1, 2, 1, 1), 21 | norm_cfg=norm_cfg, 22 | norm_eval=False, 23 | style='pytorch', 24 | contract_dilation=True), 25 | decode_head=dict( 26 | type='PSAHead', 27 | in_channels=2048, 28 | in_index=3, 29 | channels=512, 30 | mask_size=(97, 97), 31 | psa_type='bi-direction', 32 | compact=False, 33 | shrink_factor=2, 34 | normalization_factor=1.0, 35 | psa_softmax=True, 36 | dropout_ratio=0.1, 37 | num_classes=19, 38 | norm_cfg=norm_cfg, 39 | align_corners=False, 40 | loss_decode=dict( 41 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 42 | auxiliary_head=dict( 43 | type='FCNHead', 44 | in_channels=1024, 45 | in_index=2, 46 | channels=256, 47 | num_convs=1, 48 | concat_input=False, 49 | dropout_ratio=0.1, 50 | num_classes=19, 51 | norm_cfg=norm_cfg, 52 | align_corners=False, 53 | loss_decode=dict( 54 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 55 | # model training and testing settings 56 | train_cfg=dict(), 57 | 
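# NOTE: `mode='whole'` below runs a single full-image forward at test
# time; the UNet fragments above use `mode='slide'` instead, scanning
# overlapping crops and averaging logits where windows overlap.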
test_cfg=dict(mode='whole')) 58 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/_base_/models/pspnet_r50-d8.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | data_preprocessor = dict( 4 | type='SegDataPreProcessor', 5 | mean=[123.675, 116.28, 103.53], 6 | std=[58.395, 57.12, 57.375], 7 | bgr_to_rgb=True, 8 | pad_val=0, 9 | seg_pad_val=255) 10 | model = dict( 11 | type='EncoderDecoder', 12 | data_preprocessor=data_preprocessor, 13 | pretrained='open-mmlab://resnet50_v1c', 14 | backbone=dict( 15 | type='ResNetV1c', 16 | depth=50, 17 | num_stages=4, 18 | out_indices=(0, 1, 2, 3), 19 | dilations=(1, 1, 2, 4), 20 | strides=(1, 2, 1, 1), 21 | norm_cfg=norm_cfg, 22 | norm_eval=False, 23 | style='pytorch', 24 | contract_dilation=True), 25 | decode_head=dict( 26 | type='PSPHead', 27 | in_channels=2048, 28 | in_index=3, 29 | channels=512, 30 | pool_scales=(1, 2, 3, 6), 31 | dropout_ratio=0.1, 32 | num_classes=19, 33 | norm_cfg=norm_cfg, 34 | align_corners=False, 35 | loss_decode=dict( 36 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 37 | auxiliary_head=dict( 38 | type='FCNHead', 39 | in_channels=1024, 40 | in_index=2, 41 | channels=256, 42 | num_convs=1, 43 | concat_input=False, 44 | dropout_ratio=0.1, 45 | num_classes=19, 46 | norm_cfg=norm_cfg, 47 | align_corners=False, 48 | loss_decode=dict( 49 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 50 | # model training and testing settings 51 | train_cfg=dict(), 52 | test_cfg=dict(mode='whole')) 53 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/_base_/models/pspnet_unet_s5-d16.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | data_preprocessor = dict( 4 | type='SegDataPreProcessor', 5 | mean=[123.675, 116.28, 103.53], 6 | std=[58.395, 57.12, 57.375], 7 | bgr_to_rgb=True, 8 | pad_val=0, 9 | seg_pad_val=255) 10 | model = dict( 11 | type='EncoderDecoder', 12 | data_preprocessor=data_preprocessor, 13 | pretrained=None, 14 | backbone=dict( 15 | type='UNet', 16 | in_channels=3, 17 | base_channels=64, 18 | num_stages=5, 19 | strides=(1, 1, 1, 1, 1), 20 | enc_num_convs=(2, 2, 2, 2, 2), 21 | dec_num_convs=(2, 2, 2, 2), 22 | downsamples=(True, True, True, True), 23 | enc_dilations=(1, 1, 1, 1, 1), 24 | dec_dilations=(1, 1, 1, 1), 25 | with_cp=False, 26 | conv_cfg=None, 27 | norm_cfg=norm_cfg, 28 | act_cfg=dict(type='ReLU'), 29 | upsample_cfg=dict(type='InterpConv'), 30 | norm_eval=False), 31 | decode_head=dict( 32 | type='PSPHead', 33 | in_channels=64, 34 | in_index=4, 35 | channels=16, 36 | pool_scales=(1, 2, 3, 6), 37 | dropout_ratio=0.1, 38 | num_classes=2, 39 | norm_cfg=norm_cfg, 40 | align_corners=False, 41 | loss_decode=dict( 42 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 43 | auxiliary_head=dict( 44 | type='FCNHead', 45 | in_channels=128, 46 | in_index=3, 47 | channels=64, 48 | num_convs=1, 49 | concat_input=False, 50 | dropout_ratio=0.1, 51 | num_classes=2, 52 | norm_cfg=norm_cfg, 53 | align_corners=False, 54 | loss_decode=dict( 55 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 56 | # model training and testing settings 57 | train_cfg=dict(), 58 | test_cfg=dict(mode='slide', crop_size=256, stride=170)) 59 | 
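The `test_cfg` entries in these model fragments switch between `mode='whole'` (one forward pass over the full image) and `mode='slide'` (overlapping crops whose logits are averaged), as in the UNet config's `crop_size=256, stride=170` just above. Below is a minimal sketch of what sliding-window inference amounts to; `model` is a placeholder callable returning `(1, num_classes, h, w)` logits, and this illustrates the idea rather than mmsegmentation's actual `slide_inference`.

# Sketch of the sliding-window inference implied by
# test_cfg=dict(mode='slide', crop_size=256, stride=170).
import torch

def slide_inference(model, img, num_classes, crop=256, stride=170):
    _, _, h, w = img.shape
    logits = img.new_zeros((1, num_classes, h, w))
    count = img.new_zeros((1, 1, h, w))
    # enough windows per axis that the image border is always covered
    h_grids = max(h - crop + stride - 1, 0) // stride + 1
    w_grids = max(w - crop + stride - 1, 0) // stride + 1
    for gy in range(h_grids):
        for gx in range(w_grids):
            y1 = min(gy * stride, max(h - crop, 0))
            x1 = min(gx * stride, max(w - crop, 0))
            y2, x2 = min(y1 + crop, h), min(x1 + crop, w)
            logits[:, :, y1:y2, x1:x2] += model(img[:, :, y1:y2, x1:x2])
            count[:, :, y1:y2, x1:x2] += 1
    return logits / count  # average logits where windows overlap

if __name__ == '__main__':
    net = lambda x: torch.zeros(1, 2, x.shape[2], x.shape[3])  # dummy model
    out = slide_inference(net, torch.rand(1, 3, 512, 512), num_classes=2)
    print(out.shape)  # torch.Size([1, 2, 512, 512])

Because the stride (170) is smaller than the crop (256), adjacent windows overlap and the averaged logits smooth seams at window borders.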
-------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/_base_/models/segformer_mit-b0.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | data_preprocessor = dict( 4 | type='SegDataPreProcessor', 5 | mean=[123.675, 116.28, 103.53], 6 | std=[58.395, 57.12, 57.375], 7 | bgr_to_rgb=True, 8 | pad_val=0, 9 | seg_pad_val=255) 10 | model = dict( 11 | type='EncoderDecoder', 12 | data_preprocessor=data_preprocessor, 13 | pretrained=None, 14 | backbone=dict( 15 | type='MixVisionTransformer', 16 | in_channels=3, 17 | embed_dims=32, 18 | num_stages=4, 19 | num_layers=[2, 2, 2, 2], 20 | num_heads=[1, 2, 5, 8], 21 | patch_sizes=[7, 3, 3, 3], 22 | sr_ratios=[8, 4, 2, 1], 23 | out_indices=(0, 1, 2, 3), 24 | mlp_ratio=4, 25 | qkv_bias=True, 26 | drop_rate=0.0, 27 | attn_drop_rate=0.0, 28 | drop_path_rate=0.1), 29 | decode_head=dict( 30 | type='SegformerHead', 31 | in_channels=[32, 64, 160, 256], 32 | in_index=[0, 1, 2, 3], 33 | channels=256, 34 | dropout_ratio=0.1, 35 | num_classes=19, 36 | norm_cfg=norm_cfg, 37 | align_corners=False, 38 | loss_decode=dict( 39 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 40 | # model training and testing settings 41 | train_cfg=dict(), 42 | test_cfg=dict(mode='whole')) 43 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/_base_/models/segmenter_vit-b16_mask.py: -------------------------------------------------------------------------------- 1 | checkpoint = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/segmenter/vit_base_p16_384_20220308-96dfe169.pth' # noqa 2 | # model settings 3 | backbone_norm_cfg = dict(type='LN', eps=1e-6, requires_grad=True) 4 | data_preprocessor = dict( 5 | type='SegDataPreProcessor', 6 | mean=[127.5, 127.5, 127.5], 7 | std=[127.5, 127.5, 127.5], 8 | bgr_to_rgb=True, 9 | pad_val=0, 10 | seg_pad_val=255) 11 | model = dict( 12 | type='EncoderDecoder', 13 | data_preprocessor=data_preprocessor, 14 | pretrained=checkpoint, 15 | backbone=dict( 16 | type='VisionTransformer', 17 | img_size=(512, 512), 18 | patch_size=16, 19 | in_channels=3, 20 | embed_dims=768, 21 | num_layers=12, 22 | num_heads=12, 23 | drop_path_rate=0.1, 24 | attn_drop_rate=0.0, 25 | drop_rate=0.0, 26 | final_norm=True, 27 | norm_cfg=backbone_norm_cfg, 28 | with_cls_token=True, 29 | interpolate_mode='bicubic', 30 | ), 31 | decode_head=dict( 32 | type='SegmenterMaskTransformerHead', 33 | in_channels=768, 34 | channels=768, 35 | num_classes=150, 36 | num_layers=2, 37 | num_heads=12, 38 | embed_dims=768, 39 | dropout_ratio=0.0, 40 | loss_decode=dict( 41 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), 42 | ), 43 | test_cfg=dict(mode='slide', crop_size=(512, 512), stride=(480, 480)), 44 | ) 45 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/_base_/models/twins_pcpvt-s_fpn.py: -------------------------------------------------------------------------------- 1 | checkpoint = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/twins/pcpvt_small_20220308-e638c41c.pth' # noqa 2 | 3 | # model settings 4 | backbone_norm_cfg = dict(type='LN') 5 | norm_cfg = dict(type='SyncBN', requires_grad=True) 6 | data_preprocessor = dict( 7 | type='SegDataPreProcessor', 8 | mean=[123.675, 116.28, 103.53], 9 | std=[58.395, 57.12, 57.375], 10 | bgr_to_rgb=True, 
11 | pad_val=0, 12 | seg_pad_val=255) 13 | model = dict( 14 | type='EncoderDecoder', 15 | data_preprocessor=data_preprocessor, 16 | backbone=dict( 17 | type='PCPVT', 18 | init_cfg=dict(type='Pretrained', checkpoint=checkpoint), 19 | in_channels=3, 20 | embed_dims=[64, 128, 320, 512], 21 | num_heads=[1, 2, 5, 8], 22 | patch_sizes=[4, 2, 2, 2], 23 | strides=[4, 2, 2, 2], 24 | mlp_ratios=[8, 8, 4, 4], 25 | out_indices=(0, 1, 2, 3), 26 | qkv_bias=True, 27 | norm_cfg=backbone_norm_cfg, 28 | depths=[3, 4, 6, 3], 29 | sr_ratios=[8, 4, 2, 1], 30 | norm_after_stage=False, 31 | drop_rate=0.0, 32 | attn_drop_rate=0., 33 | drop_path_rate=0.2), 34 | neck=dict( 35 | type='FPN', 36 | in_channels=[64, 128, 320, 512], 37 | out_channels=256, 38 | num_outs=4), 39 | decode_head=dict( 40 | type='FPNHead', 41 | in_channels=[256, 256, 256, 256], 42 | in_index=[0, 1, 2, 3], 43 | feature_strides=[4, 8, 16, 32], 44 | channels=128, 45 | dropout_ratio=0.1, 46 | num_classes=150, 47 | norm_cfg=norm_cfg, 48 | align_corners=False, 49 | loss_decode=dict( 50 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 51 | # model training and testing settings 52 | train_cfg=dict(), 53 | test_cfg=dict(mode='whole')) 54 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/_base_/models/twins_pcpvt-s_upernet.py: -------------------------------------------------------------------------------- 1 | checkpoint = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/twins/pcpvt_small_20220308-e638c41c.pth' # noqa 2 | 3 | # model settings 4 | backbone_norm_cfg = dict(type='LN') 5 | norm_cfg = dict(type='SyncBN', requires_grad=True) 6 | data_preprocessor = dict( 7 | type='SegDataPreProcessor', 8 | mean=[123.675, 116.28, 103.53], 9 | std=[58.395, 57.12, 57.375], 10 | bgr_to_rgb=True, 11 | pad_val=0, 12 | seg_pad_val=255) 13 | model = dict( 14 | type='EncoderDecoder', 15 | data_preprocessor=data_preprocessor, 16 | backbone=dict( 17 | type='PCPVT', 18 | init_cfg=dict(type='Pretrained', checkpoint=checkpoint), 19 | in_channels=3, 20 | embed_dims=[64, 128, 320, 512], 21 | num_heads=[1, 2, 5, 8], 22 | patch_sizes=[4, 2, 2, 2], 23 | strides=[4, 2, 2, 2], 24 | mlp_ratios=[8, 8, 4, 4], 25 | out_indices=(0, 1, 2, 3), 26 | qkv_bias=True, 27 | norm_cfg=backbone_norm_cfg, 28 | depths=[3, 4, 6, 3], 29 | sr_ratios=[8, 4, 2, 1], 30 | norm_after_stage=False, 31 | drop_rate=0.0, 32 | attn_drop_rate=0., 33 | drop_path_rate=0.2), 34 | decode_head=dict( 35 | type='UPerHead', 36 | in_channels=[64, 128, 320, 512], 37 | in_index=[0, 1, 2, 3], 38 | pool_scales=(1, 2, 3, 6), 39 | channels=512, 40 | dropout_ratio=0.1, 41 | num_classes=150, 42 | norm_cfg=norm_cfg, 43 | align_corners=False, 44 | loss_decode=dict( 45 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 46 | auxiliary_head=dict( 47 | type='FCNHead', 48 | in_channels=320, 49 | in_index=2, 50 | channels=256, 51 | num_convs=1, 52 | concat_input=False, 53 | dropout_ratio=0.1, 54 | num_classes=150, 55 | norm_cfg=norm_cfg, 56 | align_corners=False, 57 | loss_decode=dict( 58 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 59 | # model training and testing settings 60 | train_cfg=dict(), 61 | test_cfg=dict(mode='whole')) 62 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/_base_/models/upernet_beit.py: -------------------------------------------------------------------------------- 1 | norm_cfg = dict(type='SyncBN', requires_grad=True) 2 | 
data_preprocessor = dict( 3 | type='SegDataPreProcessor', 4 | mean=[123.675, 116.28, 103.53], 5 | std=[58.395, 57.12, 57.375], 6 | bgr_to_rgb=True, 7 | pad_val=0, 8 | seg_pad_val=255) 9 | model = dict( 10 | type='EncoderDecoder', 11 | data_preprocessor=data_preprocessor, 12 | pretrained=None, 13 | backbone=dict( 14 | type='BEiT', 15 | img_size=(640, 640), 16 | patch_size=16, 17 | in_channels=3, 18 | embed_dims=768, 19 | num_layers=12, 20 | num_heads=12, 21 | mlp_ratio=4, 22 | out_indices=(3, 5, 7, 11), 23 | qv_bias=True, 24 | attn_drop_rate=0.0, 25 | drop_path_rate=0.1, 26 | norm_cfg=dict(type='LN', eps=1e-6), 27 | act_cfg=dict(type='GELU'), 28 | norm_eval=False, 29 | init_values=0.1), 30 | neck=dict(type='Feature2Pyramid', embed_dim=768, rescales=[4, 2, 1, 0.5]), 31 | decode_head=dict( 32 | type='UPerHead', 33 | in_channels=[768, 768, 768, 768], 34 | in_index=[0, 1, 2, 3], 35 | pool_scales=(1, 2, 3, 6), 36 | channels=768, 37 | dropout_ratio=0.1, 38 | num_classes=150, 39 | norm_cfg=norm_cfg, 40 | align_corners=False, 41 | loss_decode=dict( 42 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 43 | auxiliary_head=dict( 44 | type='FCNHead', 45 | in_channels=768, 46 | in_index=2, 47 | channels=256, 48 | num_convs=1, 49 | concat_input=False, 50 | dropout_ratio=0.1, 51 | num_classes=150, 52 | norm_cfg=norm_cfg, 53 | align_corners=False, 54 | loss_decode=dict( 55 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 56 | # model training and testing settings 57 | train_cfg=dict(), 58 | test_cfg=dict(mode='whole')) 59 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/_base_/models/upernet_convnext.py: -------------------------------------------------------------------------------- 1 | norm_cfg = dict(type='SyncBN', requires_grad=True) 2 | custom_imports = dict(imports='mmpretrain.models', allow_failed_imports=False) 3 | checkpoint_file = 'https://download.openmmlab.com/mmclassification/v0/convnext/downstream/convnext-base_3rdparty_32xb128-noema_in1k_20220301-2a0ee547.pth' # noqa 4 | data_preprocessor = dict( 5 | type='SegDataPreProcessor', 6 | mean=[123.675, 116.28, 103.53], 7 | std=[58.395, 57.12, 57.375], 8 | bgr_to_rgb=True, 9 | pad_val=0, 10 | seg_pad_val=255) 11 | model = dict( 12 | type='EncoderDecoder', 13 | data_preprocessor=data_preprocessor, 14 | pretrained=None, 15 | backbone=dict( 16 | type='mmpretrain.ConvNeXt', 17 | arch='base', 18 | out_indices=[0, 1, 2, 3], 19 | drop_path_rate=0.4, 20 | layer_scale_init_value=1.0, 21 | gap_before_final_norm=False, 22 | init_cfg=dict( 23 | type='Pretrained', checkpoint=checkpoint_file, 24 | prefix='backbone.')), 25 | decode_head=dict( 26 | type='UPerHead', 27 | in_channels=[128, 256, 512, 1024], 28 | in_index=[0, 1, 2, 3], 29 | pool_scales=(1, 2, 3, 6), 30 | channels=512, 31 | dropout_ratio=0.1, 32 | num_classes=19, 33 | norm_cfg=norm_cfg, 34 | align_corners=False, 35 | loss_decode=dict( 36 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 37 | auxiliary_head=dict( 38 | type='FCNHead', 39 | in_channels=384, 40 | in_index=2, 41 | channels=256, 42 | num_convs=1, 43 | concat_input=False, 44 | dropout_ratio=0.1, 45 | num_classes=19, 46 | norm_cfg=norm_cfg, 47 | align_corners=False, 48 | loss_decode=dict( 49 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 50 | # model training and testing settings 51 | train_cfg=dict(), 52 | test_cfg=dict(mode='whole')) 53 | 
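Every model fragment in this directory repeats the same `SegDataPreProcessor` block. Its `mean`/`std` are the ImageNet RGB statistics scaled by 255 ((0.485, 0.456, 0.406) and (0.229, 0.224, 0.225)), `bgr_to_rgb=True` reorders channels from OpenCV's BGR loading, and `seg_pad_val=255` pads label maps with the ignore index. A hand-rolled sketch of the numeric effect, not mmseg's implementation:

# What the repeated SegDataPreProcessor block amounts to, numerically.
import numpy as np

MEAN = np.array([123.675, 116.28, 103.53])  # ImageNet RGB means * 255
STD = np.array([58.395, 57.12, 57.375])     # ImageNet RGB stds  * 255

def preprocess(img_bgr: np.ndarray) -> np.ndarray:
    """img_bgr: HxWx3 uint8 as loaded by cv2; returns float32 CHW."""
    img = img_bgr[..., ::-1].astype(np.float32)  # bgr_to_rgb=True
    img = (img - MEAN) / STD                     # per-channel normalization
    return img.transpose(2, 0, 1)                # HWC -> CHW for the backbone

demo = (np.random.rand(4, 4, 3) * 255).astype(np.uint8)
print(preprocess(demo).shape)  # (3, 4, 4)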
-------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/_base_/models/upernet_mae.py: -------------------------------------------------------------------------------- 1 | norm_cfg = dict(type='SyncBN', requires_grad=True) 2 | data_preprocessor = dict( 3 | type='SegDataPreProcessor', 4 | mean=[123.675, 116.28, 103.53], 5 | std=[58.395, 57.12, 57.375], 6 | bgr_to_rgb=True, 7 | pad_val=0, 8 | seg_pad_val=255) 9 | model = dict( 10 | type='EncoderDecoder', 11 | data_preprocessor=data_preprocessor, 12 | pretrained=None, 13 | backbone=dict( 14 | type='MAE', 15 | img_size=(640, 640), 16 | patch_size=16, 17 | in_channels=3, 18 | embed_dims=768, 19 | num_layers=12, 20 | num_heads=12, 21 | mlp_ratio=4, 22 | out_indices=(3, 5, 7, 11), 23 | attn_drop_rate=0.0, 24 | drop_path_rate=0.1, 25 | norm_cfg=dict(type='LN', eps=1e-6), 26 | act_cfg=dict(type='GELU'), 27 | norm_eval=False, 28 | init_values=0.1), 29 | neck=dict(type='Feature2Pyramid', embed_dim=768, rescales=[4, 2, 1, 0.5]), 30 | decode_head=dict( 31 | type='UPerHead', 32 | in_channels=[384, 384, 384, 384], 33 | in_index=[0, 1, 2, 3], 34 | pool_scales=(1, 2, 3, 6), 35 | channels=512, 36 | dropout_ratio=0.1, 37 | num_classes=19, 38 | norm_cfg=norm_cfg, 39 | align_corners=False, 40 | loss_decode=dict( 41 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 42 | auxiliary_head=dict( 43 | type='FCNHead', 44 | in_channels=384, 45 | in_index=2, 46 | channels=256, 47 | num_convs=1, 48 | concat_input=False, 49 | dropout_ratio=0.1, 50 | num_classes=19, 51 | norm_cfg=norm_cfg, 52 | align_corners=False, 53 | loss_decode=dict( 54 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 55 | # model training and testing settings 56 | train_cfg=dict(), 57 | test_cfg=dict(mode='whole')) 58 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/_base_/models/upernet_r50.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | data_preprocessor = dict( 4 | type='SegDataPreProcessor', 5 | mean=[123.675, 116.28, 103.53], 6 | std=[58.395, 57.12, 57.375], 7 | bgr_to_rgb=True, 8 | pad_val=0, 9 | seg_pad_val=255) 10 | model = dict( 11 | type='EncoderDecoder', 12 | data_preprocessor=data_preprocessor, 13 | pretrained='open-mmlab://resnet50_v1c', 14 | backbone=dict( 15 | type='ResNetV1c', 16 | depth=50, 17 | num_stages=4, 18 | out_indices=(0, 1, 2, 3), 19 | dilations=(1, 1, 1, 1), 20 | strides=(1, 2, 2, 2), 21 | norm_cfg=norm_cfg, 22 | norm_eval=False, 23 | style='pytorch', 24 | contract_dilation=True), 25 | decode_head=dict( 26 | type='UPerHead', 27 | in_channels=[256, 512, 1024, 2048], 28 | in_index=[0, 1, 2, 3], 29 | pool_scales=(1, 2, 3, 6), 30 | channels=512, 31 | dropout_ratio=0.1, 32 | num_classes=19, 33 | norm_cfg=norm_cfg, 34 | align_corners=False, 35 | loss_decode=dict( 36 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 37 | auxiliary_head=dict( 38 | type='FCNHead', 39 | in_channels=1024, 40 | in_index=2, 41 | channels=256, 42 | num_convs=1, 43 | concat_input=False, 44 | dropout_ratio=0.1, 45 | num_classes=19, 46 | norm_cfg=norm_cfg, 47 | align_corners=False, 48 | loss_decode=dict( 49 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 50 | # model training and testing settings 51 | train_cfg=dict(), 52 | test_cfg=dict(mode='whole')) 53 | 
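These `_base_/models/*.py` files are fragments, not runnable configs: downstream files list them in `_base_` and mmengine merges everything into one nested dict. A usage sketch; the path below points at one of the derived configs later in this section and is illustrative only:

# Load a composed config and inspect the merged model dict.
from mmengine.config import Config

cfg = Config.fromfile(
    'configs/upernet/upernet_r50_4xb4-160k_ade20k-512x512.py')
# The derived file only sets num_classes and the crop size; everything else
# (backbone, heads, losses) comes from _base_/models/upernet_r50.py.
print(cfg.model.decode_head.type)         # 'UPerHead'
print(cfg.model.decode_head.num_classes)  # 150 (overriding the base's 19)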
-------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/_base_/models/upernet_swin.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | backbone_norm_cfg = dict(type='LN', requires_grad=True) 4 | data_preprocessor = dict( 5 | type='SegDataPreProcessor', 6 | mean=[123.675, 116.28, 103.53], 7 | std=[58.395, 57.12, 57.375], 8 | bgr_to_rgb=True, 9 | pad_val=0, 10 | seg_pad_val=255) 11 | model = dict( 12 | type='EncoderDecoder', 13 | data_preprocessor=data_preprocessor, 14 | pretrained=None, 15 | backbone=dict( 16 | type='SwinTransformer', 17 | pretrain_img_size=224, 18 | embed_dims=96, 19 | patch_size=4, 20 | window_size=7, 21 | mlp_ratio=4, 22 | depths=[2, 2, 6, 2], 23 | num_heads=[3, 6, 12, 24], 24 | strides=(4, 2, 2, 2), 25 | out_indices=(0, 1, 2, 3), 26 | qkv_bias=True, 27 | qk_scale=None, 28 | patch_norm=True, 29 | drop_rate=0., 30 | attn_drop_rate=0., 31 | drop_path_rate=0.3, 32 | use_abs_pos_embed=False, 33 | act_cfg=dict(type='GELU'), 34 | norm_cfg=backbone_norm_cfg), 35 | decode_head=dict( 36 | type='UPerHead', 37 | in_channels=[96, 192, 384, 768], 38 | in_index=[0, 1, 2, 3], 39 | pool_scales=(1, 2, 3, 6), 40 | channels=512, 41 | dropout_ratio=0.1, 42 | num_classes=19, 43 | norm_cfg=norm_cfg, 44 | align_corners=False, 45 | loss_decode=dict( 46 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 47 | auxiliary_head=dict( 48 | type='FCNHead', 49 | in_channels=384, 50 | in_index=2, 51 | channels=256, 52 | num_convs=1, 53 | concat_input=False, 54 | dropout_ratio=0.1, 55 | num_classes=19, 56 | norm_cfg=norm_cfg, 57 | align_corners=False, 58 | loss_decode=dict( 59 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 60 | # model training and testing settings 61 | train_cfg=dict(), 62 | test_cfg=dict(mode='whole')) 63 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/_base_/models/upernet_vit-b16_ln_mln.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | data_preprocessor = dict( 4 | type='SegDataPreProcessor', 5 | mean=[123.675, 116.28, 103.53], 6 | std=[58.395, 57.12, 57.375], 7 | bgr_to_rgb=True, 8 | pad_val=0, 9 | seg_pad_val=255) 10 | model = dict( 11 | type='EncoderDecoder', 12 | data_preprocessor=data_preprocessor, 13 | pretrained='pretrain/jx_vit_base_p16_224-80ecf9dd.pth', 14 | backbone=dict( 15 | type='VisionTransformer', 16 | img_size=(512, 512), 17 | patch_size=16, 18 | in_channels=3, 19 | embed_dims=768, 20 | num_layers=12, 21 | num_heads=12, 22 | mlp_ratio=4, 23 | out_indices=(2, 5, 8, 11), 24 | qkv_bias=True, 25 | drop_rate=0.0, 26 | attn_drop_rate=0.0, 27 | drop_path_rate=0.0, 28 | with_cls_token=True, 29 | norm_cfg=dict(type='LN', eps=1e-6), 30 | act_cfg=dict(type='GELU'), 31 | norm_eval=False, 32 | interpolate_mode='bicubic'), 33 | neck=dict( 34 | type='MultiLevelNeck', 35 | in_channels=[768, 768, 768, 768], 36 | out_channels=768, 37 | scales=[4, 2, 1, 0.5]), 38 | decode_head=dict( 39 | type='UPerHead', 40 | in_channels=[768, 768, 768, 768], 41 | in_index=[0, 1, 2, 3], 42 | pool_scales=(1, 2, 3, 6), 43 | channels=512, 44 | dropout_ratio=0.1, 45 | num_classes=19, 46 | norm_cfg=norm_cfg, 47 | align_corners=False, 48 | loss_decode=dict( 49 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 50 | 
auxiliary_head=dict( 51 | type='FCNHead', 52 | in_channels=768, 53 | in_index=3, 54 | channels=256, 55 | num_convs=1, 56 | concat_input=False, 57 | dropout_ratio=0.1, 58 | num_classes=19, 59 | norm_cfg=norm_cfg, 60 | align_corners=False, 61 | loss_decode=dict( 62 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 63 | # model training and testing settings 64 | train_cfg=dict(), 65 | test_cfg=dict(mode='whole')) # yapf: disable 66 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/_base_/schedules/schedule_160k.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005) 3 | optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer, clip_grad=None) 4 | # learning policy 5 | param_scheduler = [ 6 | dict( 7 | type='PolyLR', 8 | eta_min=1e-4, 9 | power=0.9, 10 | begin=0, 11 | end=160000, 12 | by_epoch=False) 13 | ] 14 | # training schedule for 160k 15 | train_cfg = dict( 16 | type='IterBasedTrainLoop', max_iters=160000, val_interval=16000) 17 | val_cfg = dict(type='ValLoop') 18 | test_cfg = dict(type='TestLoop') 19 | default_hooks = dict( 20 | timer=dict(type='IterTimerHook'), 21 | logger=dict(type='LoggerHook', interval=50, log_metric_by_epoch=False), 22 | param_scheduler=dict(type='ParamSchedulerHook'), 23 | checkpoint=dict(type='CheckpointHook', by_epoch=False, interval=16000), 24 | sampler_seed=dict(type='DistSamplerSeedHook'), 25 | visualization=dict(type='SegVisualizationHook')) 26 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/_base_/schedules/schedule_20k.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005) 3 | optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer, clip_grad=None) 4 | # learning policy 5 | param_scheduler = [ 6 | dict( 7 | type='PolyLR', 8 | eta_min=1e-4, 9 | power=0.9, 10 | begin=0, 11 | end=20000, 12 | by_epoch=False) 13 | ] 14 | # training schedule for 20k 15 | train_cfg = dict(type='IterBasedTrainLoop', max_iters=20000, val_interval=2000) 16 | val_cfg = dict(type='ValLoop') 17 | test_cfg = dict(type='TestLoop') 18 | default_hooks = dict( 19 | timer=dict(type='IterTimerHook'), 20 | logger=dict(type='LoggerHook', interval=50, log_metric_by_epoch=False), 21 | param_scheduler=dict(type='ParamSchedulerHook'), 22 | checkpoint=dict(type='CheckpointHook', by_epoch=False, interval=2000), 23 | sampler_seed=dict(type='DistSamplerSeedHook'), 24 | visualization=dict(type='SegVisualizationHook')) 25 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/_base_/schedules/schedule_240k.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005) 3 | optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer, clip_grad=None) 4 | # learning policy 5 | param_scheduler = [ 6 | dict( 7 | type='PolyLR', 8 | eta_min=1e-4, 9 | power=0.9, 10 | begin=0, 11 | end=240000, 12 | by_epoch=False) 13 | ] 14 | # training schedule for 240k 15 | train_cfg = dict( 16 | type='IterBasedTrainLoop', max_iters=240000, val_interval=24000) 17 | val_cfg = dict(type='ValLoop') 18 | test_cfg = dict(type='TestLoop') 19 | default_hooks = dict( 20 
| timer=dict(type='IterTimerHook'), 21 | logger=dict(type='LoggerHook', interval=50, log_metric_by_epoch=False), 22 | param_scheduler=dict(type='ParamSchedulerHook'), 23 | checkpoint=dict(type='CheckpointHook', by_epoch=False, interval=24000), 24 | sampler_seed=dict(type='DistSamplerSeedHook'), 25 | visualization=dict(type='SegVisualizationHook')) 26 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/_base_/schedules/schedule_25k.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optimizer = dict(type='AdamW', lr=0.001, weight_decay=0.1) 3 | optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer, clip_grad=None) 4 | # learning policy 5 | param_scheduler = [ 6 | dict( 7 | type='LinearLR', start_factor=3e-2, begin=0, end=12000, 8 | by_epoch=False), 9 | dict( 10 | type='PolyLRRatio', 11 | eta_min_ratio=3e-2, 12 | power=0.9, 13 | begin=12000, 14 | end=24000, 15 | by_epoch=False), 16 | dict(type='ConstantLR', by_epoch=False, factor=1, begin=24000, end=25000) 17 | ] 18 | # training schedule for 25k 19 | train_cfg = dict(type='IterBasedTrainLoop', max_iters=25000, val_interval=1000) 20 | val_cfg = dict(type='ValLoop') 21 | test_cfg = dict(type='TestLoop') 22 | default_hooks = dict( 23 | timer=dict(type='IterTimerHook'), 24 | logger=dict(type='LoggerHook', interval=50, log_metric_by_epoch=False), 25 | param_scheduler=dict(type='ParamSchedulerHook'), 26 | checkpoint=dict(type='CheckpointHook', by_epoch=False, interval=2000), 27 | sampler_seed=dict(type='DistSamplerSeedHook'), 28 | visualization=dict(type='SegVisualizationHook')) 29 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/_base_/schedules/schedule_320k.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005) 3 | optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer, clip_grad=None) 4 | # learning policy 5 | param_scheduler = [ 6 | dict( 7 | type='PolyLR', 8 | eta_min=1e-4, 9 | power=0.9, 10 | begin=0, 11 | end=320000, 12 | by_epoch=False) 13 | ] 14 | # training schedule for 320k 15 | train_cfg = dict( 16 | type='IterBasedTrainLoop', max_iters=320000, val_interval=32000) 17 | val_cfg = dict(type='ValLoop') 18 | test_cfg = dict(type='TestLoop') 19 | default_hooks = dict( 20 | timer=dict(type='IterTimerHook'), 21 | logger=dict(type='LoggerHook', interval=50, log_metric_by_epoch=False), 22 | param_scheduler=dict(type='ParamSchedulerHook'), 23 | checkpoint=dict(type='CheckpointHook', by_epoch=False, interval=32000), 24 | sampler_seed=dict(type='DistSamplerSeedHook'), 25 | visualization=dict(type='SegVisualizationHook')) 26 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/_base_/schedules/schedule_40k.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005) 3 | optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer, clip_grad=None) 4 | # learning policy 5 | param_scheduler = [ 6 | dict( 7 | type='PolyLR', 8 | eta_min=1e-4, 9 | power=0.9, 10 | begin=0, 11 | end=40000, 12 | by_epoch=False) 13 | ] 14 | # training schedule for 40k 15 | train_cfg = dict(type='IterBasedTrainLoop', max_iters=40000, val_interval=4000) 16 | val_cfg = dict(type='ValLoop') 
17 | test_cfg = dict(type='TestLoop') 18 | default_hooks = dict( 19 | timer=dict(type='IterTimerHook'), 20 | logger=dict(type='LoggerHook', interval=50, log_metric_by_epoch=False), 21 | param_scheduler=dict(type='ParamSchedulerHook'), 22 | checkpoint=dict(type='CheckpointHook', by_epoch=False, interval=4000), 23 | sampler_seed=dict(type='DistSamplerSeedHook'), 24 | visualization=dict(type='SegVisualizationHook')) 25 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/_base_/schedules/schedule_80k.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005) 3 | optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer, clip_grad=None) 4 | # learning policy 5 | param_scheduler = [ 6 | dict( 7 | type='PolyLR', 8 | eta_min=1e-4, 9 | power=0.9, 10 | begin=0, 11 | end=80000, 12 | by_epoch=False) 13 | ] 14 | # training schedule for 80k 15 | train_cfg = dict(type='IterBasedTrainLoop', max_iters=80000, val_interval=8000) 16 | val_cfg = dict(type='ValLoop') 17 | test_cfg = dict(type='TestLoop') 18 | default_hooks = dict( 19 | timer=dict(type='IterTimerHook'), 20 | logger=dict(type='LoggerHook', interval=50, log_metric_by_epoch=False), 21 | param_scheduler=dict(type='ParamSchedulerHook'), 22 | checkpoint=dict(type='CheckpointHook', by_epoch=False, interval=8000), 23 | sampler_seed=dict(type='DistSamplerSeedHook'), 24 | visualization=dict(type='SegVisualizationHook')) 25 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/convnext/convnext-base_upernet_8xb2-amp-160k_ade20k-512x512.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/models/upernet_convnext.py', '../_base_/datasets/ade20k.py', 3 | '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' 4 | ] 5 | crop_size = (512, 512) 6 | data_preprocessor = dict(size=crop_size) 7 | model = dict( 8 | data_preprocessor=data_preprocessor, 9 | decode_head=dict(in_channels=[128, 256, 512, 1024], num_classes=150), 10 | auxiliary_head=dict(in_channels=512, num_classes=150), 11 | test_cfg=dict(mode='slide', crop_size=crop_size, stride=(341, 341)), 12 | ) 13 | 14 | optim_wrapper = dict( 15 | _delete_=True, 16 | type='AmpOptimWrapper', 17 | optimizer=dict( 18 | type='AdamW', lr=0.0001, betas=(0.9, 0.999), weight_decay=0.05), 19 | paramwise_cfg={ 20 | 'decay_rate': 0.9, 21 | 'decay_type': 'stage_wise', 22 | 'num_layers': 12 23 | }, 24 | constructor='LearningRateDecayOptimizerConstructor', 25 | loss_scale='dynamic') 26 | 27 | param_scheduler = [ 28 | dict( 29 | type='LinearLR', start_factor=1e-6, by_epoch=False, begin=0, end=1500), 30 | dict( 31 | type='PolyLR', 32 | power=1.0, 33 | begin=1500, 34 | end=160000, 35 | eta_min=0.0, 36 | by_epoch=False, 37 | ) 38 | ] 39 | 40 | # By default, models are trained on 8 GPUs with 2 images per GPU 41 | train_dataloader = dict(batch_size=2) 42 | val_dataloader = dict(batch_size=1) 43 | test_dataloader = val_dataloader 44 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/convnext/convnext-base_upernet_8xb2-amp-160k_ade20k-640x640.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/models/upernet_convnext.py', 3 | '../_base_/datasets/ade20k_640x640.py', '../_base_/default_runtime.py', 4 
| '../_base_/schedules/schedule_160k.py' 5 | ] 6 | crop_size = (640, 640) 7 | data_preprocessor = dict(size=crop_size) 8 | checkpoint_file = 'https://download.openmmlab.com/mmclassification/v0/convnext/downstream/convnext-base_3rdparty_in21k_20220301-262fd037.pth' # noqa 9 | model = dict( 10 | data_preprocessor=data_preprocessor, 11 | backbone=dict( 12 | type='mmpretrain.ConvNeXt', 13 | arch='base', 14 | out_indices=[0, 1, 2, 3], 15 | drop_path_rate=0.4, 16 | layer_scale_init_value=1.0, 17 | gap_before_final_norm=False, 18 | init_cfg=dict( 19 | type='Pretrained', checkpoint=checkpoint_file, 20 | prefix='backbone.')), 21 | decode_head=dict( 22 | in_channels=[128, 256, 512, 1024], 23 | num_classes=150, 24 | ), 25 | auxiliary_head=dict(in_channels=512, num_classes=150), 26 | test_cfg=dict(mode='slide', crop_size=crop_size, stride=(426, 426)), 27 | ) 28 | 29 | optim_wrapper = dict( 30 | _delete_=True, 31 | type='AmpOptimWrapper', 32 | optimizer=dict( 33 | type='AdamW', lr=0.0001, betas=(0.9, 0.999), weight_decay=0.05), 34 | paramwise_cfg={ 35 | 'decay_rate': 0.9, 36 | 'decay_type': 'stage_wise', 37 | 'num_layers': 12 38 | }, 39 | constructor='LearningRateDecayOptimizerConstructor', 40 | loss_scale='dynamic') 41 | 42 | param_scheduler = [ 43 | dict( 44 | type='LinearLR', start_factor=1e-6, by_epoch=False, begin=0, end=1500), 45 | dict( 46 | type='PolyLR', 47 | power=1.0, 48 | begin=1500, 49 | end=160000, 50 | eta_min=0.0, 51 | by_epoch=False, 52 | ) 53 | ] 54 | 55 | # By default, models are trained on 8 GPUs with 2 images per GPU 56 | train_dataloader = dict(batch_size=2) 57 | val_dataloader = dict(batch_size=1) 58 | test_dataloader = val_dataloader 59 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/convnext/convnext-large_upernet_8xb2-amp-160k_ade20k-640x640.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/models/upernet_convnext.py', 3 | '../_base_/datasets/ade20k_640x640.py', '../_base_/default_runtime.py', 4 | '../_base_/schedules/schedule_160k.py' 5 | ] 6 | crop_size = (640, 640) 7 | data_preprocessor = dict(size=crop_size) 8 | checkpoint_file = 'https://download.openmmlab.com/mmclassification/v0/convnext/downstream/convnext-large_3rdparty_in21k_20220301-e6e0ea0a.pth' # noqa 9 | model = dict( 10 | data_preprocessor=data_preprocessor, 11 | backbone=dict( 12 | type='mmpretrain.ConvNeXt', 13 | arch='large', 14 | out_indices=[0, 1, 2, 3], 15 | drop_path_rate=0.4, 16 | layer_scale_init_value=1.0, 17 | gap_before_final_norm=False, 18 | init_cfg=dict( 19 | type='Pretrained', checkpoint=checkpoint_file, 20 | prefix='backbone.')), 21 | decode_head=dict( 22 | in_channels=[192, 384, 768, 1536], 23 | num_classes=150, 24 | ), 25 | auxiliary_head=dict(in_channels=768, num_classes=150), 26 | test_cfg=dict(mode='slide', crop_size=crop_size, stride=(426, 426)), 27 | ) 28 | 29 | optim_wrapper = dict( 30 | _delete_=True, 31 | type='AmpOptimWrapper', 32 | optimizer=dict( 33 | type='AdamW', lr=0.0001, betas=(0.9, 0.999), weight_decay=0.05), 34 | paramwise_cfg={ 35 | 'decay_rate': 0.9, 36 | 'decay_type': 'stage_wise', 37 | 'num_layers': 12 38 | }, 39 | constructor='LearningRateDecayOptimizerConstructor', 40 | loss_scale='dynamic') 41 | 42 | param_scheduler = [ 43 | dict( 44 | type='LinearLR', start_factor=1e-6, by_epoch=False, begin=0, end=1500), 45 | dict( 46 | type='PolyLR', 47 | power=1.0, 48 | begin=1500, 49 | end=160000, 50 | eta_min=0.0, 51 | by_epoch=False, 52 | ) 53 | ] 54 
| 55 | # By default, models are trained on 8 GPUs with 2 images per GPU 56 | train_dataloader = dict(batch_size=2) 57 | val_dataloader = dict(batch_size=1) 58 | test_dataloader = val_dataloader 59 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/convnext/convnext-small_upernet_8xb2-amp-160k_ade20k-512x512.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/models/upernet_convnext.py', '../_base_/datasets/ade20k.py', 3 | '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' 4 | ] 5 | crop_size = (512, 512) 6 | data_preprocessor = dict(size=crop_size) 7 | checkpoint_file = 'https://download.openmmlab.com/mmclassification/v0/convnext/downstream/convnext-small_3rdparty_32xb128-noema_in1k_20220301-303e75e3.pth' # noqa 8 | model = dict( 9 | data_preprocessor=data_preprocessor, 10 | backbone=dict( 11 | type='mmpretrain.ConvNeXt', 12 | arch='small', 13 | out_indices=[0, 1, 2, 3], 14 | drop_path_rate=0.3, 15 | layer_scale_init_value=1.0, 16 | gap_before_final_norm=False, 17 | init_cfg=dict( 18 | type='Pretrained', checkpoint=checkpoint_file, 19 | prefix='backbone.')), 20 | decode_head=dict( 21 | in_channels=[96, 192, 384, 768], 22 | num_classes=150, 23 | ), 24 | auxiliary_head=dict(in_channels=384, num_classes=150), 25 | test_cfg=dict(mode='slide', crop_size=crop_size, stride=(341, 341)), 26 | ) 27 | 28 | optim_wrapper = dict( 29 | _delete_=True, 30 | type='AmpOptimWrapper', 31 | optimizer=dict( 32 | type='AdamW', lr=0.0001, betas=(0.9, 0.999), weight_decay=0.05), 33 | paramwise_cfg={ 34 | 'decay_rate': 0.9, 35 | 'decay_type': 'stage_wise', 36 | 'num_layers': 12 37 | }, 38 | constructor='LearningRateDecayOptimizerConstructor', 39 | loss_scale='dynamic') 40 | 41 | param_scheduler = [ 42 | dict( 43 | type='LinearLR', start_factor=1e-6, by_epoch=False, begin=0, end=1500), 44 | dict( 45 | type='PolyLR', 46 | power=1.0, 47 | begin=1500, 48 | end=160000, 49 | eta_min=0.0, 50 | by_epoch=False, 51 | ) 52 | ] 53 | 54 | # By default, models are trained on 8 GPUs with 2 images per GPU 55 | train_dataloader = dict(batch_size=2) 56 | val_dataloader = dict(batch_size=1) 57 | test_dataloader = val_dataloader 58 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/convnext/convnext-tiny_upernet_8xb2-amp-160k_ade20k-512x512.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/models/upernet_convnext.py', '../_base_/datasets/ade20k.py', 3 | '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' 4 | ] 5 | crop_size = (512, 512) 6 | data_preprocessor = dict(size=crop_size) 7 | checkpoint_file = 'https://download.openmmlab.com/mmclassification/v0/convnext/downstream/convnext-tiny_3rdparty_32xb128-noema_in1k_20220301-795e9634.pth' # noqa 8 | model = dict( 9 | data_preprocessor=data_preprocessor, 10 | backbone=dict( 11 | type='mmpretrain.ConvNeXt', 12 | arch='tiny', 13 | out_indices=[0, 1, 2, 3], 14 | drop_path_rate=0.4, 15 | layer_scale_init_value=1.0, 16 | gap_before_final_norm=False, 17 | init_cfg=dict( 18 | type='Pretrained', checkpoint=checkpoint_file, 19 | prefix='backbone.')), 20 | decode_head=dict( 21 | in_channels=[96, 192, 384, 768], 22 | num_classes=150, 23 | ), 24 | auxiliary_head=dict(in_channels=384, num_classes=150), 25 | test_cfg=dict(mode='slide', crop_size=crop_size, stride=(341, 341)), 26 | ) 27 | 28 | optim_wrapper = dict( 
29 | _delete_=True, 30 | type='AmpOptimWrapper', 31 | optimizer=dict( 32 | type='AdamW', lr=0.0001, betas=(0.9, 0.999), weight_decay=0.05), 33 | paramwise_cfg={ 34 | 'decay_rate': 0.9, 35 | 'decay_type': 'stage_wise', 36 | 'num_layers': 6 37 | }, 38 | constructor='LearningRateDecayOptimizerConstructor', 39 | loss_scale='dynamic') 40 | 41 | param_scheduler = [ 42 | dict( 43 | type='LinearLR', start_factor=1e-6, by_epoch=False, begin=0, end=1500), 44 | dict( 45 | type='PolyLR', 46 | power=1.0, 47 | begin=1500, 48 | end=160000, 49 | eta_min=0.0, 50 | by_epoch=False, 51 | ) 52 | ] 53 | 54 | # By default, models are trained on 8 GPUs with 2 images per GPU 55 | train_dataloader = dict(batch_size=2) 56 | val_dataloader = dict(batch_size=1) 57 | test_dataloader = val_dataloader 58 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/convnext/convnext-xlarge_upernet_8xb2-amp-160k_ade20k-640x640.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/models/upernet_convnext.py', 3 | '../_base_/datasets/ade20k_640x640.py', '../_base_/default_runtime.py', 4 | '../_base_/schedules/schedule_160k.py' 5 | ] 6 | crop_size = (640, 640) 7 | data_preprocessor = dict(size=crop_size) 8 | checkpoint_file = 'https://download.openmmlab.com/mmclassification/v0/convnext/downstream/convnext-xlarge_3rdparty_in21k_20220301-08aa5ddc.pth' # noqa 9 | model = dict( 10 | data_preprocessor=data_preprocessor, 11 | backbone=dict( 12 | type='mmpretrain.ConvNeXt', 13 | arch='xlarge', 14 | out_indices=[0, 1, 2, 3], 15 | drop_path_rate=0.4, 16 | layer_scale_init_value=1.0, 17 | gap_before_final_norm=False, 18 | init_cfg=dict( 19 | type='Pretrained', checkpoint=checkpoint_file, 20 | prefix='backbone.')), 21 | decode_head=dict( 22 | in_channels=[256, 512, 1024, 2048], 23 | num_classes=150, 24 | ), 25 | auxiliary_head=dict(in_channels=1024, num_classes=150), 26 | test_cfg=dict(mode='slide', crop_size=crop_size, stride=(426, 426)), 27 | ) 28 | 29 | optim_wrapper = dict( 30 | _delete_=True, 31 | type='AmpOptimWrapper', 32 | optimizer=dict( 33 | type='AdamW', lr=0.00008, betas=(0.9, 0.999), weight_decay=0.05), 34 | paramwise_cfg={ 35 | 'decay_rate': 0.9, 36 | 'decay_type': 'stage_wise', 37 | 'num_layers': 12 38 | }, 39 | constructor='LearningRateDecayOptimizerConstructor', 40 | loss_scale='dynamic') 41 | 42 | param_scheduler = [ 43 | dict( 44 | type='LinearLR', start_factor=1e-6, by_epoch=False, begin=0, end=1500), 45 | dict( 46 | type='PolyLR', 47 | power=1.0, 48 | begin=1500, 49 | end=160000, 50 | eta_min=0.0, 51 | by_epoch=False, 52 | ) 53 | ] 54 | 55 | # By default, models are trained on 8 GPUs with 2 images per GPU 56 | train_dataloader = dict(batch_size=2) 57 | val_dataloader = dict(batch_size=1) 58 | test_dataloader = val_dataloader 59 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/swin/swin-base-patch4-window12-in1k-384x384-pre_upernet_8xb2-160k_ade20k-512x512.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | 'swin-tiny-patch4-window7-in1k-pre_upernet_8xb2-160k_ade20k-512x512.py' 3 | ] 4 | checkpoint_file = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/swin/swin_base_patch4_window12_384_20220317-55b0104a.pth' # noqa 5 | model = dict( 6 | backbone=dict( 7 | init_cfg=dict(type='Pretrained', checkpoint=checkpoint_file), 8 | pretrain_img_size=384, 9 | embed_dims=128, 10 | 
depths=[2, 2, 18, 2], 11 | num_heads=[4, 8, 16, 32], 12 | window_size=12), 13 | decode_head=dict(in_channels=[128, 256, 512, 1024], num_classes=150), 14 | auxiliary_head=dict(in_channels=512, num_classes=150)) 15 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/swin/swin-base-patch4-window12-in22k-384x384-pre_upernet_8xb2-160k_ade20k-512x512.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | './swin-base-patch4-window12-in1k-384x384-pre_upernet_8xb2-160k_ade20k-512x512.py' # noqa 3 | ] 4 | checkpoint_file = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/swin/swin_base_patch4_window12_384_22k_20220317-e5c09f74.pth' # noqa 5 | model = dict( 6 | backbone=dict( 7 | init_cfg=dict(type='Pretrained', checkpoint=checkpoint_file))) 8 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/swin/swin-base-patch4-window7-in1k-pre_upernet_8xb2-160k_ade20k-512x512.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | './swin-tiny-patch4-window7-in1k-pre_upernet_8xb2-160k_ade20k-512x512.py' 3 | ] 4 | checkpoint_file = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/swin/swin_base_patch4_window7_224_20220317-e9b98025.pth' # noqa 5 | model = dict( 6 | backbone=dict( 7 | init_cfg=dict(type='Pretrained', checkpoint=checkpoint_file), 8 | embed_dims=128, 9 | depths=[2, 2, 18, 2], 10 | num_heads=[4, 8, 16, 32]), 11 | decode_head=dict(in_channels=[128, 256, 512, 1024], num_classes=150), 12 | auxiliary_head=dict(in_channels=512, num_classes=150)) 13 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/swin/swin-base-patch4-window7-in22k-pre_upernet_8xb2-160k_ade20k-512x512.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | './swin-base-patch4-window7-in1k-pre_upernet_8xb2-160k_ade20k-512x512.py' 3 | ] 4 | checkpoint_file = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/swin/swin_base_patch4_window7_224_22k_20220317-4f79f7c0.pth' # noqa 5 | model = dict( 6 | backbone=dict( 7 | init_cfg=dict(type='Pretrained', checkpoint=checkpoint_file))) 8 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/swin/swin-large-patch4-window12-in22k-384x384-pre_upernet_8xb2-160k_ade20k-512x512.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | 'swin-large-patch4-window7-in22k-pre_upernet_' 3 | '8xb2-160k_ade20k-512x512.py' 4 | ] 5 | checkpoint_file = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/swin/swin_large_patch4_window12_384_22k_20220412-6580f57d.pth' # noqa 6 | model = dict( 7 | backbone=dict( 8 | init_cfg=dict(type='Pretrained', checkpoint=checkpoint_file), 9 | pretrain_img_size=384, 10 | window_size=12)) 11 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/swin/swin-large-patch4-window7-in22k-pre_upernet_8xb2-160k_ade20k-512x512.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | 'swin-tiny-patch4-window7-in1k-pre_upernet_8xb2-160k_' 3 | 'ade20k-512x512.py' 4 | ] 5 | checkpoint_file = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/swin/swin_large_patch4_window7_224_22k_20220412-aeecf2aa.pth' # 
noqa 6 | model = dict( 7 | backbone=dict( 8 | init_cfg=dict(type='Pretrained', checkpoint=checkpoint_file), 9 | pretrain_img_size=224, 10 | embed_dims=192, 11 | depths=[2, 2, 18, 2], 12 | num_heads=[6, 12, 24, 48], 13 | window_size=7), 14 | decode_head=dict(in_channels=[192, 384, 768, 1536], num_classes=150), 15 | auxiliary_head=dict(in_channels=768, num_classes=150)) 16 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/swin/swin-small-patch4-window7-in1k-pre_upernet_8xb2-160k_ade20k-512x512.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | './swin-tiny-patch4-window7-in1k-pre_upernet_8xb2-160k_ade20k-512x512.py' 3 | ] 4 | checkpoint_file = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/swin/swin_small_patch4_window7_224_20220317-7ba6d6dd.pth' # noqa 5 | model = dict( 6 | backbone=dict( 7 | init_cfg=dict(type='Pretrained', checkpoint=checkpoint_file), 8 | depths=[2, 2, 18, 2]), 9 | decode_head=dict(in_channels=[96, 192, 384, 768], num_classes=150), 10 | auxiliary_head=dict(in_channels=384, num_classes=150)) 11 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/swin/swin-tiny-patch4-window7-in1k-pre_upernet_8xb2-160k_ade20k-512x512.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/models/upernet_swin.py', '../_base_/datasets/ade20k.py', 3 | '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' 4 | ] 5 | crop_size = (512, 512) 6 | data_preprocessor = dict(size=crop_size) 7 | checkpoint_file = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/swin/swin_tiny_patch4_window7_224_20220317-1cdeb081.pth' # noqa 8 | model = dict( 9 | data_preprocessor=data_preprocessor, 10 | backbone=dict( 11 | init_cfg=dict(type='Pretrained', checkpoint=checkpoint_file), 12 | embed_dims=96, 13 | depths=[2, 2, 6, 2], 14 | num_heads=[3, 6, 12, 24], 15 | window_size=7, 16 | use_abs_pos_embed=False, 17 | drop_path_rate=0.3, 18 | patch_norm=True), 19 | decode_head=dict(in_channels=[96, 192, 384, 768], num_classes=150), 20 | auxiliary_head=dict(in_channels=384, num_classes=150)) 21 | 22 | # AdamW optimizer, no weight decay for position embedding & layer norm 23 | # in backbone 24 | optim_wrapper = dict( 25 | _delete_=True, 26 | type='OptimWrapper', 27 | optimizer=dict( 28 | type='AdamW', lr=0.00006, betas=(0.9, 0.999), weight_decay=0.01), 29 | paramwise_cfg=dict( 30 | custom_keys={ 31 | 'absolute_pos_embed': dict(decay_mult=0.), 32 | 'relative_position_bias_table': dict(decay_mult=0.), 33 | 'norm': dict(decay_mult=0.) 
34 | })) 35 | 36 | param_scheduler = [ 37 | dict( 38 | type='LinearLR', start_factor=1e-6, by_epoch=False, begin=0, end=1500), 39 | dict( 40 | type='PolyLR', 41 | eta_min=0.0, 42 | power=1.0, 43 | begin=1500, 44 | end=160000, 45 | by_epoch=False, 46 | ) 47 | ] 48 | 49 | # By default, models are trained on 8 GPUs with 2 images per GPU 50 | train_dataloader = dict(batch_size=2) 51 | val_dataloader = dict(batch_size=1) 52 | test_dataloader = val_dataloader 53 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/swin/swin-tiny-patch4-window7_upernet_1xb8-20k_levir-256x256.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/models/upernet_swin.py', '../_base_/datasets/levir_256x256.py', 3 | '../_base_/default_runtime.py', '../_base_/schedules/schedule_20k.py' 4 | ] 5 | crop_size = (256, 256) 6 | norm_cfg = dict(type='BN', requires_grad=True) 7 | data_preprocessor = dict( 8 | size=crop_size, 9 | type='SegDataPreProcessor', 10 | mean=[123.675, 116.28, 103.53, 123.675, 116.28, 103.53], 11 | std=[58.395, 57.12, 57.375, 58.395, 57.12, 57.375]) 12 | 13 | model = dict( 14 | data_preprocessor=data_preprocessor, 15 | backbone=dict( 16 | in_channels=6, 17 | embed_dims=96, 18 | depths=[2, 2, 6, 2], 19 | num_heads=[3, 6, 12, 24], 20 | window_size=7, 21 | use_abs_pos_embed=False, 22 | drop_path_rate=0.3, 23 | patch_norm=True), 24 | decode_head=dict(in_channels=[96, 192, 384, 768], num_classes=2), 25 | auxiliary_head=dict(in_channels=384, num_classes=2)) 26 | 27 | # AdamW optimizer, no weight decay for position embedding & layer norm 28 | # in backbone 29 | optim_wrapper = dict( 30 | _delete_=True, 31 | type='OptimWrapper', 32 | optimizer=dict( 33 | type='AdamW', lr=0.00006, betas=(0.9, 0.999), weight_decay=0.01), 34 | paramwise_cfg=dict( 35 | custom_keys={ 36 | 'absolute_pos_embed': dict(decay_mult=0.), 37 | 'relative_position_bias_table': dict(decay_mult=0.), 38 | 'norm': dict(decay_mult=0.) 
39 | })) 40 | 41 | param_scheduler = [ 42 | dict( 43 | type='LinearLR', start_factor=1e-6, by_epoch=False, begin=0, end=1500), 44 | dict( 45 | type='PolyLR', 46 | eta_min=0.0, 47 | power=1.0, 48 | begin=1500, 49 | end=20000, 50 | by_epoch=False, 51 | ) 52 | ] 53 | 54 | train_dataloader = dict(batch_size=4) 55 | val_dataloader = dict(batch_size=1) 56 | test_dataloader = val_dataloader 57 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/upernet/upernet_r101_4xb2-40k_cityscapes-512x1024.py: -------------------------------------------------------------------------------- 1 | _base_ = './upernet_r50_4xb2-40k_cityscapes-512x1024.py' 2 | model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) 3 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/upernet/upernet_r101_4xb2-40k_cityscapes-769x769.py: -------------------------------------------------------------------------------- 1 | _base_ = './upernet_r50_4xb2-40k_cityscapes-769x769.py' 2 | model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) 3 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/upernet/upernet_r101_4xb2-80k_cityscapes-512x1024.py: -------------------------------------------------------------------------------- 1 | _base_ = './upernet_r50_4xb2-80k_cityscapes-512x1024.py' 2 | model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) 3 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/upernet/upernet_r101_4xb2-80k_cityscapes-769x769.py: -------------------------------------------------------------------------------- 1 | _base_ = './upernet_r50_4xb2-80k_cityscapes-769x769.py' 2 | model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) 3 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/upernet/upernet_r101_4xb4-160k_ade20k-512x512.py: -------------------------------------------------------------------------------- 1 | _base_ = './upernet_r50_4xb4-160k_ade20k-512x512.py' 2 | model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) 3 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/upernet/upernet_r101_4xb4-20k_voc12aug-512x512.py: -------------------------------------------------------------------------------- 1 | _base_ = './upernet_r50_4xb4-20k_voc12aug-512x512.py' 2 | model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) 3 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/upernet/upernet_r101_4xb4-40k_voc12aug-512x512.py: -------------------------------------------------------------------------------- 1 | _base_ = './upernet_r50_4xb4-40k_voc12aug-512x512.py' 2 | model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) 3 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/upernet/upernet_r101_4xb4-80k_ade20k-512x512.py: -------------------------------------------------------------------------------- 1 | _base_ = './upernet_r50_4xb4-80k_ade20k-512x512.py' 2 | model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) 3 | 
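The ResNet-101 variants above are pure overrides: each inherits an r50 config via `_base_` and replaces only `pretrained` and `backbone.depth`. A rough sketch of the dict-merge rule this relies on (mmengine's actual merge also honors `_delete_=True`, which the convnext configs use to swap the whole `optim_wrapper`):

# Simplified version of the recursive config merge behind _base_ inheritance.
def merge(base: dict, override: dict) -> dict:
    out = dict(base)
    for k, v in override.items():
        if isinstance(v, dict) and isinstance(out.get(k), dict):
            out[k] = merge(out[k], v)  # recurse into nested dicts
        else:
            out[k] = v                 # scalars and lists replace outright
    return out

base = dict(pretrained='open-mmlab://resnet50_v1c',
            backbone=dict(type='ResNetV1c', depth=50, num_stages=4))
override = dict(pretrained='open-mmlab://resnet101_v1c',
                backbone=dict(depth=101))
print(merge(base, override)['backbone'])
# {'type': 'ResNetV1c', 'depth': 101, 'num_stages': 4}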
-------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/upernet/upernet_r18_4xb2-40k_cityscapes-512x1024.py: -------------------------------------------------------------------------------- 1 | _base_ = './upernet_r50_4xb2-40k_cityscapes-512x1024.py' 2 | model = dict( 3 | pretrained='open-mmlab://resnet18_v1c', 4 | backbone=dict(depth=18), 5 | decode_head=dict(in_channels=[64, 128, 256, 512]), 6 | auxiliary_head=dict(in_channels=256)) 7 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/upernet/upernet_r18_4xb2-80k_cityscapes-512x1024.py: -------------------------------------------------------------------------------- 1 | _base_ = './upernet_r50_4xb2-80k_cityscapes-512x1024.py' 2 | model = dict( 3 | pretrained='open-mmlab://resnet18_v1c', 4 | backbone=dict(depth=18), 5 | decode_head=dict(in_channels=[64, 128, 256, 512]), 6 | auxiliary_head=dict(in_channels=256)) 7 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/upernet/upernet_r18_4xb4-160k_ade20k-512x512.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/models/upernet_r50.py', '../_base_/datasets/ade20k.py', 3 | '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' 4 | ] 5 | model = dict( 6 | pretrained='open-mmlab://resnet18_v1c', 7 | backbone=dict(depth=18), 8 | decode_head=dict(in_channels=[64, 128, 256, 512], num_classes=150), 9 | auxiliary_head=dict(in_channels=256, num_classes=150)) 10 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/upernet/upernet_r18_4xb4-20k_voc12aug-512x512.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/models/upernet_r50.py', 3 | '../_base_/datasets/pascal_voc12_aug.py', '../_base_/default_runtime.py', 4 | '../_base_/schedules/schedule_20k.py' 5 | ] 6 | model = dict( 7 | pretrained='open-mmlab://resnet18_v1c', 8 | backbone=dict(depth=18), 9 | decode_head=dict(in_channels=[64, 128, 256, 512], num_classes=21), 10 | auxiliary_head=dict(in_channels=256, num_classes=21)) 11 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/upernet/upernet_r18_4xb4-40k_voc12aug-512x512.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/models/upernet_r50.py', 3 | '../_base_/datasets/pascal_voc12_aug.py', '../_base_/default_runtime.py', 4 | '../_base_/schedules/schedule_40k.py' 5 | ] 6 | model = dict( 7 | pretrained='open-mmlab://resnet18_v1c', 8 | backbone=dict(depth=18), 9 | decode_head=dict(in_channels=[64, 128, 256, 512], num_classes=21), 10 | auxiliary_head=dict(in_channels=256, num_classes=21)) 11 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/upernet/upernet_r18_4xb4-80k_ade20k-512x512.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/models/upernet_r50.py', '../_base_/datasets/ade20k.py', 3 | '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' 4 | ] 5 | model = dict( 6 | pretrained='open-mmlab://resnet18_v1c', 7 | backbone=dict(depth=18), 8 | decode_head=dict(in_channels=[64, 128, 256, 512], num_classes=150), 9 | auxiliary_head=dict(in_channels=256, 
num_classes=150)) 10 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/upernet/upernet_r50_4xb2-40k_cityscapes-512x1024.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/models/upernet_r50.py', '../_base_/datasets/cityscapes.py', 3 | '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' 4 | ] 5 | crop_size = (512, 1024) 6 | data_preprocessor = dict(size=crop_size) 7 | model = dict(data_preprocessor=data_preprocessor) 8 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/upernet/upernet_r50_4xb2-40k_cityscapes-769x769.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/models/upernet_r50.py', 3 | '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', 4 | '../_base_/schedules/schedule_40k.py' 5 | ] 6 | crop_size = (769, 769) 7 | data_preprocessor = dict(size=crop_size) 8 | model = dict( 9 | data_preprocessor=data_preprocessor, 10 | decode_head=dict(align_corners=True), 11 | auxiliary_head=dict(align_corners=True), 12 | test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513))) 13 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/upernet/upernet_r50_4xb2-80k_cityscapes-512x1024.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/models/upernet_r50.py', '../_base_/datasets/cityscapes.py', 3 | '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' 4 | ] 5 | crop_size = (512, 1024) 6 | data_preprocessor = dict(size=crop_size) 7 | model = dict(data_preprocessor=data_preprocessor) 8 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/upernet/upernet_r50_4xb2-80k_cityscapes-769x769.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/models/upernet_r50.py', 3 | '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', 4 | '../_base_/schedules/schedule_80k.py' 5 | ] 6 | crop_size = (769, 769) 7 | data_preprocessor = dict(size=crop_size) 8 | model = dict( 9 | data_preprocessor=data_preprocessor, 10 | decode_head=dict(align_corners=True), 11 | auxiliary_head=dict(align_corners=True), 12 | test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513))) 13 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/upernet/upernet_r50_4xb4-160k_ade20k-512x512.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/models/upernet_r50.py', '../_base_/datasets/ade20k.py', 3 | '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' 4 | ] 5 | crop_size = (512, 512) 6 | data_preprocessor = dict(size=crop_size) 7 | model = dict( 8 | data_preprocessor=data_preprocessor, 9 | decode_head=dict(num_classes=150), 10 | auxiliary_head=dict(num_classes=150)) 11 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/upernet/upernet_r50_4xb4-20k_voc12aug-512x512.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/models/upernet_r50.py', 3 | '../_base_/datasets/pascal_voc12_aug.py', 
'../_base_/default_runtime.py', 4 | '../_base_/schedules/schedule_20k.py' 5 | ] 6 | crop_size = (512, 512) 7 | data_preprocessor = dict(size=crop_size) 8 | model = dict( 9 | data_preprocessor=data_preprocessor, 10 | decode_head=dict(num_classes=21), 11 | auxiliary_head=dict(num_classes=21)) 12 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/upernet/upernet_r50_4xb4-40k_voc12aug-512x512.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/models/upernet_r50.py', 3 | '../_base_/datasets/pascal_voc12_aug.py', '../_base_/default_runtime.py', 4 | '../_base_/schedules/schedule_40k.py' 5 | ] 6 | crop_size = (512, 512) 7 | data_preprocessor = dict(size=crop_size) 8 | model = dict( 9 | data_preprocessor=data_preprocessor, 10 | decode_head=dict(num_classes=21), 11 | auxiliary_head=dict(num_classes=21)) 12 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/upernet/upernet_r50_4xb4-80k_ade20k-512x512.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/models/upernet_r50.py', '../_base_/datasets/ade20k.py', 3 | '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' 4 | ] 5 | crop_size = (512, 512) 6 | data_preprocessor = dict(size=crop_size) 7 | model = dict( 8 | data_preprocessor=data_preprocessor, 9 | decode_head=dict(num_classes=150), 10 | auxiliary_head=dict(num_classes=150)) 11 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/vit/vit_deit-b16-ln_mln_upernet_8xb2-160k_ade20k-512x512.py: -------------------------------------------------------------------------------- 1 | _base_ = './vit_vit-b16_mln_upernet_8xb2-160k_ade20k-512x512.py' 2 | 3 | model = dict( 4 | pretrained='pretrain/deit_base_patch16_224-b5f2ef4d.pth', 5 | backbone=dict(drop_path_rate=0.1, final_norm=True)) 6 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/vit/vit_deit-b16_mln_upernet_8xb2-160k_ade20k-512x512.py: -------------------------------------------------------------------------------- 1 | _base_ = './vit_vit-b16_mln_upernet_8xb2-160k_ade20k-512x512.py' 2 | 3 | model = dict( 4 | pretrained='pretrain/deit_base_patch16_224-b5f2ef4d.pth', 5 | backbone=dict(drop_path_rate=0.1), 6 | ) 7 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/vit/vit_deit-b16_upernet_8xb2-160k_ade20k-512x512.py: -------------------------------------------------------------------------------- 1 | _base_ = './vit_vit-b16_mln_upernet_8xb2-160k_ade20k-512x512.py' 2 | 3 | model = dict( 4 | pretrained='pretrain/deit_base_patch16_224-b5f2ef4d.pth', 5 | backbone=dict(drop_path_rate=0.1), 6 | neck=None) 7 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/vit/vit_deit-b16_upernet_8xb2-80k_ade20k-512x512.py: -------------------------------------------------------------------------------- 1 | _base_ = './vit_vit-b16_mln_upernet_8xb2-80k_ade20k-512x512.py' 2 | 3 | model = dict( 4 | pretrained='pretrain/deit_base_patch16_224-b5f2ef4d.pth', 5 | backbone=dict(drop_path_rate=0.1), 6 | neck=None) 7 | -------------------------------------------------------------------------------- 
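Every `_base_ = '...'` config above is a thin override: mmengine resolves the base file recursively and merges the child's dicts on top, so the DeiT variants only swap the pretrained checkpoint and a few backbone fields while inheriting everything else from the ViT-B/16 base. A minimal sketch of inspecting a merged config (the mmengine `Config` API is standard; the printed values follow from the config shown above):

```python
from mmengine.config import Config

# `_base_` is resolved recursively; child keys override base keys.
cfg = Config.fromfile(
    'configs/vit/vit_deit-b16_mln_upernet_8xb2-160k_ade20k-512x512.py')
print(cfg.model.pretrained)               # pretrain/deit_base_patch16_224-b5f2ef4d.pth
print(cfg.model.backbone.drop_path_rate)  # 0.1, set by this override
```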
/2DVMamba/segmentation/configs/vit/vit_deit-s16-ln_mln_upernet_8xb2-160k_ade20k-512x512.py: -------------------------------------------------------------------------------- 1 | _base_ = './vit_vit-b16_mln_upernet_8xb2-160k_ade20k-512x512.py' 2 | 3 | model = dict( 4 | pretrained='pretrain/deit_small_patch16_224-cd65a155.pth', 5 | backbone=dict( 6 | num_heads=6, embed_dims=384, drop_path_rate=0.1, final_norm=True), 7 | decode_head=dict(num_classes=150, in_channels=[384, 384, 384, 384]), 8 | neck=dict(in_channels=[384, 384, 384, 384], out_channels=384), 9 | auxiliary_head=dict(num_classes=150, in_channels=384)) 10 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/vit/vit_deit-s16_mln_upernet_8xb2-160k_ade20k-512x512.py: -------------------------------------------------------------------------------- 1 | _base_ = './vit_vit-b16_mln_upernet_8xb2-160k_ade20k-512x512.py' 2 | 3 | model = dict( 4 | pretrained='pretrain/deit_small_patch16_224-cd65a155.pth', 5 | backbone=dict(num_heads=6, embed_dims=384, drop_path_rate=0.1), 6 | decode_head=dict(num_classes=150, in_channels=[384, 384, 384, 384]), 7 | neck=dict(in_channels=[384, 384, 384, 384], out_channels=384), 8 | auxiliary_head=dict(num_classes=150, in_channels=384)) 9 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/vit/vit_deit-s16_upernet_8xb2-160k_ade20k-512x512.py: -------------------------------------------------------------------------------- 1 | _base_ = './vit_vit-b16_mln_upernet_8xb2-80k_ade20k-512x512.py' 2 | 3 | model = dict( 4 | pretrained='pretrain/deit_small_patch16_224-cd65a155.pth', 5 | backbone=dict(num_heads=6, embed_dims=384, drop_path_rate=0.1), 6 | decode_head=dict(num_classes=150, in_channels=[384, 384, 384, 384]), 7 | neck=None, 8 | auxiliary_head=dict(num_classes=150, in_channels=384)) 9 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/vit/vit_deit-s16_upernet_8xb2-80k_ade20k-512x512.py: -------------------------------------------------------------------------------- 1 | _base_ = './vit_vit-b16_mln_upernet_8xb2-80k_ade20k-512x512.py' 2 | 3 | model = dict( 4 | pretrained='pretrain/deit_small_patch16_224-cd65a155.pth', 5 | backbone=dict(num_heads=6, embed_dims=384, drop_path_rate=0.1), 6 | decode_head=dict(num_classes=150, in_channels=[384, 384, 384, 384]), 7 | neck=None, 8 | auxiliary_head=dict(num_classes=150, in_channels=384)) 9 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/vit/vit_vit-b16-ln_mln_upernet_8xb2-160k_ade20k-512x512.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/models/upernet_vit-b16_ln_mln.py', 3 | '../_base_/datasets/ade20k.py', '../_base_/default_runtime.py', 4 | '../_base_/schedules/schedule_160k.py' 5 | ] 6 | crop_size = (512, 512) 7 | data_preprocessor = dict(size=crop_size) 8 | model = dict( 9 | data_preprocessor=data_preprocessor, 10 | pretrained='pretrain/vit_base_patch16_224.pth', 11 | backbone=dict(drop_path_rate=0.1, final_norm=True), 12 | decode_head=dict(num_classes=150), 13 | auxiliary_head=dict(num_classes=150)) 14 | 15 | # AdamW optimizer, no weight decay for position embedding & layer norm 16 | # in backbone 17 | optim_wrapper = dict( 18 | _delete_=True, 19 | type='OptimWrapper', 20 | optimizer=dict( 21 | type='AdamW', lr=0.00006, betas=(0.9, 0.999), 
weight_decay=0.01), 22 | paramwise_cfg=dict( 23 | custom_keys={ 24 | 'pos_embed': dict(decay_mult=0.), 25 | 'cls_token': dict(decay_mult=0.), 26 | 'norm': dict(decay_mult=0.) 27 | })) 28 | 29 | param_scheduler = [ 30 | dict( 31 | type='LinearLR', start_factor=1e-6, by_epoch=False, begin=0, end=1500), 32 | dict( 33 | type='PolyLR', 34 | eta_min=0.0, 35 | power=1.0, 36 | begin=1500, 37 | end=160000, 38 | by_epoch=False, 39 | ) 40 | ] 41 | 42 | # By default, models are trained on 8 GPUs with 2 images per GPU 43 | train_dataloader = dict(batch_size=2) 44 | val_dataloader = dict(batch_size=1) 45 | test_dataloader = val_dataloader 46 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/vit/vit_vit-b16_mln_upernet_8xb2-160k_ade20k-512x512.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/models/upernet_vit-b16_ln_mln.py', 3 | '../_base_/datasets/ade20k.py', '../_base_/default_runtime.py', 4 | '../_base_/schedules/schedule_160k.py' 5 | ] 6 | crop_size = (512, 512) 7 | data_preprocessor = dict(size=crop_size) 8 | model = dict( 9 | data_preprocessor=data_preprocessor, 10 | pretrained='pretrain/vit_base_patch16_224.pth', 11 | decode_head=dict(num_classes=150), 12 | auxiliary_head=dict(num_classes=150)) 13 | 14 | # AdamW optimizer, no weight decay for position embedding & layer norm 15 | # in backbone 16 | optim_wrapper = dict( 17 | _delete_=True, 18 | type='OptimWrapper', 19 | optimizer=dict( 20 | type='AdamW', lr=0.00006, betas=(0.9, 0.999), weight_decay=0.01), 21 | paramwise_cfg=dict( 22 | custom_keys={ 23 | 'pos_embed': dict(decay_mult=0.), 24 | 'cls_token': dict(decay_mult=0.), 25 | 'norm': dict(decay_mult=0.) 26 | })) 27 | 28 | param_scheduler = [ 29 | dict( 30 | type='LinearLR', start_factor=1e-6, by_epoch=False, begin=0, end=1500), 31 | dict( 32 | type='PolyLR', 33 | eta_min=0.0, 34 | power=1.0, 35 | begin=1500, 36 | end=160000, 37 | by_epoch=False, 38 | ) 39 | ] 40 | 41 | # By default, models are trained on 8 GPUs with 2 images per GPU 42 | train_dataloader = dict(batch_size=2) 43 | val_dataloader = dict(batch_size=1) 44 | test_dataloader = val_dataloader 45 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/vit/vit_vit-b16_mln_upernet_8xb2-80k_ade20k-512x512.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/models/upernet_vit-b16_ln_mln.py', 3 | '../_base_/datasets/ade20k.py', '../_base_/default_runtime.py', 4 | '../_base_/schedules/schedule_80k.py' 5 | ] 6 | crop_size = (512, 512) 7 | data_preprocessor = dict(size=crop_size) 8 | model = dict( 9 | data_preprocessor=data_preprocessor, 10 | pretrained='pretrain/vit_base_patch16_224.pth', 11 | decode_head=dict(num_classes=150), 12 | auxiliary_head=dict(num_classes=150)) 13 | 14 | # AdamW optimizer, no weight decay for position embedding & layer norm 15 | # in backbone 16 | optim_wrapper = dict( 17 | _delete_=True, 18 | type='OptimWrapper', 19 | optimizer=dict( 20 | type='AdamW', lr=0.00006, betas=(0.9, 0.999), weight_decay=0.01), 21 | paramwise_cfg=dict( 22 | custom_keys={ 23 | 'pos_embed': dict(decay_mult=0.), 24 | 'cls_token': dict(decay_mult=0.), 25 | 'norm': dict(decay_mult=0.) 
26 | })) 27 | 28 | param_scheduler = [ 29 | dict( 30 | type='LinearLR', start_factor=1e-6, by_epoch=False, begin=0, end=1500), 31 | dict( 32 | type='PolyLR', 33 | eta_min=0.0, 34 | power=1.0, 35 | begin=1500, 36 | end=80000, 37 | by_epoch=False, 38 | ) 39 | ] 40 | 41 | # By default, models are trained on 8 GPUs with 2 images per GPU 42 | train_dataloader = dict(batch_size=2) 43 | val_dataloader = dict(batch_size=1) 44 | test_dataloader = val_dataloader 45 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/vssm_2d/upernet_vssm_2d_4xb4-160k_ade20k-512x512_tiny.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../swin/swin-tiny-patch4-window7-in1k-pre_upernet_8xb2-160k_ade20k-512x512.py' 3 | ] 4 | # checkpoint_path = ('/scratch/KurcGroup/jingwei/result/v2dmamba/vmamba/' 5 | # 'vmambav2v_2d_tiny_224/vssm1_tiny_0230s/20241023030251/' 6 | # 'ckpt_epoch_269.pth') # noqa 7 | # checkpoint_path = ('/gpfs/scratch/jingwezhang/checkpoint/v2dmamba/tiny_1k/ckpt_epoch_269.pth') # noqa 8 | checkpoint_path = ('') 9 | model = dict( 10 | backbone=dict( 11 | type='MM_VSSM', 12 | out_indices=(0, 1, 2, 3), 13 | pretrained=checkpoint_path, # here is the path 14 | # copied from classification/configs/vssm/vssm_tiny_224.yaml 15 | dims=96, 16 | # depths=(2, 2, 5, 2), 17 | depths=(2, 2, 8, 2), 18 | ssm_d_state=1, 19 | ssm_dt_rank="auto", 20 | # ssm_ratio=2.0, 21 | ssm_ratio=1.0, 22 | ssm_conv=3, 23 | ssm_conv_bias=False, 24 | forward_type="v05_noz", # v3_noz, 25 | mlp_ratio=4.0, 26 | downsample_version="v3", 27 | patchembed_version="v2", 28 | drop_path_rate=0.2, 29 | norm_layer="ln2d", 30 | use_v2d=True 31 | ),) 32 | train_dataloader = dict(batch_size=4) # as gpus=4 33 | 34 | # default_hooks = dict( 35 | # timer=dict(type='IterTimerHook'), 36 | # logger=dict(type='LoggerHook', interval=50, log_metric_by_epoch=False), 37 | # param_scheduler=dict(type='ParamSchedulerHook'), 38 | # checkpoint=dict(type='CheckpointHook', by_epoch=False, interval=2000), 39 | # sampler_seed=dict(type='DistSamplerSeedHook'), 40 | # visualization=dict(type='SegVisualizationHook', draw=True, interval=1)) 41 | 42 | 43 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/model.py: -------------------------------------------------------------------------------- 1 | import os 2 | from functools import partial 3 | from typing import Callable 4 | 5 | import torch 6 | from torch import nn 7 | from torch.utils import checkpoint 8 | 9 | from mmengine.model import BaseModule 10 | from mmdet.registry import MODELS as MODELS_MMDET 11 | from mmseg.registry import MODELS as MODELS_MMSEG 12 | 13 | def import_abspy(name="models", path="classification/"): 14 | import sys 15 | import importlib 16 | path = os.path.abspath(path) 17 | assert os.path.isdir(path) 18 | sys.path.insert(0, path) 19 | module = importlib.import_module(name) 20 | sys.path.pop(0) 21 | return module 22 | 23 | build = import_abspy( 24 | "models", 25 | os.path.join(os.path.dirname(os.path.abspath(__file__)), "../classification/"), 26 | ) 27 | Backbone_VSSM: nn.Module = build.vmamba.Backbone_VSSM 28 | 29 | @MODELS_MMSEG.register_module() 30 | @MODELS_MMDET.register_module() 31 | class MM_VSSM(BaseModule, Backbone_VSSM): 32 | def __init__(self, *args, **kwargs): 33 | BaseModule.__init__(self) 34 | Backbone_VSSM.__init__(self, *args, **kwargs) 35 | 36 | 
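`model.py` above is the glue between the classification repo and MMSegmentation: it imports `Backbone_VSSM` from `classification/models` and registers it as `MM_VSSM` in both the mmseg and mmdet model registries, which is why the readme below notes that `import model` had to be added to `tools/train.py` and `tools/test.py`. A hedged sketch of building the registered backbone by hand (keyword arguments are trimmed from the vssm_2d config above; unlisted ones are assumed to have defaults):

```python
import model  # noqa: F401  # side effect: registers MM_VSSM in the registries
from mmseg.registry import MODELS

# Build the backbone from a config dict, as the mmengine runner does internally.
backbone = MODELS.build(dict(
    type='MM_VSSM',
    out_indices=(0, 1, 2, 3),
    pretrained='',            # path to a classification checkpoint, if any
    dims=96,
    depths=(2, 2, 8, 2),
    use_v2d=True,
))
```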
-------------------------------------------------------------------------------- /2DVMamba/segmentation/readme.md: -------------------------------------------------------------------------------- 1 | ## origins 2 | `configs/` and `tools/` are copied from https://github.com/open-mmlab/mmsegmentation, `version 1.2.2` 3 | 4 | ## modifications 5 | `import model` is added at `tools/train.py#13` 6 | `import model` is added at `tools/test.py#8` 7 | 8 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/tools/dataset_converters/cityscapes.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import argparse 3 | import os.path as osp 4 | 5 | from cityscapesscripts.preparation.json2labelImg import json2labelImg 6 | from mmengine.utils import (mkdir_or_exist, scandir, track_parallel_progress, 7 | track_progress) 8 | 9 | 10 | def convert_json_to_label(json_file): 11 | label_file = json_file.replace('_polygons.json', '_labelTrainIds.png') 12 | json2labelImg(json_file, label_file, 'trainIds') 13 | 14 | 15 | def parse_args(): 16 | parser = argparse.ArgumentParser( 17 | description='Convert Cityscapes annotations to TrainIds') 18 | parser.add_argument('cityscapes_path', help='cityscapes data path') 19 | parser.add_argument('--gt-dir', default='gtFine', type=str) 20 | parser.add_argument('-o', '--out-dir', help='output path') 21 | parser.add_argument( 22 | '--nproc', default=1, type=int, help='number of processes') 23 | args = parser.parse_args() 24 | return args 25 | 26 | 27 | def main(): 28 | args = parse_args() 29 | cityscapes_path = args.cityscapes_path 30 | out_dir = args.out_dir if args.out_dir else cityscapes_path 31 | mkdir_or_exist(out_dir) 32 | 33 | gt_dir = osp.join(cityscapes_path, args.gt_dir) 34 | 35 | poly_files = [] 36 | for poly in scandir(gt_dir, '_polygons.json', recursive=True): 37 | poly_file = osp.join(gt_dir, poly) 38 | poly_files.append(poly_file) 39 | if args.nproc > 1: 40 | track_parallel_progress(convert_json_to_label, poly_files, args.nproc) 41 | else: 42 | track_progress(convert_json_to_label, poly_files) 43 | 44 | split_names = ['train', 'val', 'test'] 45 | 46 | for split in split_names: 47 | filenames = [] 48 | for poly in scandir( 49 | osp.join(gt_dir, split), '_polygons.json', recursive=True): 50 | filenames.append(poly.replace('_gtFine_polygons.json', '')) 51 | with open(osp.join(out_dir, f'{split}.txt'), 'w') as f: 52 | f.writelines(name + '\n' for name in filenames) 53 | 54 | 55 | if __name__ == '__main__': 56 | main() 57 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/tools/dist_test.sh: -------------------------------------------------------------------------------- 1 | CONFIG=$1 2 | CHECKPOINT=$2 3 | GPUS=$3 4 | NNODES=${NNODES:-1} 5 | NODE_RANK=${NODE_RANK:-0} 6 | PORT=${PORT:-29500} 7 | MASTER_ADDR=${MASTER_ADDR:-"127.0.0.1"} 8 | 9 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 10 | python -m torch.distributed.launch \ 11 | --nnodes=$NNODES \ 12 | --node_rank=$NODE_RANK \ 13 | --master_addr=$MASTER_ADDR \ 14 | --nproc_per_node=$GPUS \ 15 | --master_port=$PORT \ 16 | $(dirname "$0")/test.py \ 17 | $CONFIG \ 18 | $CHECKPOINT \ 19 | --launcher pytorch \ 20 | ${@:4} 21 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/tools/dist_train.sh: -------------------------------------------------------------------------------- 1 | 
CONFIG=$1 2 | GPUS=$2 3 | NNODES=${NNODES:-1} 4 | NODE_RANK=${NODE_RANK:-0} 5 | PORT=${PORT:-29500} 6 | MASTER_ADDR=${MASTER_ADDR:-"127.0.0.1"} 7 | 8 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 9 | python -m torch.distributed.launch \ 10 | --nnodes=$NNODES \ 11 | --node_rank=$NODE_RANK \ 12 | --master_addr=$MASTER_ADDR \ 13 | --nproc_per_node=$GPUS \ 14 | --master_port=$PORT \ 15 | $(dirname "$0")/train.py \ 16 | $CONFIG \ 17 | --launcher pytorch ${@:3} 18 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/tools/misc/publish_model.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import argparse 3 | import subprocess 4 | from hashlib import sha256 5 | 6 | import torch 7 | 8 | BLOCK_SIZE = 128 * 1024 9 | 10 | 11 | def parse_args(): 12 | parser = argparse.ArgumentParser( 13 | description='Process a checkpoint to be published') 14 | parser.add_argument('in_file', help='input checkpoint filename') 15 | parser.add_argument('out_file', help='output checkpoint filename') 16 | args = parser.parse_args() 17 | return args 18 | 19 | 20 | def sha256sum(filename: str) -> str: 21 | """Compute SHA256 message digest from a file.""" 22 | hash_func = sha256() 23 | byte_array = bytearray(BLOCK_SIZE) 24 | memory_view = memoryview(byte_array) 25 | with open(filename, 'rb', buffering=0) as file: 26 | for block in iter(lambda: file.readinto(memory_view), 0): 27 | hash_func.update(memory_view[:block]) 28 | return hash_func.hexdigest() 29 | 30 | 31 | def process_checkpoint(in_file, out_file): 32 | checkpoint = torch.load(in_file, map_location='cpu') 33 | # remove optimizer for smaller file size 34 | if 'optimizer' in checkpoint: 35 | del checkpoint['optimizer'] 36 | # if it is necessary to remove some sensitive data in checkpoint['meta'], 37 | # add the code here. 38 | torch.save(checkpoint, out_file) 39 | sha = sha256sum(out_file)  # hash the published file, not the input 40 | final_file = out_file.removesuffix('.pth') + f'-{sha[:8]}.pth'  # rstrip('.pth') would strip characters, not the suffix 41 | subprocess.Popen(['mv', out_file, final_file]) 42 | 43 | 44 | def main(): 45 | args = parse_args() 46 | process_checkpoint(args.in_file, args.out_file) 47 | 48 | 49 | if __name__ == '__main__': 50 | main() 51 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/tools/model_converters/beit2mmseg.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
2 | import argparse 3 | import os.path as osp 4 | from collections import OrderedDict 5 | 6 | import mmengine 7 | import torch 8 | from mmengine.runner import CheckpointLoader 9 | 10 | 11 | def convert_beit(ckpt): 12 | new_ckpt = OrderedDict() 13 | 14 | for k, v in ckpt.items(): 15 | if k.startswith('patch_embed'): 16 | new_key = k.replace('patch_embed.proj', 'patch_embed.projection') 17 | new_ckpt[new_key] = v 18 | elif k.startswith('blocks'):  # elif, so patch_embed keys are not re-added by the else branch 19 | new_key = k.replace('blocks', 'layers') 20 | if 'norm' in new_key: 21 | new_key = new_key.replace('norm', 'ln') 22 | elif 'mlp.fc1' in new_key: 23 | new_key = new_key.replace('mlp.fc1', 'ffn.layers.0.0') 24 | elif 'mlp.fc2' in new_key: 25 | new_key = new_key.replace('mlp.fc2', 'ffn.layers.1') 26 | new_ckpt[new_key] = v 27 | else: 28 | new_key = k 29 | new_ckpt[new_key] = v 30 | 31 | return new_ckpt 32 | 33 | 34 | def main(): 35 | parser = argparse.ArgumentParser( 36 | description='Convert keys in official pretrained beit models to ' 37 | 'MMSegmentation style.') 38 | parser.add_argument('src', help='src model path or url') 39 | # The dst path must be a full path of the new checkpoint. 40 | parser.add_argument('dst', help='save path') 41 | args = parser.parse_args() 42 | 43 | checkpoint = CheckpointLoader.load_checkpoint(args.src, map_location='cpu') 44 | if 'state_dict' in checkpoint: 45 | state_dict = checkpoint['state_dict'] 46 | elif 'model' in checkpoint: 47 | state_dict = checkpoint['model'] 48 | else: 49 | state_dict = checkpoint 50 | weight = convert_beit(state_dict) 51 | mmengine.mkdir_or_exist(osp.dirname(args.dst)) 52 | torch.save(weight, args.dst) 53 | 54 | 55 | if __name__ == '__main__': 56 | main() 57 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/tools/model_converters/vit2mmseg.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import argparse 3 | import os.path as osp 4 | from collections import OrderedDict 5 | 6 | import mmengine 7 | import torch 8 | from mmengine.runner import CheckpointLoader 9 | 10 | 11 | def convert_vit(ckpt): 12 | 13 | new_ckpt = OrderedDict() 14 | 15 | for k, v in ckpt.items(): 16 | if k.startswith('head'): 17 | continue 18 | if k.startswith('norm'): 19 | new_k = k.replace('norm.', 'ln1.') 20 | elif k.startswith('patch_embed'): 21 | if 'proj' in k: 22 | new_k = k.replace('proj', 'projection') 23 | else: 24 | new_k = k 25 | elif k.startswith('blocks'): 26 | if 'norm' in k: 27 | new_k = k.replace('norm', 'ln') 28 | elif 'mlp.fc1' in k: 29 | new_k = k.replace('mlp.fc1', 'ffn.layers.0.0') 30 | elif 'mlp.fc2' in k: 31 | new_k = k.replace('mlp.fc2', 'ffn.layers.1') 32 | elif 'attn.qkv' in k: 33 | new_k = k.replace('attn.qkv.', 'attn.attn.in_proj_') 34 | elif 'attn.proj' in k: 35 | new_k = k.replace('attn.proj', 'attn.attn.out_proj') 36 | else: 37 | new_k = k 38 | new_k = new_k.replace('blocks.', 'layers.') 39 | else: 40 | new_k = k 41 | new_ckpt[new_k] = v 42 | 43 | return new_ckpt 44 | 45 | 46 | def main(): 47 | parser = argparse.ArgumentParser( 48 | description='Convert keys in timm pretrained vit models to ' 49 | 'MMSegmentation style.') 50 | parser.add_argument('src', help='src model path or url') 51 | # The dst path must be a full path of the new checkpoint. 
52 | parser.add_argument('dst', help='save path') 53 | args = parser.parse_args() 54 | 55 | checkpoint = CheckpointLoader.load_checkpoint(args.src, map_location='cpu') 56 | if 'state_dict' in checkpoint: 57 | # timm checkpoint 58 | state_dict = checkpoint['state_dict'] 59 | elif 'model' in checkpoint: 60 | # deit checkpoint 61 | state_dict = checkpoint['model'] 62 | else: 63 | state_dict = checkpoint 64 | weight = convert_vit(state_dict) 65 | mmengine.mkdir_or_exist(osp.dirname(args.dst)) 66 | torch.save(weight, args.dst) 67 | 68 | 69 | if __name__ == '__main__': 70 | main() 71 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/tools/slurm_test.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -x 4 | 5 | PARTITION=$1 6 | JOB_NAME=$2 7 | CONFIG=$3 8 | CHECKPOINT=$4 9 | GPUS=${GPUS:-4} 10 | GPUS_PER_NODE=${GPUS_PER_NODE:-4} 11 | CPUS_PER_TASK=${CPUS_PER_TASK:-5} 12 | PY_ARGS=${@:5} 13 | SRUN_ARGS=${SRUN_ARGS:-""} 14 | 15 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 16 | srun -p ${PARTITION} \ 17 | --job-name=${JOB_NAME} \ 18 | --gres=gpu:${GPUS_PER_NODE} \ 19 | --ntasks=${GPUS} \ 20 | --ntasks-per-node=${GPUS_PER_NODE} \ 21 | --cpus-per-task=${CPUS_PER_TASK} \ 22 | --kill-on-bad-exit=1 \ 23 | ${SRUN_ARGS} \ 24 | python -u tools/test.py ${CONFIG} ${CHECKPOINT} --launcher="slurm" ${PY_ARGS} 25 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/tools/slurm_train.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -x 4 | 5 | PARTITION=$1 6 | JOB_NAME=$2 7 | CONFIG=$3 8 | GPUS=${GPUS:-4} 9 | GPUS_PER_NODE=${GPUS_PER_NODE:-4} 10 | CPUS_PER_TASK=${CPUS_PER_TASK:-5} 11 | SRUN_ARGS=${SRUN_ARGS:-""} 12 | PY_ARGS=${@:4} 13 | 14 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 15 | srun -p ${PARTITION} \ 16 | --job-name=${JOB_NAME} \ 17 | --gres=gpu:${GPUS_PER_NODE} \ 18 | --ntasks=${GPUS} \ 19 | --ntasks-per-node=${GPUS_PER_NODE} \ 20 | --cpus-per-task=${CPUS_PER_TASK} \ 21 | --kill-on-bad-exit=1 \ 22 | ${SRUN_ARGS} \ 23 | python -u tools/train.py ${CONFIG} --launcher="slurm" ${PY_ARGS} 24 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/tools/torchserve/mmseg_handler.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import base64 3 | import os 4 | 5 | import cv2 6 | import mmcv 7 | import torch 8 | from mmengine.model.utils import revert_sync_batchnorm 9 | from ts.torch_handler.base_handler import BaseHandler 10 | 11 | from mmseg.apis import inference_model, init_model 12 | 13 | 14 | class MMsegHandler(BaseHandler): 15 | 16 | def initialize(self, context): 17 | properties = context.system_properties 18 | self.map_location = 'cuda' if torch.cuda.is_available() else 'cpu' 19 | self.device = torch.device(self.map_location + ':' + 20 | str(properties.get('gpu_id')) if torch.cuda. 
21 | is_available() else self.map_location) 22 | self.manifest = context.manifest 23 | 24 | model_dir = properties.get('model_dir') 25 | serialized_file = self.manifest['model']['serializedFile'] 26 | checkpoint = os.path.join(model_dir, serialized_file) 27 | self.config_file = os.path.join(model_dir, 'config.py') 28 | 29 | self.model = init_model(self.config_file, checkpoint, self.device) 30 | self.model = revert_sync_batchnorm(self.model) 31 | self.initialized = True 32 | 33 | def preprocess(self, data): 34 | images = [] 35 | 36 | for row in data: 37 | image = row.get('data') or row.get('body') 38 | if isinstance(image, str): 39 | image = base64.b64decode(image) 40 | image = mmcv.imfrombytes(image) 41 | images.append(image) 42 | 43 | return images 44 | 45 | def inference(self, data, *args, **kwargs): 46 | results = [inference_model(self.model, img) for img in data] 47 | return results 48 | 49 | def postprocess(self, data): 50 | output = [] 51 | 52 | for image_result in data: 53 | _, buffer = cv2.imencode('.png', image_result[0].astype('uint8')) 54 | content = buffer.tobytes() 55 | output.append(content) 56 | return output 57 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/tools/torchserve/test_torchserve.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from argparse import ArgumentParser 3 | from io import BytesIO 4 | 5 | import matplotlib.pyplot as plt 6 | import mmcv 7 | import requests 8 | 9 | from mmseg.apis import inference_model, init_model 10 | 11 | 12 | def parse_args(): 13 | parser = ArgumentParser( 14 | description='Compare results of torchserve and pytorch, ' 15 | 'and visualize them.') 16 | parser.add_argument('img', help='Image file') 17 | parser.add_argument('config', help='Config file') 18 | parser.add_argument('checkpoint', help='Checkpoint file') 19 | parser.add_argument('model_name', help='The model name in the server') 20 | parser.add_argument( 21 | '--inference-addr', 22 | default='127.0.0.1:8080', 23 | help='Address and port of the inference server') 24 | parser.add_argument( 25 | '--result-image', 26 | type=str, 27 | default=None, 28 | help='save server output in result-image') 29 | parser.add_argument( 30 | '--device', default='cuda:0', help='Device used for inference') 31 | 32 | args = parser.parse_args() 33 | return args 34 | 35 | 36 | def main(args): 37 | url = 'http://' + args.inference_addr + '/predictions/' + args.model_name 38 | with open(args.img, 'rb') as image: 39 | tmp_res = requests.post(url, image) 40 | content = tmp_res.content 41 | if args.result_image: 42 | with open(args.result_image, 'wb') as out_image: 43 | out_image.write(content) 44 | plt.imshow(mmcv.imread(args.result_image, 'grayscale')) 45 | plt.show() 46 | else: 47 | plt.imshow(plt.imread(BytesIO(content))) 48 | plt.show() 49 | model = init_model(args.config, args.checkpoint, args.device) 50 | image = mmcv.imread(args.img) 51 | result = inference_model(model, image) 52 | plt.imshow(result[0]) 53 | plt.show() 54 | 55 | 56 | if __name__ == '__main__': 57 | args = parse_args() 58 | main(args) 59 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/vis/vis_seg.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from mmseg.apis import init_model, inference_model, show_result_pyplot 4 | 5 | 6 | # config_path = 
'configs/vssm1/upernet_vssm_4xb4-160k_ade20k-512x512_tiny.py' 7 | # checkpoint_path = ('/scratch/KurcGroup/jingwei/gpfs/checkpoint/vmamba/' 8 | # 'upernet_vssm_4xb4-160k_ade20k-512x512_tiny_s_iter_160000.pth') 9 | config_path = 'configs/vssm_2d/upernet_vssm_2d_4xb4-160k_ade20k-512x512_tiny.py' 10 | checkpoint_path = ('/gpfs/scratch/jingwezhang/result/v2dmamba/v2dmamba_fix/v2dmamba_t' 11 | '/segmentation/iter_160000.pth') 12 | img_path = 'demo/demo.png' 13 | validation_path = ('/scratch/KurcGroup/jingwei/Projects/VMamba/segmentation/' 14 | 'data/ade/ADEChallengeData2016/images/validation') 15 | out_dir = '/gpfs/scratch/jingwezhang/result/v2dmamba/v2dmamba_fix/v2dmamba_t/segmentation/' 16 | try: 17 | import segmentation.model 18 | except ImportError: 19 | import model 20 | 21 | if __name__ == '__main__': 22 | image_filenames = os.listdir(validation_path) 23 | 24 | 25 | # build the model from a config file and a checkpoint file 26 | model = init_model(config_path, checkpoint_path, device='cuda:0') 27 | 28 | # inference on given image 29 | result = inference_model(model, img_path) 30 | 31 | # # display the segmentation result 32 | # vis_image = show_result_pyplot(model, img_path, result) 33 | 34 | # save the visualization result; the output image will be written to `work_dirs/result.png` 35 | vis_image = show_result_pyplot(model, img_path, result, out_file='work_dirs/result.png') 36 | 37 | # # Modify the time of displaying images, note that 0 is the special value that means "forever" 38 | # vis_image = show_result_pyplot(model, img_path, result, wait_time=5) -------------------------------------------------------------------------------- /cuda_kernel/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | if [ -d "build" ]; then 4 | rm -r build 5 | fi 6 | 7 | mkdir build 8 | #cmake -DCMAKE_BUILD_TYPE=Release -DPython_ROOT_DIR=/opt/conda -DCUDA_ARCHS="70;75;80" -DBOUNDARY_CHECK=1 -DNAN_SMEM_CHECK=1 -DNAN_GRAD_CHECK=1 -B build 9 | #cmake -DCMAKE_BUILD_TYPE=Release -DPython_ROOT_DIR="/home/jzhang/Dev/anaconda3_2023/envs/vmamba" -DCUDA_ARCHS="70;75;80" -DBOUNDARY_CHECK=1 -B build 10 | cmake -DCMAKE_BUILD_TYPE=Release -DPython_ROOT_DIR="/opt/conda" -DCUDA_ARCHS="70;75;80" -DOUTPUT_DIRECTORY=../../v2dmamba_scan -B build 11 | 12 | #cmake -DCMAKE_BUILD_TYPE=Release -DPython_ROOT_DIR="/home/jzhang/Dev/anaconda3_2023/envs/vmamba" -DCUDA_ARCHS="70;75;80" -DOUTPUT_DIRECTORY=../v2dmamba_scan -B build 13 | 14 | cmake --build build -- -j32 -------------------------------------------------------------------------------- /cuda_kernel/include/scan/commons.h: -------------------------------------------------------------------------------- 1 | #ifndef NDMAMBA_COMMONS_H 2 | #define NDMAMBA_COMMONS_H 3 | 4 | 5 | namespace ndmamba 6 | { 7 | 8 | enum ScanDir : int 9 | { 10 | kHorizontal = 0, 11 | kVertical = 1, 12 | kHorizontalReversed = 2, 13 | kVerticalReversed = 3, 14 | }; 15 | 16 | } // namespace ndmamba 17 | 18 | 19 | #endif // NDMAMBA_COMMONS_H -------------------------------------------------------------------------------- /cuda_kernel/include/selective_scan/global.cuh: -------------------------------------------------------------------------------- 1 | #ifndef NDMAMBA_GLOBAL_CUH 2 | #define NDMAMBA_GLOBAL_CUH 3 | 4 | 5 | namespace ndmamba 6 | { 7 | 8 | // Actual span across input data per block. 
9 | inline constexpr int kMaxDimPerBlock = 32; 10 | 11 | } // namespace ndmamba 12 | 13 | 14 | #endif // NDMAMBA_GLOBAL_CUH 15 | -------------------------------------------------------------------------------- /cuda_kernel/include/selective_scan/static_switch.cuh: -------------------------------------------------------------------------------- 1 | // Inspired by https://github.com/NVIDIA/DALI/blob/main/include/dali/core/static_switch.h 2 | // and https://github.com/pytorch/pytorch/blob/master/aten/src/ATen/Dispatch.h 3 | 4 | #pragma once 5 | 6 | /// @param COND - a boolean expression to switch by 7 | /// @param CONST_NAME - a name given for the constexpr bool variable. 8 | /// @param ... - code to execute for true and false 9 | /// 10 | /// Usage: 11 | /// ``` 12 | /// BOOL_SWITCH(flag, BoolConst, [&] { 13 | /// some_function(...); 14 | /// }); 15 | /// ``` 16 | #define BOOL_SWITCH(COND, CONST_NAME, ...) \ 17 | [&] { \ 18 | if (COND) { \ 19 | constexpr bool CONST_NAME = true; \ 20 | return __VA_ARGS__(); \ 21 | } else { \ 22 | constexpr bool CONST_NAME = false; \ 23 | return __VA_ARGS__(); \ 24 | } \ 25 | }() 26 | -------------------------------------------------------------------------------- /cuda_kernel/src/pscan.cu: -------------------------------------------------------------------------------- 1 | #include <torch/extension.h>  // NOTE: angle-bracket includes and template arguments below were lost in extraction; reconstructed 2 | 3 | 4 | std::vector<at::Tensor> 5 | selective_scan_fwd(const at::Tensor & u, 6 | const at::Tensor & delta, 7 | const at::Tensor & A, 8 | const at::Tensor & B, 9 | const at::Tensor & C, 10 | const c10::optional<at::Tensor> & D_, 11 | const c10::optional<at::Tensor> & z_, 12 | const c10::optional<at::Tensor> & delta_bias_, 13 | bool delta_softplus, 14 | int height, 15 | int width, 16 | bool out_float = true); 17 | 18 | 19 | std::vector<at::Tensor> 20 | selective_scan_bwd(const at::Tensor & u, 21 | const at::Tensor & delta, 22 | const at::Tensor & A, 23 | const at::Tensor & B, 24 | const at::Tensor & C, 25 | const c10::optional<at::Tensor> & D_, 26 | const c10::optional<at::Tensor> & z_, 27 | const c10::optional<at::Tensor> & delta_bias_, 28 | const at::Tensor & dout, 29 | const c10::optional<at::Tensor> & x_, 30 | const c10::optional<at::Tensor> & out_, 31 | c10::optional<at::Tensor> & dz_, 32 | bool delta_softplus, 33 | bool recompute_out_z, 34 | int height, 35 | int width); 36 | 37 | 38 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) 39 | { 40 | using namespace pybind11::literals; 41 | 42 | m.def("fwd", 43 | &selective_scan_fwd, 44 | "Selective scan forward", 45 | "u"_a, 46 | "delta"_a, 47 | "A"_a, 48 | "B"_a, 49 | "C"_a, 50 | "D_"_a, 51 | "z_"_a, 52 | "delta_bias_"_a, 53 | "delta_softplus"_a, 54 | "height"_a, 55 | "width"_a, 56 | "out_float"_a = true); 57 | 58 | m.def("bwd", 59 | &selective_scan_bwd, 60 | "u"_a, 61 | "delta"_a, 62 | "A"_a, 63 | "B"_a, 64 | "C"_a, 65 | "D_"_a, 66 | "z_"_a, 67 | "delta_bias_"_a, 68 | "dout"_a, 69 | "x_"_a, 70 | "out_"_a, 71 | "dz_"_a, 72 | "delta_softplus"_a, 73 | "recompute_out_z"_a, 74 | "height"_a, 75 | "width"_a); 76 | } 77 | -------------------------------------------------------------------------------- /cuda_kernel/src/repo/test_002_warp.cu: -------------------------------------------------------------------------------- 1 | #include <cstdio>  // NOTE: system headers reconstructed; originals lost in extraction 2 | #include <cub/cub.cuh> 3 | #include <thrust/device_vector.h> 4 | #include <thrust/host_vector.h> 5 | 6 | #include "utils/cuda_utils.h" 7 | #include "scan/block_scan.cuh" 8 | 9 | 10 | template <typename T> 11 | __global__ void test() 12 | { 13 | using Scan = mamband::SegWarpScan<T>;  // NOTE: template arguments assumed (lost in extraction) 14 | typename Scan::TempStorage tempStorage; 15 | Scan scan(tempStorage); 16 | 17 | T input = cub::LaneId() / 8; 18 | T inclusiveOutput; 19 | T segmentAggregate; 20 | scan.InclusiveScan(input, inclusiveOutput, cub::Sum(), 
segmentAggregate); 21 | printf("lane %u inclusiveOutput = %f warpAggregate = %f\n", cub::LaneId(), inclusiveOutput, segmentAggregate); 22 | } 23 | 24 | 25 | int main() 26 | { 27 | // thrust::device_vector<float> d_out(8 * 16, 1.0f); 28 | test<float><<<1, 32>>>();  // NOTE: kernel template argument assumed 29 | CUDA_CHECK_LAST_ERROR(); 30 | CUDA_CHECK(cudaDeviceSynchronize()); 31 | 32 | return 0; 33 | } 34 | -------------------------------------------------------------------------------- /cuda_kernel/src/repo/test_003_warp_hw.cu: -------------------------------------------------------------------------------- 1 | #include <cstdio>  // NOTE: system headers reconstructed; originals lost in extraction 2 | #include <cub/cub.cuh> 3 | #include <thrust/device_vector.h> 4 | #include <thrust/host_vector.h> 5 | 6 | #include "utils/cuda_utils.h" 7 | #include "scan/block_scan.cuh" 8 | #include "scan/commons.h" 9 | 10 | 11 | 12 | template <typename T> 13 | struct ScanOp 14 | { 15 | __device__ __forceinline__ T operator()(const T & a, const T & b) = delete; 16 | }; 17 | 18 | 19 | template <> 20 | struct ScanOp<float2> 21 | { 22 | __device__ __forceinline__ float2 operator()(const float2 & a, const float2 & b) 23 | { 24 | return {a.x + b.x, a.y + b.y}; 25 | } 26 | }; 27 | 28 | 29 | __global__ void test() 30 | { 31 | using Scan = mamband::SegWarpScan<float2>;  // NOTE: template arguments assumed (lost in extraction) 32 | typename Scan::TempStorage tempStorage; 33 | Scan scan(tempStorage); 34 | 35 | ScanOp<float2> scanOp; 36 | 37 | float2 input; 38 | input.x = cub::LaneId() / 8; 39 | input.y = cub::LaneId() % 8; 40 | 41 | float2 segAgg; 42 | 43 | scan.InclusiveScan(input, input, scanOp, segAgg, mamband::kHorizontal); 44 | printf("lane %u inclusiveOutput = %f %f warpAggregate = %f %f\n", 45 | cub::LaneId(), 46 | input.x, input.y, 47 | segAgg.x, segAgg.y); 48 | 49 | printf("\n"); 50 | 51 | scan.InclusiveScan(input, input, scanOp, segAgg, mamband::kVertical); 52 | printf("lane %u inclusiveOutput = %f %f warpAggregate = %f %f\n", 53 | cub::LaneId(), 54 | input.x, input.y, 55 | segAgg.x, segAgg.y); 56 | } 57 | 58 | 59 | int main() 60 | { 61 | // thrust::device_vector<float> d_out(8 * 16, 1.0f); 62 | test<<<1, 32>>>(); 63 | CUDA_CHECK_LAST_ERROR(); 64 | CUDA_CHECK(cudaDeviceSynchronize()); 65 | 66 | return 0; 67 | } 68 | -------------------------------------------------------------------------------- /cuda_kernel/src/repo/test_005_block_prefix_callback_op.cu: -------------------------------------------------------------------------------- 1 | #include <cstdio>  // NOTE: system headers reconstructed; originals lost in extraction 2 | #include <cstdlib> 3 | 4 | #include <cub/cub.cuh> 5 | #include <cuda_runtime.h> 6 | #include <thrust/device_vector.h> 7 | #include <thrust/host_vector.h> 8 | 9 | #include "utils/cuda_utils.h" 10 | #include "scan/block_scan.cuh" 11 | #include "scan/commons.h" 12 | 13 | 14 | 15 | template <typename T> 16 | struct ScanOp 17 | { 18 | __device__ __forceinline__ T operator()(const T & a, const T & b) = delete; 19 | }; 20 | 21 | 22 | template <> 23 | struct ScanOp<float> 24 | { 25 | __device__ __forceinline__ float operator()(const float & a, const float & b) 26 | { 27 | return a + b; 28 | } 29 | }; 30 | 31 | 32 | template <> 33 | struct ScanOp<float2> 34 | { 35 | __device__ __forceinline__ float2 operator()(const float2 & a, const float2 & b) 36 | { 37 | return {a.x + b.x, a.y + b.y}; 38 | } 39 | }; 40 | 41 | 42 | template <typename T> 43 | struct BlockPrefixCallbackOp 44 | { 45 | __device__ BlockPrefixCallbackOp(T runningPrefix) : runningPrefix(runningPrefix) {} 46 | 47 | __device__ T operator()(T blockAggregate) 48 | { 49 | T oldPrefix = runningPrefix; 50 | runningPrefix = ScanOp<T>()(runningPrefix, blockAggregate); 51 | return oldPrefix; 52 | } 53 | 54 | T runningPrefix = 0; 55 | }; 56 | 57 | 58 | template <typename T> 59 | __global__ void scan() 60 | { 61 | using Scan = cub::BlockScan<T, 64>;  // NOTE: template arguments assumed to match the <<<1, 64>>> launch 62 | using BlockPrefixCallbackOp = BlockPrefixCallbackOp<T>; 63 | 64 | __shared__ typename Scan::TempStorage
tempStorage; 65 | Scan scan(tempStorage); 66 | 67 | BlockPrefixCallbackOp blockPrefixCallbackOp(0); 68 | 69 | T input = 1; 70 | scan.InclusiveScan(input, input, cub::Sum(), blockPrefixCallbackOp); 71 | 72 | input = 1; 73 | scan.InclusiveScan(input, input, cub::Sum(), blockPrefixCallbackOp); 74 | 75 | const int tid = blockIdx.x * blockDim.x + threadIdx.x; 76 | printf("tid %d input = %f\n", tid, input); 77 | 78 | if (tid == 0) 79 | { 80 | printf("blockPrefixCallbackOp.runningPrefix = %f\n", blockPrefixCallbackOp.runningPrefix); 81 | } 82 | } 83 | 84 | 85 | int main() 86 | { 87 | scan<float><<<1, 64>>>();  // NOTE: kernel template argument assumed 88 | CUDA_CHECK_LAST_ERROR(); 89 | CUDA_CHECK(cudaDeviceSynchronize()); 90 | 91 | return 0; 92 | } 93 | -------------------------------------------------------------------------------- /cuda_kernel/src/selective_scan/selective_scan_bwd_kernel_fp16.cu: -------------------------------------------------------------------------------- 1 | /****************************************************************************** 2 | * Copyright (c) 2023, Tri Dao. 3 | ******************************************************************************/ 4 | 5 | // Split into multiple files to compile in parallel 6 | 7 | #include "selective_scan/selective_scan_bwd_kernel.cuh" 8 | 9 | 10 | template void selective_scan_bwd_cuda<at::Half, float>(SSMParamsBwd & params, cudaStream_t stream);  // NOTE: explicit template arguments assumed (lost in extraction); out_float in pscan.cu suggests <input_t, output_t> pairs 11 | 12 | template void selective_scan_bwd_cuda<at::Half, at::Half>(SSMParamsBwd & params, cudaStream_t stream); 13 | -------------------------------------------------------------------------------- /cuda_kernel/src/selective_scan/selective_scan_bwd_kernel_fp32.cu: -------------------------------------------------------------------------------- 1 | /****************************************************************************** 2 | * Copyright (c) 2023, Tri Dao. 3 | ******************************************************************************/ 4 | 5 | // Split into multiple files to compile in parallel 6 | 7 | #include "selective_scan/selective_scan_bwd_kernel.cuh" 8 | 9 | 10 | template void selective_scan_bwd_cuda<float, float>(SSMParamsBwd & params, cudaStream_t stream);  // NOTE: explicit template arguments assumed (lost in extraction) 11 | 12 | -------------------------------------------------------------------------------- /cuda_kernel/src/selective_scan/selective_scan_fwd_kernel_fp16.cu: -------------------------------------------------------------------------------- 1 | /****************************************************************************** 2 | * Copyright (c) 2023, Tri Dao. 3 | ******************************************************************************/ 4 | 5 | // Split into multiple files to compile in parallel 6 | 7 | #include "selective_scan/selective_scan_fwd_kernel.cuh" 8 | 9 | 10 | template void selective_scan_fwd_cuda<at::Half, float>(SSMParamsBase & params, cudaStream_t stream);  // NOTE: explicit template arguments assumed (lost in extraction) 11 | 12 | template void selective_scan_fwd_cuda<at::Half, at::Half>(SSMParamsBase & params, cudaStream_t stream); 13 | -------------------------------------------------------------------------------- /cuda_kernel/src/selective_scan/selective_scan_fwd_kernel_fp32.cu: -------------------------------------------------------------------------------- 1 | /****************************************************************************** 2 | * Copyright (c) 2023, Tri Dao. 
3 | ******************************************************************************/ 4 | 5 | // Split into multiple files to compile in parallel 6 | 7 | #include "selective_scan/selective_scan_fwd_kernel.cuh" 8 | 9 | 10 | template void selective_scan_fwd_cuda<float, float>(SSMParamsBase & params, cudaStream_t stream);  // NOTE: explicit template arguments assumed (lost in extraction) 11 | -------------------------------------------------------------------------------- /misc/compare.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AtlasAnalyticsLab/2DMamba/803a7c16ce72d93888543de79afecb158e7a67a3/misc/compare.jpg -------------------------------------------------------------------------------- /misc/cuda.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AtlasAnalyticsLab/2DMamba/803a7c16ce72d93888543de79afecb158e7a67a3/misc/cuda.jpg -------------------------------------------------------------------------------- /misc/overview.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AtlasAnalyticsLab/2DMamba/803a7c16ce72d93888543de79afecb158e7a67a3/misc/overview.jpg -------------------------------------------------------------------------------- /misc/overview_github.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AtlasAnalyticsLab/2DMamba/803a7c16ce72d93888543de79afecb158e7a67a3/misc/overview_github.jpg -------------------------------------------------------------------------------- /v2dmamba_scan/__init__.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from .pscan import * 3 | --------------------------------------------------------------------------------
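`v2dmamba_scan/__init__.py` simply re-exports whatever the compiled `pscan` extension exposes (built by `cuda_kernel/build.sh`, note its `-DOUTPUT_DIRECTORY=../../v2dmamba_scan`), i.e. the `fwd`/`bwd` bindings declared in `cuda_kernel/src/pscan.cu`. A rough usage sketch of the forward entry point, going only by the pybind11 signature; the tensor shapes and layout are assumptions, not documented by the source:

```python
import torch
import v2dmamba_scan  # the compiled extension package

B, D, N, H, W = 1, 192, 1, 32, 32   # batch, channels, state size, 2D grid (assumed layout)
L = H * W                            # the 2D grid flattened into a sequence
u     = torch.randn(B, D, L, device='cuda')
delta = torch.randn(B, D, L, device='cuda')
A     = torch.randn(D, N, device='cuda')
B_mat = torch.randn(B, N, L, device='cuda')
C     = torch.randn(B, N, L, device='cuda')

# Positional args mirror the binding: D_, z_, delta_bias_ are optional (None),
# followed by delta_softplus, height, width, out_float. The return value is a
# list of tensors (the output plus saved intermediates, judging by the bwd
# signature's x_/out_ inputs).
outs = v2dmamba_scan.fwd(u, delta, A, B_mat, C, None, None, None,
                         True, H, W, True)
```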