├── .gitignore ├── 2DMambaMIL ├── README.md ├── dataset │ ├── csv_file │ ├── csv_files │ │ ├── classification │ │ │ ├── BRACS.csv │ │ │ ├── DHMC.csv │ │ │ ├── PANDA.csv │ │ │ ├── TCGA-BRCA-label.csv │ │ │ ├── TCGA-BRCA-split.csv │ │ │ ├── TCGA-NSCLC-label.csv.zip │ │ │ └── TCGA-NSCLC-split.csv │ │ └── survival │ │ │ ├── KIRC.csv │ │ │ ├── KIRP.csv │ │ │ ├── LUAD.csv │ │ │ ├── STAD.csv │ │ │ ├── TCGA_KIRC_survival_kfold │ │ │ ├── splits_0.csv │ │ │ ├── splits_1.csv │ │ │ ├── splits_2.csv │ │ │ ├── splits_3.csv │ │ │ └── splits_4.csv │ │ │ ├── TCGA_KIRP_survival_kfold │ │ │ ├── splits_0.csv │ │ │ ├── splits_1.csv │ │ │ ├── splits_2.csv │ │ │ ├── splits_3.csv │ │ │ └── splits_4.csv │ │ │ ├── TCGA_LUAD_survival_kfold │ │ │ ├── splits_0.csv │ │ │ ├── splits_1.csv │ │ │ ├── splits_2.csv │ │ │ ├── splits_3.csv │ │ │ └── splits_4.csv │ │ │ ├── TCGA_STAD_survival_kfold │ │ │ ├── splits_0.csv │ │ │ ├── splits_1.csv │ │ │ ├── splits_2.csv │ │ │ ├── splits_3.csv │ │ │ └── splits_4.csv │ │ │ ├── TCGA_UCEC_survival_kfold │ │ │ ├── splits_0.csv │ │ │ ├── splits_1.csv │ │ │ ├── splits_2.csv │ │ │ ├── splits_3.csv │ │ │ └── splits_4.csv │ │ │ └── UCEC.csv │ ├── dataset_generic.py │ └── dataset_survival.py ├── draw_heatmap.py ├── main.py ├── mamba_ssm │ ├── __init__.py │ ├── models │ │ ├── __init__.py │ │ ├── config_mamba.py │ │ └── mixer_seq_simple.py │ ├── modules │ │ ├── __init__.py │ │ ├── bimamba.py │ │ ├── mamba_simple.py │ │ └── srmamba.py │ ├── ops │ │ ├── __init__.py │ │ ├── selective_scan_interface.py │ │ └── triton │ │ │ ├── __init__.py │ │ │ ├── layernorm.py │ │ │ └── selective_state_update.py │ └── utils │ │ ├── __init__.py │ │ ├── generation.py │ │ └── hf.py ├── models │ ├── ABMIL.py │ ├── CLAM.py │ ├── DFDT.py │ ├── DSMIL.py │ ├── MambaMIL.py │ ├── MambaMIL_2D.py │ ├── S4MIL.py │ ├── TransMIL.py │ ├── __init__.py │ ├── mamba_simple.py │ ├── pscan.py │ ├── pscan_2d.py │ └── pscan_cuda │ │ └── __init__.py └── utils │ ├── __init__.py │ ├── core_utils.py │ ├── file_utils.py │ ├── survival_utils.py │ └── utils.py ├── 2DVMamba ├── classification │ ├── config.py │ ├── configs │ │ └── vssm_2d │ │ │ ├── vmambav2_2d_small_224.yaml │ │ │ └── vmambav2v_2d_tiny_224.yaml │ ├── data │ │ ├── __init__.py │ │ ├── build.py │ │ ├── cached_image_folder.py │ │ ├── data_simmim_ft.py │ │ ├── data_simmim_pt.py │ │ ├── imagenet22k_dataset.py │ │ ├── map22kto1k.txt │ │ ├── samplers.py │ │ └── zipreader.py │ ├── debug_model.py │ ├── main.py │ ├── models │ │ ├── __init__.py │ │ ├── csm_triton.py │ │ ├── csms6s.py │ │ ├── mamba2 │ │ │ ├── __init__.py │ │ │ ├── k_activations.py │ │ │ ├── layer_norm.py │ │ │ ├── layernorm_gated.py │ │ │ ├── selective_state_update.py │ │ │ ├── ssd_bmm.py │ │ │ ├── ssd_chunk_scan.py │ │ │ ├── ssd_chunk_state.py │ │ │ ├── ssd_combined.py │ │ │ ├── ssd_minimal.py │ │ │ └── ssd_state_passing.py │ │ ├── network_utils.py │ │ ├── vmamba.py │ │ └── vmamba_checks.py │ ├── readme.md │ └── utils │ │ ├── cosine_lr.py │ │ ├── logger.py │ │ ├── lr_scheduler.py │ │ ├── optimizer.py │ │ └── utils.py └── segmentation │ ├── configs │ ├── _base_ │ │ ├── datasets │ │ │ ├── ade20k.py │ │ │ ├── ade20k_640x640.py │ │ │ ├── bdd100k.py │ │ │ ├── chase_db1.py │ │ │ ├── cityscapes.py │ │ │ ├── cityscapes_1024x1024.py │ │ │ ├── cityscapes_768x768.py │ │ │ ├── cityscapes_769x769.py │ │ │ ├── cityscapes_832x832.py │ │ │ ├── coco-stuff10k.py │ │ │ ├── coco-stuff164k.py │ │ │ ├── drive.py │ │ │ ├── hrf.py │ │ │ ├── isaid.py │ │ │ ├── levir_256x256.py │ │ │ ├── loveda.py │ │ │ ├── mapillary_v1.py │ │ │ ├── 
mapillary_v1_65.py │ │ │ ├── mapillary_v2.py │ │ │ ├── nyu.py │ │ │ ├── nyu_512x512.py │ │ │ ├── pascal_context.py │ │ │ ├── pascal_context_59.py │ │ │ ├── pascal_voc12.py │ │ │ ├── pascal_voc12_aug.py │ │ │ ├── potsdam.py │ │ │ ├── refuge.py │ │ │ ├── stare.py │ │ │ ├── synapse.py │ │ │ └── vaihingen.py │ │ ├── default_runtime.py │ │ ├── models │ │ │ ├── ann_r50-d8.py │ │ │ ├── apcnet_r50-d8.py │ │ │ ├── bisenetv1_r18-d32.py │ │ │ ├── bisenetv2.py │ │ │ ├── ccnet_r50-d8.py │ │ │ ├── cgnet.py │ │ │ ├── danet_r50-d8.py │ │ │ ├── deeplabv3_r50-d8.py │ │ │ ├── deeplabv3_unet_s5-d16.py │ │ │ ├── deeplabv3plus_r50-d8.py │ │ │ ├── dmnet_r50-d8.py │ │ │ ├── dnl_r50-d8.py │ │ │ ├── dpt_vit-b16.py │ │ │ ├── emanet_r50-d8.py │ │ │ ├── encnet_r50-d8.py │ │ │ ├── erfnet_fcn.py │ │ │ ├── fast_scnn.py │ │ │ ├── fastfcn_r50-d32_jpu_psp.py │ │ │ ├── fcn_hr18.py │ │ │ ├── fcn_r50-d8.py │ │ │ ├── fcn_unet_s5-d16.py │ │ │ ├── fpn_poolformer_s12.py │ │ │ ├── fpn_r50.py │ │ │ ├── gcnet_r50-d8.py │ │ │ ├── icnet_r50-d8.py │ │ │ ├── isanet_r50-d8.py │ │ │ ├── lraspp_m-v3-d8.py │ │ │ ├── nonlocal_r50-d8.py │ │ │ ├── ocrnet_hr18.py │ │ │ ├── ocrnet_r50-d8.py │ │ │ ├── pointrend_r50.py │ │ │ ├── psanet_r50-d8.py │ │ │ ├── pspnet_r50-d8.py │ │ │ ├── pspnet_unet_s5-d16.py │ │ │ ├── san_vit-b16.py │ │ │ ├── segformer_mit-b0.py │ │ │ ├── segmenter_vit-b16_mask.py │ │ │ ├── setr_mla.py │ │ │ ├── setr_naive.py │ │ │ ├── setr_pup.py │ │ │ ├── stdc.py │ │ │ ├── twins_pcpvt-s_fpn.py │ │ │ ├── twins_pcpvt-s_upernet.py │ │ │ ├── upernet_beit.py │ │ │ ├── upernet_convnext.py │ │ │ ├── upernet_mae.py │ │ │ ├── upernet_r50.py │ │ │ ├── upernet_swin.py │ │ │ ├── upernet_vit-b16_ln_mln.py │ │ │ └── vpd_sd.py │ │ └── schedules │ │ │ ├── schedule_160k.py │ │ │ ├── schedule_20k.py │ │ │ ├── schedule_240k.py │ │ │ ├── schedule_25k.py │ │ │ ├── schedule_320k.py │ │ │ ├── schedule_40k.py │ │ │ └── schedule_80k.py │ ├── convnext │ │ ├── README.md │ │ ├── convnext-base_upernet_8xb2-amp-160k_ade20k-512x512.py │ │ ├── convnext-base_upernet_8xb2-amp-160k_ade20k-640x640.py │ │ ├── convnext-large_upernet_8xb2-amp-160k_ade20k-640x640.py │ │ ├── convnext-small_upernet_8xb2-amp-160k_ade20k-512x512.py │ │ ├── convnext-tiny_upernet_8xb2-amp-160k_ade20k-512x512.py │ │ ├── convnext-xlarge_upernet_8xb2-amp-160k_ade20k-640x640.py │ │ └── metafile.yaml │ ├── swin │ │ ├── README.md │ │ ├── metafile.yaml │ │ ├── swin-base-patch4-window12-in1k-384x384-pre_upernet_8xb2-160k_ade20k-512x512.py │ │ ├── swin-base-patch4-window12-in22k-384x384-pre_upernet_8xb2-160k_ade20k-512x512.py │ │ ├── swin-base-patch4-window7-in1k-pre_upernet_8xb2-160k_ade20k-512x512.py │ │ ├── swin-base-patch4-window7-in22k-pre_upernet_8xb2-160k_ade20k-512x512.py │ │ ├── swin-large-patch4-window12-in22k-384x384-pre_upernet_8xb2-160k_ade20k-512x512.py │ │ ├── swin-large-patch4-window7-in22k-pre_upernet_8xb2-160k_ade20k-512x512.py │ │ ├── swin-small-patch4-window7-in1k-pre_upernet_8xb2-160k_ade20k-512x512.py │ │ ├── swin-tiny-patch4-window7-in1k-pre_upernet_8xb2-160k_ade20k-512x512.py │ │ └── swin-tiny-patch4-window7_upernet_1xb8-20k_levir-256x256.py │ ├── upernet │ │ ├── README.md │ │ ├── metafile.yaml │ │ ├── upernet_r101_4xb2-40k_cityscapes-512x1024.py │ │ ├── upernet_r101_4xb2-40k_cityscapes-769x769.py │ │ ├── upernet_r101_4xb2-80k_cityscapes-512x1024.py │ │ ├── upernet_r101_4xb2-80k_cityscapes-769x769.py │ │ ├── upernet_r101_4xb4-160k_ade20k-512x512.py │ │ ├── upernet_r101_4xb4-20k_voc12aug-512x512.py │ │ ├── upernet_r101_4xb4-40k_voc12aug-512x512.py │ │ ├── 
upernet_r101_4xb4-80k_ade20k-512x512.py │ │ ├── upernet_r18_4xb2-40k_cityscapes-512x1024.py │ │ ├── upernet_r18_4xb2-80k_cityscapes-512x1024.py │ │ ├── upernet_r18_4xb4-160k_ade20k-512x512.py │ │ ├── upernet_r18_4xb4-20k_voc12aug-512x512.py │ │ ├── upernet_r18_4xb4-40k_voc12aug-512x512.py │ │ ├── upernet_r18_4xb4-80k_ade20k-512x512.py │ │ ├── upernet_r50_4xb2-40k_cityscapes-512x1024.py │ │ ├── upernet_r50_4xb2-40k_cityscapes-769x769.py │ │ ├── upernet_r50_4xb2-80k_cityscapes-512x1024.py │ │ ├── upernet_r50_4xb2-80k_cityscapes-769x769.py │ │ ├── upernet_r50_4xb4-160k_ade20k-512x512.py │ │ ├── upernet_r50_4xb4-20k_voc12aug-512x512.py │ │ ├── upernet_r50_4xb4-40k_voc12aug-512x512.py │ │ └── upernet_r50_4xb4-80k_ade20k-512x512.py │ ├── vit │ │ ├── README.md │ │ ├── metafile.yaml │ │ ├── vit_deit-b16-ln_mln_upernet_8xb2-160k_ade20k-512x512.py │ │ ├── vit_deit-b16_mln_upernet_8xb2-160k_ade20k-512x512.py │ │ ├── vit_deit-b16_upernet_8xb2-160k_ade20k-512x512.py │ │ ├── vit_deit-b16_upernet_8xb2-80k_ade20k-512x512.py │ │ ├── vit_deit-s16-ln_mln_upernet_8xb2-160k_ade20k-512x512.py │ │ ├── vit_deit-s16_mln_upernet_8xb2-160k_ade20k-512x512.py │ │ ├── vit_deit-s16_upernet_8xb2-160k_ade20k-512x512.py │ │ ├── vit_deit-s16_upernet_8xb2-80k_ade20k-512x512.py │ │ ├── vit_vit-b16-ln_mln_upernet_8xb2-160k_ade20k-512x512.py │ │ ├── vit_vit-b16_mln_upernet_8xb2-160k_ade20k-512x512.py │ │ └── vit_vit-b16_mln_upernet_8xb2-80k_ade20k-512x512.py │ └── vssm_2d │ │ └── upernet_vssm_2d_4xb4-160k_ade20k-512x512_tiny.py │ ├── model.py │ ├── readme.md │ ├── tools │ ├── analysis_tools │ │ ├── analyze_logs.py │ │ ├── benchmark.py │ │ ├── browse_dataset.py │ │ ├── confusion_matrix.py │ │ ├── get_flops.py │ │ └── visualization_cam.py │ ├── dataset_converters │ │ ├── chase_db1.py │ │ ├── cityscapes.py │ │ ├── coco_stuff10k.py │ │ ├── coco_stuff164k.py │ │ ├── drive.py │ │ ├── hrf.py │ │ ├── isaid.py │ │ ├── levircd.py │ │ ├── loveda.py │ │ ├── nyu.py │ │ ├── pascal_context.py │ │ ├── potsdam.py │ │ ├── refuge.py │ │ ├── stare.py │ │ ├── synapse.py │ │ ├── vaihingen.py │ │ └── voc_aug.py │ ├── deployment │ │ └── pytorch2torchscript.py │ ├── dist_test.sh │ ├── dist_train.sh │ ├── misc │ │ ├── browse_dataset.py │ │ ├── print_config.py │ │ └── publish_model.py │ ├── model_converters │ │ ├── beit2mmseg.py │ │ ├── clip2mmseg.py │ │ ├── mit2mmseg.py │ │ ├── san2mmseg.py │ │ ├── stdc2mmseg.py │ │ ├── swin2mmseg.py │ │ ├── twins2mmseg.py │ │ ├── vit2mmseg.py │ │ └── vitjax2mmseg.py │ ├── slurm_test.sh │ ├── slurm_train.sh │ ├── test.py │ ├── torchserve │ │ ├── mmseg2torchserve.py │ │ ├── mmseg_handler.py │ │ └── test_torchserve.py │ └── train.py │ └── vis │ └── vis_seg.py ├── README.md ├── cuda_kernel ├── CMakeLists.txt ├── build.sh ├── include │ ├── scan │ │ ├── block_scan.cuh │ │ ├── block_scan_warp_scans.cuh │ │ ├── commons.h │ │ ├── thread_reduce.cuh │ │ ├── thread_scan.cuh │ │ ├── warp_scan.cuh │ │ └── warp_scan_shfl.cuh │ ├── selective_scan │ │ ├── global.cuh │ │ ├── selective_scan.cuh │ │ ├── selective_scan_bwd_kernel.cuh │ │ ├── selective_scan_common.cuh │ │ ├── selective_scan_fwd_kernel.cuh │ │ └── static_switch.cuh │ └── utils │ │ └── cuda_utils.h └── src │ ├── pscan.cu │ ├── repo │ ├── scan.cuh │ ├── test_001.cu │ ├── test_002_warp.cu │ ├── test_003_warp_hw.cu │ ├── test_005_block_prefix_callback_op.cu │ ├── test_014_block.cu │ ├── test_015_block_with_prefix_callback.cu │ ├── test_016_block_exclusive_scan.cu │ ├── test_017_block_scan_inclusive_array.cu │ ├── test_018_blk_scan_arr_prefix_callback.cu │ ├── 
test_019_rev_blk_scan_scalar_prefix_callback.cu │ ├── test_020_rev_blk_scan_arr_no_preix_callback.cu │ ├── test_021_rev_blk_scan_arr_prefix_callback.cu │ └── uninitialized_copy.cuh │ ├── selective_scan │ ├── selective_scan_bwd.cu │ ├── selective_scan_bwd_kernel_fp16.cu │ ├── selective_scan_bwd_kernel_fp32.cu │ ├── selective_scan_fwd.cu │ ├── selective_scan_fwd_kernel_fp16.cu │ └── selective_scan_fwd_kernel_fp32.cu │ ├── test_arr.cu │ └── test_non_arr.cu ├── misc ├── compare.jpg ├── cuda.jpg ├── overview.jpg └── overview_github.jpg └── v2dmamba_scan └── __init__.py /2DMambaMIL/README.md: -------------------------------------------------------------------------------- 1 | # 2DMambaMIL 2 | 3 | We prepared the extracted features in h5 files in the same format as the [CLAM library](https://github.com/mahmoodlab/CLAM). 4 | After preparation, please point the argument `--h5_path` to the corresponding h5 directory. For the CUDA scan, please use the flag `--cuda_pscan`. 5 | 6 | Sample scripts to run experiments: 7 | 8 | ``` 9 | cd 2DMambaMIL/2DMambaMIL 10 | 11 | CUDA_VISIBLE_DEVICES=0 python main.py --task BRACS --model_type 2DMambaMIL --seed 0 --cuda_pscan --h5_path BRACS_uni/h5_files 12 | CUDA_VISIBLE_DEVICES=0 python main.py --task BRCA --model_type 2DMambaMIL --seed 0 --cuda_pscan --h5_path BRCA_uni/h5_files 13 | CUDA_VISIBLE_DEVICES=0 python main.py --task DHMC --model_type 2DMambaMIL --seed 0 --cuda_pscan --h5_path DHMC_uni/h5_files 14 | CUDA_VISIBLE_DEVICES=0 python main.py --task NSCLC --model_type 2DMambaMIL --seed 0 --cuda_pscan --h5_path NSCLC_uni/h5_files 15 | CUDA_VISIBLE_DEVICES=0 python main.py --task PANDA --model_type 2DMambaMIL --seed 0 --cuda_pscan --h5_path PANDA_uni/h5_files 16 | 17 | CUDA_VISIBLE_DEVICES=0 python main.py --task KIRC --survival --model_type 2DMambaMIL --seed 0 --cuda_pscan --h5_path KIRC_uni/h5_files 18 | CUDA_VISIBLE_DEVICES=0 python main.py --task KIRP --survival --model_type 2DMambaMIL --seed 0 --cuda_pscan --h5_path KIRP_uni/h5_files 19 | CUDA_VISIBLE_DEVICES=0 python main.py --task LUAD --survival --model_type 2DMambaMIL --seed 0 --cuda_pscan --h5_path LUAD_uni/h5_files 20 | CUDA_VISIBLE_DEVICES=0 python main.py --task STAD --survival --model_type 2DMambaMIL --seed 0 --cuda_pscan --h5_path STAD_uni/h5_files 21 | CUDA_VISIBLE_DEVICES=0 python main.py --task UCEC --survival --model_type 2DMambaMIL --seed 0 --cuda_pscan --h5_path UCEC_uni/h5_files 22 | ``` 23 |
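24 | Each h5 file is expected to hold two datasets, `features` (patch embeddings) and `coords` (patch coordinates), matching what `draw_heatmap.py` reads. A minimal sketch for inspecting one file (the path below is illustrative): 25 | 26 | ``` 27 | import h5py 28 | 29 | with h5py.File('BRACS_uni/h5_files/slide_0.h5', 'r') as f:  # illustrative path 30 |     features = f['features'][:]  # (num_patches, feature_dim) patch embeddings 31 |     coords = f['coords'][:]      # (num_patches, 2) patch coordinates 32 |     print(features.shape, coords.shape) 33 | ```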
-------------------------------------------------------------------------------- /2DMambaMIL/dataset/csv_files/classification/TCGA-NSCLC-label.csv.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AtlasAnalyticsLab/2DMamba/803a7c16ce72d93888543de79afecb158e7a67a3/2DMambaMIL/dataset/csv_files/classification/TCGA-NSCLC-label.csv.zip -------------------------------------------------------------------------------- /2DMambaMIL/draw_heatmap.py: -------------------------------------------------------------------------------- 1 | from utils.utils import WholeSlideImage 2 | import torch 3 | import glob 4 | import h5py 5 | import yaml 6 | import argparse 7 | import os 8 | 9 | parser = argparse.ArgumentParser() 10 | parser.add_argument('--model_path', required=True) 11 | parser.add_argument('--survival', action='store_true', default=False) 12 | parser.add_argument('--slide_folder', required=True) 13 | parser.add_argument('--h5_folder', required=True) 14 | parser.add_argument('--heatmap_save_dir', required=True) 15 | args = parser.parse_args() 16 | 17 | device = torch.device('cuda') 18 | model_path = args.model_path 19 | model = torch.load(model_path).to(device) 20 | model.survival = args.survival 21 | 22 | for path in glob.glob(args.slide_folder): 23 | slide_id = path.split('/')[-1][:-4] # strip the 4-character extension, e.g. '.svs' 24 | count_relevance = 0 25 | print(slide_id) 26 | try: 27 | data = h5py.File(f'{args.h5_folder}/{slide_id}.h5') 28 | except OSError: 29 | print(f'Cannot find h5 file for: {slide_id}') 30 | continue 31 | slide_feats = torch.tensor(data['features'][:]).to(device) 32 | coords = torch.tensor(data['coords'][:]).to(device) 33 | 34 | _, _, prediction, attention, _ = model(slide_feats) 35 | attention = attention.cpu().detach().numpy() 36 | 37 | wsi = WholeSlideImage(path) 38 | if len(wsi.level_dim) > 3: 39 | vis_level = 3 40 | else: 41 | vis_level = 2 42 | 43 | heatmap = wsi.visHeatmap( 44 | scores=attention, 45 | coords=data['coords'][:], 46 | patch_size = (512,512), 47 | blur = True, 48 | overlap=0.0, 49 | cmap = 'jet', 50 | convert_to_percentiles = True, 51 | vis_level = vis_level 52 | ) 53 | os.makedirs(f'{args.heatmap_save_dir}/', exist_ok=True) 54 | heatmap.save(f'{args.heatmap_save_dir}/{slide_id}.png') 55 | -------------------------------------------------------------------------------- /2DMambaMIL/mamba_ssm/__init__.py: -------------------------------------------------------------------------------- 1 | # __version__ = "1.1.2" 2 | 3 | # from mamba.mamba_ssm.ops.selective_scan_interface import selective_scan_fn, mamba_inner_fn 4 | # from mamba.mamba_ssm.modules.mamba_simple import Mamba 5 | # from mamba.mamba_ssm.models.mixer_seq_simple import MambaLMHeadModel 6 | # from mamba.mamba_ssm.modules.srmamba import SRMamba 7 | # from mamba.mamba_ssm.modules.bimamba import BiMamba 8 | # from mamba.mamba_ssm.mamba_simple.mamba import Mamba, MambaConfig 9 | 10 | 11 | __version__ = "1.1.2" 12 | 13 | from mamba.mamba_ssm.ops.selective_scan_interface import selective_scan_fn, mamba_inner_fn 14 | from mamba.mamba_ssm.modules.mamba_simple import Mamba 15 | from mamba.mamba_ssm.models.mixer_seq_simple import MambaLMHeadModel 16 | from mamba.mamba_ssm.modules.srmamba import SRMamba 17 | from mamba.mamba_ssm.modules.bimamba import BiMamba -------------------------------------------------------------------------------- /2DMambaMIL/mamba_ssm/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AtlasAnalyticsLab/2DMamba/803a7c16ce72d93888543de79afecb158e7a67a3/2DMambaMIL/mamba_ssm/models/__init__.py -------------------------------------------------------------------------------- /2DMambaMIL/mamba_ssm/models/config_mamba.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass, field 2 | 3 | 4 | @dataclass 5 | class MambaConfig: 6 | 7 | d_model: int = 2560 8 | n_layer: int = 64 9 | vocab_size: int = 50277 10 | ssm_cfg: dict = field(default_factory=dict) 11 | rms_norm: bool = True 12 | residual_in_fp32: bool = True 13 | fused_add_norm: bool = True 14 | pad_vocab_size_multiple: int = 8 15 | -------------------------------------------------------------------------------- /2DMambaMIL/mamba_ssm/modules/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AtlasAnalyticsLab/2DMamba/803a7c16ce72d93888543de79afecb158e7a67a3/2DMambaMIL/mamba_ssm/modules/__init__.py
-------------------------------------------------------------------------------- /2DMambaMIL/mamba_ssm/ops/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AtlasAnalyticsLab/2DMamba/803a7c16ce72d93888543de79afecb158e7a67a3/2DMambaMIL/mamba_ssm/ops/__init__.py -------------------------------------------------------------------------------- /2DMambaMIL/mamba_ssm/ops/triton/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AtlasAnalyticsLab/2DMamba/803a7c16ce72d93888543de79afecb158e7a67a3/2DMambaMIL/mamba_ssm/ops/triton/__init__.py -------------------------------------------------------------------------------- /2DMambaMIL/mamba_ssm/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AtlasAnalyticsLab/2DMamba/803a7c16ce72d93888543de79afecb158e7a67a3/2DMambaMIL/mamba_ssm/utils/__init__.py -------------------------------------------------------------------------------- /2DMambaMIL/mamba_ssm/utils/hf.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | import torch 4 | 5 | from transformers.utils import WEIGHTS_NAME, CONFIG_NAME 6 | from transformers.utils.hub import cached_file 7 | 8 | 9 | def load_config_hf(model_name): 10 | resolved_archive_file = cached_file(model_name, CONFIG_NAME, _raise_exceptions_for_missing_entries=False) 11 | return json.load(open(resolved_archive_file)) 12 | 13 | 14 | def load_state_dict_hf(model_name, device=None, dtype=None): 15 | # If not fp32, then we don't want to load directly to the GPU 16 | mapped_device = "cpu" if dtype not in [torch.float32, None] else device 17 | resolved_archive_file = cached_file(model_name, WEIGHTS_NAME, _raise_exceptions_for_missing_entries=False) 18 | state_dict = torch.load(resolved_archive_file, map_location=mapped_device) 19 | # Convert dtype before moving to GPU to save memory 20 | if dtype is not None: 21 | state_dict = {k: v.to(dtype=dtype) for k, v in state_dict.items()} 22 | state_dict = {k: v.to(device=device) for k, v in state_dict.items()} 23 | return state_dict 24 | -------------------------------------------------------------------------------- /2DMambaMIL/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AtlasAnalyticsLab/2DMamba/803a7c16ce72d93888543de79afecb158e7a67a3/2DMambaMIL/models/__init__.py -------------------------------------------------------------------------------- /2DMambaMIL/models/pscan_cuda/__init__.py: -------------------------------------------------------------------------------- 1 | from .pscan import * 2 | -------------------------------------------------------------------------------- /2DMambaMIL/utils/__init__.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.append('/home/yihui/VscodeFiles/Multimodel_Pretrain/Fearture_extractor/utils') -------------------------------------------------------------------------------- /2DMambaMIL/utils/file_utils.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | 3 | def save_pkl(filename, save_object): 4 | writer = open(filename,'wb') 5 | pickle.dump(save_object, writer) 6 | writer.close() 7 | 8 | def load_pkl(filename): 9 | loader = open(filename,'rb') 10 | file =
pickle.load(loader) 11 | loader.close() 12 | return file 13 | 14 | 15 | -------------------------------------------------------------------------------- /2DVMamba/classification/configs/vssm_2d/vmambav2_2d_small_224.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: vssm 3 | NAME: vssm1_small_0229 4 | DROP_PATH_RATE: 0.3 5 | VSSM: 6 | EMBED_DIM: 96 7 | DEPTHS: [ 2, 2, 15, 2 ] 8 | SSM_D_STATE: 1 9 | SSM_DT_RANK: "auto" 10 | SSM_RATIO: 2.0 11 | SSM_CONV: 3 12 | SSM_CONV_BIAS: false 13 | SSM_FORWARDTYPE: "v05_noz" # v3_noz 14 | MLP_RATIO: 4.0 15 | DOWNSAMPLE: "v3" 16 | PATCHEMBED: "v2" 17 | NORM_LAYER: "ln2d" 18 | USE_V2D: True 19 | -------------------------------------------------------------------------------- /2DVMamba/classification/configs/vssm_2d/vmambav2v_2d_tiny_224.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: vssm 3 | NAME: vssm1_tiny_0230s 4 | DROP_PATH_RATE: 0.2 5 | VSSM: 6 | EMBED_DIM: 96 7 | DEPTHS: [ 2, 2, 8, 2 ] 8 | SSM_D_STATE: 1 9 | SSM_DT_RANK: "auto" 10 | SSM_RATIO: 1.0 11 | SSM_CONV: 3 12 | SSM_CONV_BIAS: false 13 | SSM_FORWARDTYPE: "v05_noz" # v3_noz 14 | MLP_RATIO: 4.0 15 | DOWNSAMPLE: "v3" 16 | PATCHEMBED: "v2" 17 | NORM_LAYER: "ln2d" 18 | USE_V2D: True 19 | -------------------------------------------------------------------------------- /2DVMamba/classification/data/__init__.py: -------------------------------------------------------------------------------- 1 | from .build import build_loader as _build_loader 2 | from .data_simmim_pt import build_loader_simmim 3 | from .data_simmim_ft import build_loader_finetune 4 | 5 | 6 | def build_loader(config, simmim=False, is_pretrain=False): 7 | if not simmim: 8 | return _build_loader(config) 9 | if is_pretrain: 10 | return build_loader_simmim(config) 11 | else: 12 | return build_loader_finetune(config) 13 | -------------------------------------------------------------------------------- /2DVMamba/classification/data/imagenet22k_dataset.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import torch.utils.data as data 4 | import numpy as np 5 | from PIL import Image 6 | 7 | import warnings 8 | 9 | warnings.filterwarnings("ignore", "(Possibly )?corrupt EXIF data", UserWarning) 10 | 11 | 12 | class IN22KDATASET(data.Dataset): 13 | def __init__(self, root, ann_file='', transform=None, target_transform=None): 14 | super(IN22KDATASET, self).__init__() 15 | 16 | self.data_path = root 17 | self.ann_path = os.path.join(self.data_path, ann_file) 18 | self.transform = transform 19 | self.target_transform = target_transform 20 | # id & label: https://github.com/google-research/big_transfer/issues/7 21 | # total: 21843; only 21841 classes have images: map 21841->9205; 21842->15027 22 | self.database = json.load(open(self.ann_path)) 23 | 24 | def _load_image(self, path): 25 | try: 26 | im = Image.open(path) 27 | except Exception: 28 | print("ERROR IMG LOADED: ", path) 29 | random_img = np.random.rand(224, 224, 3) * 255 30 | im = Image.fromarray(np.uint8(random_img)) 31 | return im 32 | 33 | def __getitem__(self, index): 34 | """ 35 | Args: 36 | index (int): Index 37 | Returns: 38 | tuple: (image, target) where target is class_index of the target class.
39 | """ 40 | idb = self.database[index] 41 | 42 | # images 43 | images = self._load_image(self.data_path + '/' + idb[0]).convert('RGB') 44 | if self.transform is not None: 45 | images = self.transform(images) 46 | 47 | # target 48 | target = int(idb[1]) 49 | if self.target_transform is not None: 50 | target = self.target_transform(target) 51 | 52 | return images, target 53 | 54 | def __len__(self): 55 | return len(self.database) 56 | -------------------------------------------------------------------------------- /2DVMamba/classification/data/samplers.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Swin Transformer 3 | # Copyright (c) 2021 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ze Liu 6 | # -------------------------------------------------------- 7 | 8 | import torch 9 | 10 | 11 | class SubsetRandomSampler(torch.utils.data.Sampler): 12 | r"""Samples elements randomly from a given list of indices, without replacement. 13 | 14 | Arguments: 15 | indices (sequence): a sequence of indices 16 | """ 17 | 18 | def __init__(self, indices): 19 | self.epoch = 0 20 | self.indices = indices 21 | 22 | def __iter__(self): 23 | return (self.indices[i] for i in torch.randperm(len(self.indices))) 24 | 25 | def __len__(self): 26 | return len(self.indices) 27 | 28 | def set_epoch(self, epoch): 29 | self.epoch = epoch 30 | -------------------------------------------------------------------------------- /2DVMamba/classification/debug_model.py: -------------------------------------------------------------------------------- 1 | import torch 2 | def permute(x, hw_shape, B, direction_Bs, permute_B=False): 3 | # x: B, L, D(E) 4 | # B: B, d_state, L 5 | # direction_Bs: 4, dstate 6 | H, W = hw_shape 7 | BB, L, D = x.shape 8 | x = x.reshape(BB, H, W, D) 9 | 10 | x_1 = x.permute(0, 3, 1, 2) # [B, L, H, W] 11 | HW_1 = (H, W) 12 | 13 | x_2 = x_1.permute(0, 1, 3, 2) # [B, L, W, H] 14 | HW_2 = (W, H) 15 | 16 | x_1 = x_1.flatten(2) 17 | x_2 = x_2.flatten(2) 18 | 19 | x_3 = x_1.flip(-1) 20 | HW_3 = HW_1 21 | x_4 = x_2.flip(-1) 22 | HW_4 = HW_2 23 | 24 | if permute_B: 25 | B = B.reshape(B.shape[0], B.shape[1], H, W) 26 | B1 = B.flatten(2) 27 | B2 = B.permute(0, 1, 3, 2).flatten(2) 28 | B3 = B1.flip(-1) 29 | B4 = B2.flip(-1) 30 | Bs = [B1, B2, B3, B4] 31 | else: 32 | Bs = [B, B, B, B] 33 | 34 | dBs = [db[None, :, None] for db in direction_Bs] 35 | 36 | return [x_1, x_2, x_3, x_4], [HW_1, HW_2, HW_3, HW_4], Bs, dBs 37 | 38 | def unpermute_and_sum(ys, H, W): 39 | # ys list of 4 [B, D, L] 40 | ys0 = ys[0] 41 | ys1 = ys[1] 42 | ys2 = ys[2].flip(-1) 43 | ys3 = ys[3].flip(-1) 44 | 45 | ys02 = ys0 + ys2 46 | ys13 = ys1 + ys3 47 | ys13 = ys13.reshape(ys13.shape[0], ys13.shape[1], W, H) 48 | ys13 = ys13.permute(0, 1, 3, 2).flatten(2) 49 | ys_out = ys02 + ys13 50 | ys_out = ys_out.permute(0, 2, 1) 51 | return ys_out 52 | 53 | 54 | if __name__ == '__main__': 55 | x = torch.range(0, 5).reshape(1, 6, 1) 56 | hw_shape = (2, 3) 57 | B = torch.range(0, 5).reshape(1, 1, 6) 58 | direction_Bs = torch.range(0, 3).reshape(4, 1) 59 | 60 | xs, HWs, Bs, dBs = permute(x, hw_shape, B, direction_Bs, permute_B=True) 61 | 62 | ys_out = unpermute_and_sum(xs, *hw_shape) 63 | 64 | print(xs) 65 | print(HWs) 66 | print(Bs) 67 | print(dBs) 68 | print(ys_out) -------------------------------------------------------------------------------- /2DVMamba/classification/models/mamba2/__init__.py: 
-------------------------------------------------------------------------------- 1 | # all the code in this folder is copied from https://github.com/state-spaces/mamba/blob/main/mamba_ssm/ops/triton/ 2 | 3 | -------------------------------------------------------------------------------- /2DVMamba/classification/models/network_utils.py: -------------------------------------------------------------------------------- 1 | import numbers 2 | from typing import Tuple 3 | 4 | import torch 5 | from torch import nn, Tensor 6 | from torch.nn import Parameter, init 7 | 8 | import torch.nn.functional as F 9 | 10 | 11 | class AffineFirstLayerNorm(nn.Module): 12 | __constants__ = ['affine_shape', 'normalized_shape', 'eps'] 13 | normalized_shape: Tuple[int, ...] 14 | eps: float 15 | elementwise_affine: bool 16 | 17 | def __init__(self, affine_shape, normalized_shape, eps: float = 1e-5, 18 | bias: bool = True, device=None, dtype=None) -> None: 19 | factory_kwargs = {'device': device, 'dtype': dtype} 20 | super().__init__() 21 | if isinstance(normalized_shape, numbers.Integral): 22 | # mypy error: incompatible types in assignment 23 | normalized_shape = (normalized_shape,) # type: ignore[assignment] 24 | self.normalized_shape = tuple(normalized_shape) # type: ignore[arg-type] 25 | 26 | if isinstance(affine_shape, numbers.Integral): 27 | # mypy error: incompatible types in assignment 28 | affine_shape = (affine_shape,) # type: ignore[assignment] 29 | self.affine_shape = tuple(affine_shape) # type: ignore[arg-type] 30 | 31 | self.eps = eps 32 | 33 | self.weight = Parameter(torch.empty(self.affine_shape, **factory_kwargs)) 34 | if bias: 35 | self.bias = Parameter(torch.empty(self.affine_shape, **factory_kwargs)) 36 | else: 37 | self.register_parameter('bias', None) 38 | 39 | 40 | self.reset_parameters() 41 | 42 | def reset_parameters(self) -> None: 43 | init.ones_(self.weight) 44 | if self.bias is not None: 45 | init.zeros_(self.bias) 46 | 47 | def forward(self, input: Tensor) -> Tensor: 48 | input = input * self.weight + (self.bias if self.bias is not None else 0) # self.bias is None when constructed with bias=False 49 | return F.layer_norm( 50 | input, self.normalized_shape, None, None, self.eps) 51 | 52 | def extra_repr(self) -> str: 53 | return '{affine_shape}, {normalized_shape}, eps={eps}'.format(**self.__dict__) 54 | -------------------------------------------------------------------------------- /2DVMamba/classification/readme.md: -------------------------------------------------------------------------------- 1 | ## origins 2 | 3 | based on https://github.com/microsoft/Swin-Transformer#20240103 4 | 5 | `main.py` and `utils/utils_ema.py` are modified from https://github.com/microsoft/Swin-Transformer#20240103, based on https://github.com/facebookresearch/ConvNeXt#20240103 6 | 7 | -------------------------------------------------------------------------------- /2DVMamba/classification/utils/logger.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Swin Transformer 3 | # Copyright (c) 2021 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ze Liu 6 | # -------------------------------------------------------- 7 | 8 | import os 9 | import sys 10 | import logging 11 | import functools 12 | from termcolor import colored 13 | 14 | 15 | @functools.lru_cache() 16 | def create_logger(output_dir, dist_rank=0, name=''): 17 | # create logger 18 | logger = logging.getLogger(name) 19 | logger.setLevel(logging.DEBUG) 20 | logger.propagate = False 21 | 22 | # 
create formatter 23 | fmt = '[%(asctime)s %(name)s] (%(filename)s %(lineno)d): %(levelname)s %(message)s' 24 | color_fmt = colored('[%(asctime)s %(name)s]', 'green') + \ 25 | colored('(%(filename)s %(lineno)d)', 'yellow') + ': %(levelname)s %(message)s' 26 | 27 | # create console handlers for master process 28 | if dist_rank == 0: 29 | console_handler = logging.StreamHandler(sys.stdout) 30 | console_handler.setLevel(logging.DEBUG) 31 | console_handler.setFormatter( 32 | logging.Formatter(fmt=color_fmt, datefmt='%Y-%m-%d %H:%M:%S')) 33 | logger.addHandler(console_handler) 34 | 35 | # create file handlers 36 | file_handler = logging.FileHandler(os.path.join(output_dir, f'log_rank{dist_rank}.txt'), mode='a') 37 | file_handler.setLevel(logging.DEBUG) 38 | file_handler.setFormatter(logging.Formatter(fmt=fmt, datefmt='%Y-%m-%d %H:%M:%S')) 39 | logger.addHandler(file_handler) 40 | 41 | return logger 42 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/_base_/datasets/ade20k.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'ADE20KDataset' 3 | data_root = 'data/ade/ADEChallengeData2016' 4 | crop_size = (512, 512) 5 | train_pipeline = [ 6 | dict(type='LoadImageFromFile'), 7 | dict(type='LoadAnnotations', reduce_zero_label=True), 8 | dict( 9 | type='RandomResize', 10 | scale=(2048, 512), 11 | ratio_range=(0.5, 2.0), 12 | keep_ratio=True), 13 | dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), 14 | dict(type='RandomFlip', prob=0.5), 15 | dict(type='PhotoMetricDistortion'), 16 | dict(type='PackSegInputs') 17 | ] 18 | test_pipeline = [ 19 | dict(type='LoadImageFromFile'), 20 | dict(type='Resize', scale=(2048, 512), keep_ratio=True), 21 | # add loading annotation after ``Resize`` because ground truth 22 | # does not need to do resize data transform 23 | dict(type='LoadAnnotations', reduce_zero_label=True), 24 | dict(type='PackSegInputs') 25 | ] 26 | img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75] 27 | tta_pipeline = [ 28 | dict(type='LoadImageFromFile', backend_args=None), 29 | dict( 30 | type='TestTimeAug', 31 | transforms=[ 32 | [ 33 | dict(type='Resize', scale_factor=r, keep_ratio=True) 34 | for r in img_ratios 35 | ], 36 | [ 37 | dict(type='RandomFlip', prob=0., direction='horizontal'), 38 | dict(type='RandomFlip', prob=1., direction='horizontal') 39 | ], [dict(type='LoadAnnotations')], [dict(type='PackSegInputs')] 40 | ]) 41 | ] 42 | train_dataloader = dict( 43 | batch_size=4, 44 | num_workers=4, 45 | persistent_workers=True, 46 | sampler=dict(type='InfiniteSampler', shuffle=True), 47 | dataset=dict( 48 | type=dataset_type, 49 | data_root=data_root, 50 | data_prefix=dict( 51 | img_path='images/training', seg_map_path='annotations/training'), 52 | pipeline=train_pipeline)) 53 | val_dataloader = dict( 54 | batch_size=1, 55 | num_workers=4, 56 | persistent_workers=True, 57 | sampler=dict(type='DefaultSampler', shuffle=False), 58 | dataset=dict( 59 | type=dataset_type, 60 | data_root=data_root, 61 | data_prefix=dict( 62 | img_path='images/validation', 63 | seg_map_path='annotations/validation'), 64 | pipeline=test_pipeline)) 65 | test_dataloader = val_dataloader 66 | 67 | val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU']) 68 | test_evaluator = val_evaluator 69 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/_base_/datasets/ade20k_640x640.py: 
-------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'ADE20KDataset' 3 | data_root = 'data/ade/ADEChallengeData2016' 4 | crop_size = (640, 640) 5 | train_pipeline = [ 6 | dict(type='LoadImageFromFile'), 7 | dict(type='LoadAnnotations', reduce_zero_label=True), 8 | dict( 9 | type='RandomResize', 10 | scale=(2560, 640), 11 | ratio_range=(0.5, 2.0), 12 | keep_ratio=True), 13 | dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), 14 | dict(type='RandomFlip', prob=0.5), 15 | dict(type='PhotoMetricDistortion'), 16 | dict(type='PackSegInputs') 17 | ] 18 | test_pipeline = [ 19 | dict(type='LoadImageFromFile'), 20 | dict(type='Resize', scale=(2560, 640), keep_ratio=True), 21 | # add loading annotation after ``Resize`` because ground truth 22 | # does not need to do resize data transform 23 | dict(type='LoadAnnotations', reduce_zero_label=True), 24 | dict(type='PackSegInputs') 25 | ] 26 | img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75] 27 | tta_pipeline = [ 28 | dict(type='LoadImageFromFile', backend_args=None), 29 | dict( 30 | type='TestTimeAug', 31 | transforms=[ 32 | [ 33 | dict(type='Resize', scale_factor=r, keep_ratio=True) 34 | for r in img_ratios 35 | ], 36 | [ 37 | dict(type='RandomFlip', prob=0., direction='horizontal'), 38 | dict(type='RandomFlip', prob=1., direction='horizontal') 39 | ], [dict(type='LoadAnnotations')], [dict(type='PackSegInputs')] 40 | ]) 41 | ] 42 | train_dataloader = dict( 43 | batch_size=4, 44 | num_workers=4, 45 | persistent_workers=True, 46 | sampler=dict(type='InfiniteSampler', shuffle=True), 47 | dataset=dict( 48 | type=dataset_type, 49 | data_root=data_root, 50 | data_prefix=dict( 51 | img_path='images/training', seg_map_path='annotations/training'), 52 | pipeline=train_pipeline)) 53 | val_dataloader = dict( 54 | batch_size=1, 55 | num_workers=4, 56 | persistent_workers=True, 57 | sampler=dict(type='DefaultSampler', shuffle=False), 58 | dataset=dict( 59 | type=dataset_type, 60 | data_root=data_root, 61 | data_prefix=dict( 62 | img_path='images/validation', 63 | seg_map_path='annotations/validation'), 64 | pipeline=test_pipeline)) 65 | test_dataloader = val_dataloader 66 | 67 | val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU']) 68 | test_evaluator = val_evaluator 69 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/_base_/datasets/bdd100k.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'BDD100KDataset' 3 | data_root = 'data/bdd100k/' 4 | 5 | crop_size = (512, 1024) 6 | train_pipeline = [ 7 | dict(type='LoadImageFromFile'), 8 | dict(type='LoadAnnotations'), 9 | dict( 10 | type='RandomResize', 11 | scale=(2048, 1024), 12 | ratio_range=(0.5, 2.0), 13 | keep_ratio=True), 14 | dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), 15 | dict(type='RandomFlip', prob=0.5), 16 | dict(type='PhotoMetricDistortion'), 17 | dict(type='PackSegInputs') 18 | ] 19 | test_pipeline = [ 20 | dict(type='LoadImageFromFile'), 21 | dict(type='Resize', scale=(2048, 1024), keep_ratio=True), 22 | # add loading annotation after ``Resize`` because ground truth 23 | # does not need to do resize data transform 24 | dict(type='LoadAnnotations'), 25 | dict(type='PackSegInputs') 26 | ] 27 | img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75] 28 | tta_pipeline = [ 29 | dict(type='LoadImageFromFile', backend_args=None), 30 | dict( 31 | type='TestTimeAug', 32 | 
transforms=[ 33 | [ 34 | dict(type='Resize', scale_factor=r, keep_ratio=True) 35 | for r in img_ratios 36 | ], 37 | [ 38 | dict(type='RandomFlip', prob=0., direction='horizontal'), 39 | dict(type='RandomFlip', prob=1., direction='horizontal') 40 | ], [dict(type='LoadAnnotations')], [dict(type='PackSegInputs')] 41 | ]) 42 | ] 43 | train_dataloader = dict( 44 | batch_size=2, 45 | num_workers=2, 46 | persistent_workers=True, 47 | sampler=dict(type='InfiniteSampler', shuffle=True), 48 | dataset=dict( 49 | type=dataset_type, 50 | data_root=data_root, 51 | data_prefix=dict( 52 | img_path='images/10k/train', 53 | seg_map_path='labels/sem_seg/masks/train'), 54 | pipeline=train_pipeline)) 55 | val_dataloader = dict( 56 | batch_size=1, 57 | num_workers=4, 58 | persistent_workers=True, 59 | sampler=dict(type='DefaultSampler', shuffle=False), 60 | dataset=dict( 61 | type=dataset_type, 62 | data_root=data_root, 63 | data_prefix=dict( 64 | img_path='images/10k/val', 65 | seg_map_path='labels/sem_seg/masks/val'), 66 | pipeline=test_pipeline)) 67 | test_dataloader = val_dataloader 68 | 69 | val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU']) 70 | test_evaluator = val_evaluator 71 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/_base_/datasets/cityscapes.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'CityscapesDataset' 3 | data_root = 'data/cityscapes/' 4 | crop_size = (512, 1024) 5 | train_pipeline = [ 6 | dict(type='LoadImageFromFile'), 7 | dict(type='LoadAnnotations'), 8 | dict( 9 | type='RandomResize', 10 | scale=(2048, 1024), 11 | ratio_range=(0.5, 2.0), 12 | keep_ratio=True), 13 | dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), 14 | dict(type='RandomFlip', prob=0.5), 15 | dict(type='PhotoMetricDistortion'), 16 | dict(type='PackSegInputs') 17 | ] 18 | test_pipeline = [ 19 | dict(type='LoadImageFromFile'), 20 | dict(type='Resize', scale=(2048, 1024), keep_ratio=True), 21 | # add loading annotation after ``Resize`` because ground truth 22 | # does not need to do resize data transform 23 | dict(type='LoadAnnotations'), 24 | dict(type='PackSegInputs') 25 | ] 26 | img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75] 27 | tta_pipeline = [ 28 | dict(type='LoadImageFromFile', backend_args=None), 29 | dict( 30 | type='TestTimeAug', 31 | transforms=[ 32 | [ 33 | dict(type='Resize', scale_factor=r, keep_ratio=True) 34 | for r in img_ratios 35 | ], 36 | [ 37 | dict(type='RandomFlip', prob=0., direction='horizontal'), 38 | dict(type='RandomFlip', prob=1., direction='horizontal') 39 | ], [dict(type='LoadAnnotations')], [dict(type='PackSegInputs')] 40 | ]) 41 | ] 42 | train_dataloader = dict( 43 | batch_size=2, 44 | num_workers=2, 45 | persistent_workers=True, 46 | sampler=dict(type='InfiniteSampler', shuffle=True), 47 | dataset=dict( 48 | type=dataset_type, 49 | data_root=data_root, 50 | data_prefix=dict( 51 | img_path='leftImg8bit/train', seg_map_path='gtFine/train'), 52 | pipeline=train_pipeline)) 53 | val_dataloader = dict( 54 | batch_size=1, 55 | num_workers=4, 56 | persistent_workers=True, 57 | sampler=dict(type='DefaultSampler', shuffle=False), 58 | dataset=dict( 59 | type=dataset_type, 60 | data_root=data_root, 61 | data_prefix=dict( 62 | img_path='leftImg8bit/val', seg_map_path='gtFine/val'), 63 | pipeline=test_pipeline)) 64 | test_dataloader = val_dataloader 65 | 66 | val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU']) 67 | 
test_evaluator = val_evaluator 68 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/_base_/datasets/cityscapes_1024x1024.py: -------------------------------------------------------------------------------- 1 | _base_ = './cityscapes.py' 2 | crop_size = (1024, 1024) 3 | train_pipeline = [ 4 | dict(type='LoadImageFromFile'), 5 | dict(type='LoadAnnotations'), 6 | dict( 7 | type='RandomResize', 8 | scale=(2048, 1024), 9 | ratio_range=(0.5, 2.0), 10 | keep_ratio=True), 11 | dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), 12 | dict(type='RandomFlip', prob=0.5), 13 | dict(type='PhotoMetricDistortion'), 14 | dict(type='PackSegInputs') 15 | ] 16 | test_pipeline = [ 17 | dict(type='LoadImageFromFile'), 18 | dict(type='Resize', scale=(2048, 1024), keep_ratio=True), 19 | # add loading annotation after ``Resize`` because ground truth 20 | # does not need to do resize data transform 21 | dict(type='LoadAnnotations'), 22 | dict(type='PackSegInputs') 23 | ] 24 | train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) 25 | val_dataloader = dict(dataset=dict(pipeline=test_pipeline)) 26 | test_dataloader = val_dataloader 27 | 28 | val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU']) 29 | test_evaluator = val_evaluator 30 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/_base_/datasets/cityscapes_768x768.py: -------------------------------------------------------------------------------- 1 | _base_ = './cityscapes.py' 2 | crop_size = (768, 768) 3 | train_pipeline = [ 4 | dict(type='LoadImageFromFile'), 5 | dict(type='LoadAnnotations'), 6 | dict( 7 | type='RandomResize', 8 | scale=(2049, 1025), 9 | ratio_range=(0.5, 2.0), 10 | keep_ratio=True), 11 | dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), 12 | dict(type='RandomFlip', prob=0.5), 13 | dict(type='PhotoMetricDistortion'), 14 | dict(type='PackSegInputs') 15 | ] 16 | test_pipeline = [ 17 | dict(type='LoadImageFromFile'), 18 | dict(type='Resize', scale=(2049, 1025), keep_ratio=True), 19 | # add loading annotation after ``Resize`` because ground truth 20 | # does not need to do resize data transform 21 | dict(type='LoadAnnotations'), 22 | dict(type='PackSegInputs') 23 | ] 24 | train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) 25 | val_dataloader = dict(dataset=dict(pipeline=test_pipeline)) 26 | test_dataloader = val_dataloader 27 | 28 | val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU']) 29 | test_evaluator = val_evaluator 30 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/_base_/datasets/cityscapes_769x769.py: -------------------------------------------------------------------------------- 1 | _base_ = './cityscapes.py' 2 | crop_size = (769, 769) 3 | train_pipeline = [ 4 | dict(type='LoadImageFromFile'), 5 | dict(type='LoadAnnotations'), 6 | dict( 7 | type='RandomResize', 8 | scale=(2049, 1025), 9 | ratio_range=(0.5, 2.0), 10 | keep_ratio=True), 11 | dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), 12 | dict(type='RandomFlip', prob=0.5), 13 | dict(type='PhotoMetricDistortion'), 14 | dict(type='PackSegInputs') 15 | ] 16 | test_pipeline = [ 17 | dict(type='LoadImageFromFile'), 18 | dict(type='Resize', scale=(2049, 1025), keep_ratio=True), 19 | # add loading annotation after ``Resize`` because ground truth 20 | # does not need to do resize data transform 21 | 
dict(type='LoadAnnotations'), 22 | dict(type='PackSegInputs') 23 | ] 24 | train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) 25 | val_dataloader = dict(dataset=dict(pipeline=test_pipeline)) 26 | test_dataloader = val_dataloader 27 | 28 | val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU']) 29 | test_evaluator = val_evaluator 30 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/_base_/datasets/cityscapes_832x832.py: -------------------------------------------------------------------------------- 1 | _base_ = './cityscapes.py' 2 | crop_size = (832, 832) 3 | train_pipeline = [ 4 | dict(type='LoadImageFromFile'), 5 | dict(type='LoadAnnotations'), 6 | dict( 7 | type='RandomResize', 8 | scale=(2048, 1024), 9 | ratio_range=(0.5, 2.0), 10 | keep_ratio=True), 11 | dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), 12 | dict(type='RandomFlip', prob=0.5), 13 | dict(type='PhotoMetricDistortion'), 14 | dict(type='PackSegInputs') 15 | ] 16 | test_pipeline = [ 17 | dict(type='LoadImageFromFile'), 18 | dict(type='Resize', scale=(2048, 1024), keep_ratio=True), 19 | # add loading annotation after ``Resize`` because ground truth 20 | # does not need to do resize data transform 21 | dict(type='LoadAnnotations'), 22 | dict(type='PackSegInputs') 23 | ] 24 | train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) 25 | val_dataloader = dict(dataset=dict(pipeline=test_pipeline)) 26 | test_dataloader = val_dataloader 27 | 28 | val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU']) 29 | test_evaluator = val_evaluator 30 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/_base_/datasets/coco-stuff164k.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'COCOStuffDataset' 3 | data_root = 'data/coco_stuff164k' 4 | crop_size = (512, 512) 5 | train_pipeline = [ 6 | dict(type='LoadImageFromFile'), 7 | dict(type='LoadAnnotations'), 8 | dict( 9 | type='RandomResize', 10 | scale=(2048, 512), 11 | ratio_range=(0.5, 2.0), 12 | keep_ratio=True), 13 | dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), 14 | dict(type='RandomFlip', prob=0.5), 15 | dict(type='PhotoMetricDistortion'), 16 | dict(type='PackSegInputs') 17 | ] 18 | test_pipeline = [ 19 | dict(type='LoadImageFromFile'), 20 | dict(type='Resize', scale=(2048, 512), keep_ratio=True), 21 | # add loading annotation after ``Resize`` because ground truth 22 | # does not need to do resize data transform 23 | dict(type='LoadAnnotations'), 24 | dict(type='PackSegInputs') 25 | ] 26 | img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75] 27 | tta_pipeline = [ 28 | dict(type='LoadImageFromFile', backend_args=None), 29 | dict( 30 | type='TestTimeAug', 31 | transforms=[ 32 | [ 33 | dict(type='Resize', scale_factor=r, keep_ratio=True) 34 | for r in img_ratios 35 | ], 36 | [ 37 | dict(type='RandomFlip', prob=0., direction='horizontal'), 38 | dict(type='RandomFlip', prob=1., direction='horizontal') 39 | ], [dict(type='LoadAnnotations')], [dict(type='PackSegInputs')] 40 | ]) 41 | ] 42 | train_dataloader = dict( 43 | batch_size=4, 44 | num_workers=4, 45 | persistent_workers=True, 46 | sampler=dict(type='InfiniteSampler', shuffle=True), 47 | dataset=dict( 48 | type=dataset_type, 49 | data_root=data_root, 50 | data_prefix=dict( 51 | img_path='images/train2017', seg_map_path='annotations/train2017'), 52 | pipeline=train_pipeline)) 53 | 
val_dataloader = dict( 54 | batch_size=1, 55 | num_workers=4, 56 | persistent_workers=True, 57 | sampler=dict(type='DefaultSampler', shuffle=False), 58 | dataset=dict( 59 | type=dataset_type, 60 | data_root=data_root, 61 | data_prefix=dict( 62 | img_path='images/val2017', seg_map_path='annotations/val2017'), 63 | pipeline=test_pipeline)) 64 | test_dataloader = val_dataloader 65 | 66 | val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU']) 67 | test_evaluator = val_evaluator 68 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/_base_/datasets/levir_256x256.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'LEVIRCDDataset' 3 | data_root = r'data/LEVIRCD' 4 | 5 | albu_train_transforms = [ 6 | dict(type='RandomBrightnessContrast', p=0.2), 7 | dict(type='HorizontalFlip', p=0.5), 8 | dict(type='VerticalFlip', p=0.5) 9 | ] 10 | 11 | train_pipeline = [ 12 | dict(type='LoadMultipleRSImageFromFile'), 13 | dict(type='LoadAnnotations'), 14 | dict(type='Albu', transforms=albu_train_transforms), 15 | dict(type='ConcatCDInput'), 16 | dict(type='PackSegInputs') 17 | ] 18 | test_pipeline = [ 19 | dict(type='LoadMultipleRSImageFromFile'), 20 | dict(type='LoadAnnotations'), 21 | dict(type='ConcatCDInput'), 22 | dict(type='PackSegInputs') 23 | ] 24 | 25 | tta_pipeline = [ 26 | dict(type='LoadMultipleRSImageFromFile'), 27 | dict( 28 | type='TestTimeAug', 29 | transforms=[[dict(type='LoadAnnotations')], 30 | [dict(type='ConcatCDInput')], 31 | [dict(type='PackSegInputs')]]) 32 | ] 33 | train_dataloader = dict( 34 | batch_size=4, 35 | num_workers=4, 36 | persistent_workers=True, 37 | sampler=dict(type='InfiniteSampler', shuffle=True), 38 | dataset=dict( 39 | type=dataset_type, 40 | data_root=data_root, 41 | data_prefix=dict( 42 | img_path='train/A', 43 | img_path2='train/B', 44 | seg_map_path='train/label'), 45 | pipeline=train_pipeline)) 46 | val_dataloader = dict( 47 | batch_size=1, 48 | num_workers=4, 49 | persistent_workers=True, 50 | sampler=dict(type='DefaultSampler', shuffle=False), 51 | dataset=dict( 52 | type=dataset_type, 53 | data_root=data_root, 54 | data_prefix=dict( 55 | img_path='test/A', img_path2='test/B', seg_map_path='test/label'), 56 | pipeline=test_pipeline)) 57 | test_dataloader = val_dataloader 58 | val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU']) 59 | test_evaluator = val_evaluator 60 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/_base_/datasets/loveda.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'LoveDADataset' 3 | data_root = 'data/loveDA' 4 | crop_size = (512, 512) 5 | train_pipeline = [ 6 | dict(type='LoadImageFromFile'), 7 | dict(type='LoadAnnotations', reduce_zero_label=True), 8 | dict( 9 | type='RandomResize', 10 | scale=(2048, 512), 11 | ratio_range=(0.5, 2.0), 12 | keep_ratio=True), 13 | dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), 14 | dict(type='RandomFlip', prob=0.5), 15 | dict(type='PhotoMetricDistortion'), 16 | dict(type='PackSegInputs') 17 | ] 18 | test_pipeline = [ 19 | dict(type='LoadImageFromFile'), 20 | dict(type='Resize', scale=(1024, 1024), keep_ratio=True), 21 | # add loading annotation after ``Resize`` because ground truth 22 | # does not need to do resize data transform 23 | dict(type='LoadAnnotations', reduce_zero_label=True), 24 | 
dict(type='PackSegInputs') 25 | ] 26 | img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75] 27 | tta_pipeline = [ 28 | dict(type='LoadImageFromFile', backend_args=None), 29 | dict( 30 | type='TestTimeAug', 31 | transforms=[ 32 | [ 33 | dict(type='Resize', scale_factor=r, keep_ratio=True) 34 | for r in img_ratios 35 | ], 36 | [ 37 | dict(type='RandomFlip', prob=0., direction='horizontal'), 38 | dict(type='RandomFlip', prob=1., direction='horizontal') 39 | ], [dict(type='LoadAnnotations')], [dict(type='PackSegInputs')] 40 | ]) 41 | ] 42 | train_dataloader = dict( 43 | batch_size=4, 44 | num_workers=4, 45 | persistent_workers=True, 46 | sampler=dict(type='InfiniteSampler', shuffle=True), 47 | dataset=dict( 48 | type=dataset_type, 49 | data_root=data_root, 50 | data_prefix=dict( 51 | img_path='img_dir/train', seg_map_path='ann_dir/train'), 52 | pipeline=train_pipeline)) 53 | val_dataloader = dict( 54 | batch_size=1, 55 | num_workers=4, 56 | persistent_workers=True, 57 | sampler=dict(type='DefaultSampler', shuffle=False), 58 | dataset=dict( 59 | type=dataset_type, 60 | data_root=data_root, 61 | data_prefix=dict(img_path='img_dir/val', seg_map_path='ann_dir/val'), 62 | pipeline=test_pipeline)) 63 | test_dataloader = val_dataloader 64 | 65 | val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU']) 66 | test_evaluator = val_evaluator 67 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/_base_/datasets/mapillary_v1.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'MapillaryDataset_v1' 3 | data_root = 'data/mapillary/' 4 | crop_size = (512, 1024) 5 | train_pipeline = [ 6 | dict(type='LoadImageFromFile'), 7 | dict(type='LoadAnnotations'), 8 | dict( 9 | type='RandomResize', 10 | scale=(2048, 1024), 11 | ratio_range=(0.5, 2.0), 12 | keep_ratio=True), 13 | dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), 14 | dict(type='RandomFlip', prob=0.5), 15 | dict(type='PhotoMetricDistortion'), 16 | dict(type='PackSegInputs') 17 | ] 18 | test_pipeline = [ 19 | dict(type='LoadImageFromFile'), 20 | dict(type='Resize', scale=(2048, 1024), keep_ratio=True), 21 | # add loading annotation after ``Resize`` because ground truth 22 | # does not need to do resize data transform 23 | dict(type='LoadAnnotations'), 24 | dict(type='PackSegInputs') 25 | ] 26 | img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75] 27 | tta_pipeline = [ 28 | dict(type='LoadImageFromFile', file_client_args=dict(backend='disk')), 29 | dict( 30 | type='TestTimeAug', 31 | transforms=[ 32 | [ 33 | dict(type='Resize', scale_factor=r, keep_ratio=True) 34 | for r in img_ratios 35 | ], 36 | [ 37 | dict(type='RandomFlip', prob=0., direction='horizontal'), 38 | dict(type='RandomFlip', prob=1., direction='horizontal') 39 | ], [dict(type='LoadAnnotations')], [dict(type='PackSegInputs')] 40 | ]) 41 | ] 42 | train_dataloader = dict( 43 | batch_size=2, 44 | num_workers=4, 45 | persistent_workers=True, 46 | sampler=dict(type='InfiniteSampler', shuffle=True), 47 | dataset=dict( 48 | type=dataset_type, 49 | data_root=data_root, 50 | data_prefix=dict( 51 | img_path='training/images', seg_map_path='training/v1.2/labels'), 52 | pipeline=train_pipeline)) 53 | val_dataloader = dict( 54 | batch_size=1, 55 | num_workers=4, 56 | persistent_workers=True, 57 | sampler=dict(type='DefaultSampler', shuffle=False), 58 | dataset=dict( 59 | type=dataset_type, 60 | data_root=data_root, 61 | data_prefix=dict( 62 | 
img_path='validation/images', 63 | seg_map_path='validation/v1.2/labels'), 64 | pipeline=test_pipeline)) 65 | test_dataloader = val_dataloader 66 | 67 | val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU']) 68 | test_evaluator = val_evaluator 69 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/_base_/datasets/mapillary_v2.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'MapillaryDataset_v2' 3 | data_root = 'data/mapillary/' 4 | crop_size = (512, 1024) 5 | train_pipeline = [ 6 | dict(type='LoadImageFromFile'), 7 | dict(type='LoadAnnotations'), 8 | dict( 9 | type='RandomResize', 10 | scale=(2048, 1024), 11 | ratio_range=(0.5, 2.0), 12 | keep_ratio=True), 13 | dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), 14 | dict(type='RandomFlip', prob=0.5), 15 | dict(type='PhotoMetricDistortion'), 16 | dict(type='PackSegInputs') 17 | ] 18 | test_pipeline = [ 19 | dict(type='LoadImageFromFile'), 20 | dict(type='Resize', scale=(2048, 1024), keep_ratio=True), 21 | # add loading annotation after ``Resize`` because ground truth 22 | # does not need to do resize data transform 23 | dict(type='LoadAnnotations'), 24 | dict(type='PackSegInputs') 25 | ] 26 | img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75] 27 | tta_pipeline = [ 28 | dict(type='LoadImageFromFile', file_client_args=dict(backend='disk')), 29 | dict( 30 | type='TestTimeAug', 31 | transforms=[ 32 | [ 33 | dict(type='Resize', scale_factor=r, keep_ratio=True) 34 | for r in img_ratios 35 | ], 36 | [ 37 | dict(type='RandomFlip', prob=0., direction='horizontal'), 38 | dict(type='RandomFlip', prob=1., direction='horizontal') 39 | ], [dict(type='LoadAnnotations')], [dict(type='PackSegInputs')] 40 | ]) 41 | ] 42 | train_dataloader = dict( 43 | batch_size=2, 44 | num_workers=4, 45 | persistent_workers=True, 46 | sampler=dict(type='InfiniteSampler', shuffle=True), 47 | dataset=dict( 48 | type=dataset_type, 49 | data_root=data_root, 50 | data_prefix=dict( 51 | img_path='training/images', seg_map_path='training/v2.0/labels'), 52 | pipeline=train_pipeline)) 53 | val_dataloader = dict( 54 | batch_size=1, 55 | num_workers=4, 56 | persistent_workers=True, 57 | sampler=dict(type='DefaultSampler', shuffle=False), 58 | dataset=dict( 59 | type=dataset_type, 60 | data_root=data_root, 61 | data_prefix=dict( 62 | img_path='validation/images', 63 | seg_map_path='validation/v2.0/labels'), 64 | pipeline=test_pipeline)) 65 | test_dataloader = val_dataloader 66 | 67 | val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU']) 68 | test_evaluator = val_evaluator 69 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/_base_/datasets/nyu.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'NYUDataset' 3 | data_root = 'data/nyu' 4 | 5 | train_pipeline = [ 6 | dict(type='LoadImageFromFile'), 7 | dict(type='LoadDepthAnnotation', depth_rescale_factor=1e-3), 8 | dict(type='RandomDepthMix', prob=0.25), 9 | dict(type='RandomFlip', prob=0.5), 10 | dict(type='RandomCrop', crop_size=(480, 480)), 11 | dict( 12 | type='Albu', 13 | transforms=[ 14 | dict(type='RandomBrightnessContrast'), 15 | dict(type='RandomGamma'), 16 | dict(type='HueSaturationValue'), 17 | ]), 18 | dict( 19 | type='PackSegInputs', 20 | meta_keys=('img_path', 'depth_map_path', 'ori_shape', 'img_shape', 21 | 'pad_shape', 
'scale_factor', 'flip', 'flip_direction', 22 | 'category_id')), 23 | ] 24 | 25 | test_pipeline = [ 26 | dict(type='LoadImageFromFile'), 27 | dict(type='Resize', scale=(2000, 480), keep_ratio=True), 28 | dict(type='LoadDepthAnnotation', depth_rescale_factor=1e-3), 29 | dict( 30 | type='PackSegInputs', 31 | meta_keys=('img_path', 'depth_map_path', 'ori_shape', 'img_shape', 32 | 'pad_shape', 'scale_factor', 'flip', 'flip_direction', 33 | 'category_id')) 34 | ] 35 | 36 | train_dataloader = dict( 37 | batch_size=8, 38 | num_workers=8, 39 | persistent_workers=True, 40 | sampler=dict(type='InfiniteSampler', shuffle=True), 41 | dataset=dict( 42 | type=dataset_type, 43 | data_root=data_root, 44 | data_prefix=dict( 45 | img_path='images/train', depth_map_path='annotations/train'), 46 | pipeline=train_pipeline)) 47 | 48 | val_dataloader = dict( 49 | batch_size=1, 50 | num_workers=4, 51 | persistent_workers=True, 52 | sampler=dict(type='DefaultSampler', shuffle=False), 53 | dataset=dict( 54 | type=dataset_type, 55 | data_root=data_root, 56 | test_mode=True, 57 | data_prefix=dict( 58 | img_path='images/test', depth_map_path='annotations/test'), 59 | pipeline=test_pipeline)) 60 | test_dataloader = val_dataloader 61 | 62 | val_evaluator = dict( 63 | type='DepthMetric', 64 | min_depth_eval=0.001, 65 | max_depth_eval=10.0, 66 | crop_type='nyu_crop') 67 | test_evaluator = val_evaluator 68 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/_base_/datasets/nyu_512x512.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'NYUDataset' 3 | data_root = 'data/nyu' 4 | 5 | train_pipeline = [ 6 | dict(type='LoadImageFromFile'), 7 | dict(type='LoadDepthAnnotation', depth_rescale_factor=1e-3), 8 | dict(type='RandomDepthMix', prob=0.25), 9 | dict(type='RandomFlip', prob=0.5), 10 | dict( 11 | type='RandomResize', 12 | scale=(768, 512), 13 | ratio_range=(0.8, 1.5), 14 | keep_ratio=True), 15 | dict(type='RandomCrop', crop_size=(512, 512)), 16 | dict( 17 | type='Albu', 18 | transforms=[ 19 | dict(type='RandomBrightnessContrast'), 20 | dict(type='RandomGamma'), 21 | dict(type='HueSaturationValue'), 22 | ]), 23 | dict( 24 | type='PackSegInputs', 25 | meta_keys=('img_path', 'depth_map_path', 'ori_shape', 'img_shape', 26 | 'pad_shape', 'scale_factor', 'flip', 'flip_direction', 27 | 'category_id')), 28 | ] 29 | 30 | test_pipeline = [ 31 | dict(type='LoadImageFromFile'), 32 | dict(type='Resize', scale=(2048, 512), keep_ratio=True), 33 | dict(type='LoadDepthAnnotation', depth_rescale_factor=1e-3), 34 | dict( 35 | type='PackSegInputs', 36 | meta_keys=('img_path', 'depth_map_path', 'ori_shape', 'img_shape', 37 | 'pad_shape', 'scale_factor', 'flip', 'flip_direction', 38 | 'category_id')) 39 | ] 40 | 41 | train_dataloader = dict( 42 | batch_size=8, 43 | num_workers=8, 44 | persistent_workers=True, 45 | sampler=dict(type='InfiniteSampler', shuffle=True), 46 | dataset=dict( 47 | type=dataset_type, 48 | data_root=data_root, 49 | data_prefix=dict( 50 | img_path='images/train', depth_map_path='annotations/train'), 51 | pipeline=train_pipeline)) 52 | 53 | val_dataloader = dict( 54 | batch_size=1, 55 | num_workers=4, 56 | persistent_workers=True, 57 | sampler=dict(type='DefaultSampler', shuffle=False), 58 | dataset=dict( 59 | type=dataset_type, 60 | data_root=data_root, 61 | test_mode=True, 62 | data_prefix=dict( 63 | img_path='images/test', depth_map_path='annotations/test'), 64 | 
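# NOTE: `test_mode=True` above lets the dataset be used for evaluation
# without training-time filtering, while the train loader's
# `InfiniteSampler` pairs with the iteration-based schedules in
# configs/_base_/schedules: it yields an endless, periodically
# re-shuffled index stream instead of stopping at epoch boundaries.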
pipeline=test_pipeline)) 65 | test_dataloader = val_dataloader 66 | 67 | val_evaluator = dict( 68 | type='DepthMetric', 69 | min_depth_eval=0.001, 70 | max_depth_eval=10.0, 71 | crop_type='nyu_crop') 72 | test_evaluator = val_evaluator 73 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/_base_/datasets/pascal_context.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'PascalContextDataset' 3 | data_root = 'data/VOCdevkit/VOC2010/' 4 | 5 | img_scale = (520, 520) 6 | crop_size = (480, 480) 7 | 8 | train_pipeline = [ 9 | dict(type='LoadImageFromFile'), 10 | dict(type='LoadAnnotations'), 11 | dict( 12 | type='RandomResize', 13 | scale=img_scale, 14 | ratio_range=(0.5, 2.0), 15 | keep_ratio=True), 16 | dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), 17 | dict(type='RandomFlip', prob=0.5), 18 | dict(type='PhotoMetricDistortion'), 19 | dict(type='PackSegInputs') 20 | ] 21 | test_pipeline = [ 22 | dict(type='LoadImageFromFile'), 23 | dict(type='Resize', scale=img_scale, keep_ratio=True), 24 | # add loading annotation after ``Resize`` because ground truth 25 | # does not need to do resize data transform 26 | dict(type='LoadAnnotations'), 27 | dict(type='PackSegInputs') 28 | ] 29 | train_dataloader = dict( 30 | batch_size=4, 31 | num_workers=4, 32 | persistent_workers=True, 33 | sampler=dict(type='InfiniteSampler', shuffle=True), 34 | dataset=dict( 35 | type=dataset_type, 36 | data_root=data_root, 37 | data_prefix=dict( 38 | img_path='JPEGImages', seg_map_path='SegmentationClassContext'), 39 | ann_file='ImageSets/SegmentationContext/train.txt', 40 | pipeline=train_pipeline)) 41 | val_dataloader = dict( 42 | batch_size=1, 43 | num_workers=4, 44 | persistent_workers=True, 45 | sampler=dict(type='DefaultSampler', shuffle=False), 46 | dataset=dict( 47 | type=dataset_type, 48 | data_root=data_root, 49 | data_prefix=dict( 50 | img_path='JPEGImages', seg_map_path='SegmentationClassContext'), 51 | ann_file='ImageSets/SegmentationContext/val.txt', 52 | pipeline=test_pipeline)) 53 | test_dataloader = val_dataloader 54 | 55 | val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU']) 56 | test_evaluator = val_evaluator 57 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/_base_/datasets/potsdam.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'PotsdamDataset' 3 | data_root = 'data/potsdam' 4 | crop_size = (512, 512) 5 | train_pipeline = [ 6 | dict(type='LoadImageFromFile'), 7 | dict(type='LoadAnnotations', reduce_zero_label=True), 8 | dict( 9 | type='RandomResize', 10 | scale=(512, 512), 11 | ratio_range=(0.5, 2.0), 12 | keep_ratio=True), 13 | dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), 14 | dict(type='RandomFlip', prob=0.5), 15 | dict(type='PhotoMetricDistortion'), 16 | dict(type='PackSegInputs') 17 | ] 18 | test_pipeline = [ 19 | dict(type='LoadImageFromFile'), 20 | dict(type='Resize', scale=(512, 512), keep_ratio=True), 21 | # add loading annotation after ``Resize`` because ground truth 22 | # does not need to do resize data transform 23 | dict(type='LoadAnnotations', reduce_zero_label=True), 24 | dict(type='PackSegInputs') 25 | ] 26 | img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75] 27 | tta_pipeline = [ 28 | dict(type='LoadImageFromFile', backend_args=None), 29 | dict( 30 | 
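# NOTE: `TestTimeAug` takes the cartesian product of its transform
# lists, so the 6 scale ratios x 2 flips below produce 12 augmented
# views per image; `SegTTAModel` (wired up in default_runtime.py) then
# merges the 12 predictions, conceptually:
#   seg_logits = mean(unflip(model(view)) for view in views)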
type='TestTimeAug', 31 | transforms=[ 32 | [ 33 | dict(type='Resize', scale_factor=r, keep_ratio=True) 34 | for r in img_ratios 35 | ], 36 | [ 37 | dict(type='RandomFlip', prob=0., direction='horizontal'), 38 | dict(type='RandomFlip', prob=1., direction='horizontal') 39 | ], [dict(type='LoadAnnotations')], [dict(type='PackSegInputs')] 40 | ]) 41 | ] 42 | train_dataloader = dict( 43 | batch_size=4, 44 | num_workers=4, 45 | persistent_workers=True, 46 | sampler=dict(type='InfiniteSampler', shuffle=True), 47 | dataset=dict( 48 | type=dataset_type, 49 | data_root=data_root, 50 | data_prefix=dict( 51 | img_path='img_dir/train', seg_map_path='ann_dir/train'), 52 | pipeline=train_pipeline)) 53 | val_dataloader = dict( 54 | batch_size=1, 55 | num_workers=4, 56 | persistent_workers=True, 57 | sampler=dict(type='DefaultSampler', shuffle=False), 58 | dataset=dict( 59 | type=dataset_type, 60 | data_root=data_root, 61 | data_prefix=dict(img_path='img_dir/val', seg_map_path='ann_dir/val'), 62 | pipeline=test_pipeline)) 63 | test_dataloader = val_dataloader 64 | 65 | val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU']) 66 | test_evaluator = val_evaluator 67 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/_base_/datasets/synapse.py: -------------------------------------------------------------------------------- 1 | dataset_type = 'SynapseDataset' 2 | data_root = 'data/synapse/' 3 | img_scale = (224, 224) 4 | train_pipeline = [ 5 | dict(type='LoadImageFromFile'), 6 | dict(type='LoadAnnotations'), 7 | dict(type='Resize', scale=img_scale, keep_ratio=True), 8 | dict(type='RandomRotFlip', rotate_prob=0.5, flip_prob=0.5, degree=20), 9 | dict(type='PackSegInputs') 10 | ] 11 | test_pipeline = [ 12 | dict(type='LoadImageFromFile'), 13 | dict(type='Resize', scale=img_scale, keep_ratio=True), 14 | dict(type='LoadAnnotations'), 15 | dict(type='PackSegInputs') 16 | ] 17 | train_dataloader = dict( 18 | batch_size=6, 19 | num_workers=2, 20 | persistent_workers=True, 21 | sampler=dict(type='InfiniteSampler', shuffle=True), 22 | dataset=dict( 23 | type=dataset_type, 24 | data_root=data_root, 25 | data_prefix=dict( 26 | img_path='img_dir/train', seg_map_path='ann_dir/train'), 27 | pipeline=train_pipeline)) 28 | val_dataloader = dict( 29 | batch_size=1, 30 | num_workers=4, 31 | persistent_workers=True, 32 | sampler=dict(type='DefaultSampler', shuffle=False), 33 | dataset=dict( 34 | type=dataset_type, 35 | data_root=data_root, 36 | data_prefix=dict(img_path='img_dir/val', seg_map_path='ann_dir/val'), 37 | pipeline=test_pipeline)) 38 | test_dataloader = val_dataloader 39 | 40 | val_evaluator = dict(type='IoUMetric', iou_metrics=['mDice']) 41 | test_evaluator = val_evaluator 42 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/_base_/datasets/vaihingen.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'ISPRSDataset' 3 | data_root = 'data/vaihingen' 4 | crop_size = (512, 512) 5 | train_pipeline = [ 6 | dict(type='LoadImageFromFile'), 7 | dict(type='LoadAnnotations', reduce_zero_label=True), 8 | dict( 9 | type='RandomResize', 10 | scale=(512, 512), 11 | ratio_range=(0.5, 2.0), 12 | keep_ratio=True), 13 | dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), 14 | dict(type='RandomFlip', prob=0.5), 15 | dict(type='PhotoMetricDistortion'), 16 | dict(type='PackSegInputs') 17 | ] 18 | test_pipeline = [ 19 | 
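# NOTE: `reduce_zero_label=True` (used for Potsdam above and Vaihingen
# here) remaps the ISPRS labels at load time: class 0 becomes the ignore
# index 255 and every other id shifts down by one, conceptually:
#   label[label == 0] = 255; label = label - 1; label[label == 254] = 255
# so those pixels never contribute to the loss or the mIoU.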
dict(type='LoadImageFromFile'), 20 | dict(type='Resize', scale=(512, 512), keep_ratio=True), 21 | # add loading annotation after ``Resize`` because ground truth 22 | # does not need to do resize data transform 23 | dict(type='LoadAnnotations', reduce_zero_label=True), 24 | dict(type='PackSegInputs') 25 | ] 26 | img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75] 27 | tta_pipeline = [ 28 | dict(type='LoadImageFromFile', backend_args=None), 29 | dict( 30 | type='TestTimeAug', 31 | transforms=[ 32 | [ 33 | dict(type='Resize', scale_factor=r, keep_ratio=True) 34 | for r in img_ratios 35 | ], 36 | [ 37 | dict(type='RandomFlip', prob=0., direction='horizontal'), 38 | dict(type='RandomFlip', prob=1., direction='horizontal') 39 | ], [dict(type='LoadAnnotations')], [dict(type='PackSegInputs')] 40 | ]) 41 | ] 42 | train_dataloader = dict( 43 | batch_size=4, 44 | num_workers=4, 45 | persistent_workers=True, 46 | sampler=dict(type='InfiniteSampler', shuffle=True), 47 | dataset=dict( 48 | type=dataset_type, 49 | data_root=data_root, 50 | data_prefix=dict( 51 | img_path='img_dir/train', seg_map_path='ann_dir/train'), 52 | pipeline=train_pipeline)) 53 | val_dataloader = dict( 54 | batch_size=1, 55 | num_workers=4, 56 | persistent_workers=True, 57 | sampler=dict(type='DefaultSampler', shuffle=False), 58 | dataset=dict( 59 | type=dataset_type, 60 | data_root=data_root, 61 | data_prefix=dict(img_path='img_dir/val', seg_map_path='ann_dir/val'), 62 | pipeline=test_pipeline)) 63 | test_dataloader = val_dataloader 64 | 65 | val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU']) 66 | test_evaluator = val_evaluator 67 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/_base_/default_runtime.py: -------------------------------------------------------------------------------- 1 | default_scope = 'mmseg' 2 | env_cfg = dict( 3 | cudnn_benchmark=True, 4 | mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0), 5 | dist_cfg=dict(backend='nccl'), 6 | ) 7 | vis_backends = [dict(type='LocalVisBackend')] 8 | visualizer = dict( 9 | type='SegLocalVisualizer', vis_backends=vis_backends, name='visualizer') 10 | log_processor = dict(by_epoch=False) 11 | log_level = 'INFO' 12 | load_from = None 13 | resume = False 14 | 15 | tta_model = dict(type='SegTTAModel') 16 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/_base_/models/ann_r50-d8.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | data_preprocessor = dict( 4 | type='SegDataPreProcessor', 5 | mean=[123.675, 116.28, 103.53], 6 | std=[58.395, 57.12, 57.375], 7 | bgr_to_rgb=True, 8 | pad_val=0, 9 | seg_pad_val=255) 10 | model = dict( 11 | type='EncoderDecoder', 12 | data_preprocessor=data_preprocessor, 13 | pretrained='open-mmlab://resnet50_v1c', 14 | backbone=dict( 15 | type='ResNetV1c', 16 | depth=50, 17 | num_stages=4, 18 | out_indices=(0, 1, 2, 3), 19 | dilations=(1, 1, 2, 4), 20 | strides=(1, 2, 1, 1), 21 | norm_cfg=norm_cfg, 22 | norm_eval=False, 23 | style='pytorch', 24 | contract_dilation=True), 25 | decode_head=dict( 26 | type='ANNHead', 27 | in_channels=[1024, 2048], 28 | in_index=[2, 3], 29 | channels=512, 30 | project_channels=256, 31 | query_scales=(1, ), 32 | key_pool_scales=(1, 3, 6, 8), 33 | dropout_ratio=0.1, 34 | num_classes=19, 35 | norm_cfg=norm_cfg, 36 | align_corners=False, 37 | loss_decode=dict( 38 | 
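# NOTE: the decode head carries the full loss weight while the auxiliary
# FCN head below taps stage-3 features (in_index=2) for deep supervision,
# i.e. total = 1.0 * CE(decode) + 0.4 * CE(aux); the auxiliary branch
# only stabilises training and is dropped at inference.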
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 39 | auxiliary_head=dict( 40 | type='FCNHead', 41 | in_channels=1024, 42 | in_index=2, 43 | channels=256, 44 | num_convs=1, 45 | concat_input=False, 46 | dropout_ratio=0.1, 47 | num_classes=19, 48 | norm_cfg=norm_cfg, 49 | align_corners=False, 50 | loss_decode=dict( 51 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 52 | # model training and testing settings 53 | train_cfg=dict(), 54 | test_cfg=dict(mode='whole')) 55 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/_base_/models/apcnet_r50-d8.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | data_preprocessor = dict( 4 | type='SegDataPreProcessor', 5 | mean=[123.675, 116.28, 103.53], 6 | std=[58.395, 57.12, 57.375], 7 | bgr_to_rgb=True, 8 | pad_val=0, 9 | seg_pad_val=255) 10 | model = dict( 11 | type='EncoderDecoder', 12 | data_preprocessor=data_preprocessor, 13 | pretrained='open-mmlab://resnet50_v1c', 14 | backbone=dict( 15 | type='ResNetV1c', 16 | depth=50, 17 | num_stages=4, 18 | out_indices=(0, 1, 2, 3), 19 | dilations=(1, 1, 2, 4), 20 | strides=(1, 2, 1, 1), 21 | norm_cfg=norm_cfg, 22 | norm_eval=False, 23 | style='pytorch', 24 | contract_dilation=True), 25 | decode_head=dict( 26 | type='APCHead', 27 | in_channels=2048, 28 | in_index=3, 29 | channels=512, 30 | pool_scales=(1, 2, 3, 6), 31 | dropout_ratio=0.1, 32 | num_classes=19, 33 | norm_cfg=dict(type='SyncBN', requires_grad=True), 34 | align_corners=False, 35 | loss_decode=dict( 36 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 37 | auxiliary_head=dict( 38 | type='FCNHead', 39 | in_channels=1024, 40 | in_index=2, 41 | channels=256, 42 | num_convs=1, 43 | concat_input=False, 44 | dropout_ratio=0.1, 45 | num_classes=19, 46 | norm_cfg=norm_cfg, 47 | align_corners=False, 48 | loss_decode=dict( 49 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 50 | # model training and testing settings 51 | train_cfg=dict(), 52 | test_cfg=dict(mode='whole')) 53 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/_base_/models/ccnet_r50-d8.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | data_preprocessor = dict( 4 | type='SegDataPreProcessor', 5 | mean=[123.675, 116.28, 103.53], 6 | std=[58.395, 57.12, 57.375], 7 | bgr_to_rgb=True, 8 | pad_val=0, 9 | seg_pad_val=255) 10 | model = dict( 11 | type='EncoderDecoder', 12 | data_preprocessor=data_preprocessor, 13 | pretrained='open-mmlab://resnet50_v1c', 14 | backbone=dict( 15 | type='ResNetV1c', 16 | depth=50, 17 | num_stages=4, 18 | out_indices=(0, 1, 2, 3), 19 | dilations=(1, 1, 2, 4), 20 | strides=(1, 2, 1, 1), 21 | norm_cfg=norm_cfg, 22 | norm_eval=False, 23 | style='pytorch', 24 | contract_dilation=True), 25 | decode_head=dict( 26 | type='CCHead', 27 | in_channels=2048, 28 | in_index=3, 29 | channels=512, 30 | recurrence=2, 31 | dropout_ratio=0.1, 32 | num_classes=19, 33 | norm_cfg=norm_cfg, 34 | align_corners=False, 35 | loss_decode=dict( 36 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 37 | auxiliary_head=dict( 38 | type='FCNHead', 39 | in_channels=1024, 40 | in_index=2, 41 | channels=256, 42 | num_convs=1, 43 | concat_input=False, 44 | dropout_ratio=0.1, 45 | 
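# NOTE: the '-d8' suffix on these model files comes from the backbone
# settings above: strides=(1, 2, 1, 1) plus dilations=(1, 1, 2, 4) keep
# stages 3 and 4 at stage-2 resolution, so the deepest features sit at
# 1/8 of the input (output stride 8) rather than the usual 1/32.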
num_classes=19, 46 | norm_cfg=norm_cfg, 47 | align_corners=False, 48 | loss_decode=dict( 49 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 50 | # model training and testing settings 51 | train_cfg=dict(), 52 | test_cfg=dict(mode='whole')) 53 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/_base_/models/cgnet.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', eps=1e-03, requires_grad=True) 3 | data_preprocessor = dict( 4 | type='SegDataPreProcessor', 5 | mean=[72.39239876, 82.90891754, 73.15835921], 6 | std=[1, 1, 1], 7 | bgr_to_rgb=True, 8 | pad_val=0, 9 | seg_pad_val=255) 10 | model = dict( 11 | type='EncoderDecoder', 12 | data_preprocessor=data_preprocessor, 13 | backbone=dict( 14 | type='CGNet', 15 | norm_cfg=norm_cfg, 16 | in_channels=3, 17 | num_channels=(32, 64, 128), 18 | num_blocks=(3, 21), 19 | dilations=(2, 4), 20 | reductions=(8, 16)), 21 | decode_head=dict( 22 | type='FCNHead', 23 | in_channels=256, 24 | in_index=2, 25 | channels=256, 26 | num_convs=0, 27 | concat_input=False, 28 | dropout_ratio=0, 29 | num_classes=19, 30 | norm_cfg=norm_cfg, 31 | loss_decode=dict( 32 | type='CrossEntropyLoss', 33 | use_sigmoid=False, 34 | loss_weight=1.0, 35 | class_weight=[ 36 | 2.5959933, 6.7415504, 3.5354059, 9.8663225, 9.690899, 9.369352, 37 | 10.289121, 9.953208, 4.3097677, 9.490387, 7.674431, 9.396905, 38 | 10.347791, 6.3927646, 10.226669, 10.241062, 10.280587, 39 | 10.396974, 10.055647 40 | ])), 41 | # model training and testing settings 42 | train_cfg=dict(sampler=None), 43 | test_cfg=dict(mode='whole')) 44 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/_base_/models/danet_r50-d8.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | data_preprocessor = dict( 4 | type='SegDataPreProcessor', 5 | mean=[123.675, 116.28, 103.53], 6 | std=[58.395, 57.12, 57.375], 7 | bgr_to_rgb=True, 8 | pad_val=0, 9 | seg_pad_val=255) 10 | model = dict( 11 | type='EncoderDecoder', 12 | data_preprocessor=data_preprocessor, 13 | pretrained='open-mmlab://resnet50_v1c', 14 | backbone=dict( 15 | type='ResNetV1c', 16 | depth=50, 17 | num_stages=4, 18 | out_indices=(0, 1, 2, 3), 19 | dilations=(1, 1, 2, 4), 20 | strides=(1, 2, 1, 1), 21 | norm_cfg=norm_cfg, 22 | norm_eval=False, 23 | style='pytorch', 24 | contract_dilation=True), 25 | decode_head=dict( 26 | type='DAHead', 27 | in_channels=2048, 28 | in_index=3, 29 | channels=512, 30 | pam_channels=64, 31 | dropout_ratio=0.1, 32 | num_classes=19, 33 | norm_cfg=norm_cfg, 34 | align_corners=False, 35 | loss_decode=dict( 36 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 37 | auxiliary_head=dict( 38 | type='FCNHead', 39 | in_channels=1024, 40 | in_index=2, 41 | channels=256, 42 | num_convs=1, 43 | concat_input=False, 44 | dropout_ratio=0.1, 45 | num_classes=19, 46 | norm_cfg=norm_cfg, 47 | align_corners=False, 48 | loss_decode=dict( 49 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 50 | # model training and testing settings 51 | train_cfg=dict(), 52 | test_cfg=dict(mode='whole')) 53 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/_base_/models/deeplabv3_r50-d8.py: 
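# NOTE: deeplabv3_r50-d8.py below pairs that dilated backbone with an
# ASPPHead whose dilations=(1, 12, 24, 36) sample context at four
# receptive-field sizes in parallel; deeplabv3plus_r50-d8.py further down
# additionally fuses the stage-1 skip (c1_in_channels=256 projected to
# c1_channels=48) before the final classifier.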
-------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | data_preprocessor = dict( 4 | type='SegDataPreProcessor', 5 | mean=[123.675, 116.28, 103.53], 6 | std=[58.395, 57.12, 57.375], 7 | bgr_to_rgb=True, 8 | pad_val=0, 9 | seg_pad_val=255) 10 | model = dict( 11 | type='EncoderDecoder', 12 | data_preprocessor=data_preprocessor, 13 | pretrained='open-mmlab://resnet50_v1c', 14 | backbone=dict( 15 | type='ResNetV1c', 16 | depth=50, 17 | num_stages=4, 18 | out_indices=(0, 1, 2, 3), 19 | dilations=(1, 1, 2, 4), 20 | strides=(1, 2, 1, 1), 21 | norm_cfg=norm_cfg, 22 | norm_eval=False, 23 | style='pytorch', 24 | contract_dilation=True), 25 | decode_head=dict( 26 | type='ASPPHead', 27 | in_channels=2048, 28 | in_index=3, 29 | channels=512, 30 | dilations=(1, 12, 24, 36), 31 | dropout_ratio=0.1, 32 | num_classes=19, 33 | norm_cfg=norm_cfg, 34 | align_corners=False, 35 | loss_decode=dict( 36 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 37 | auxiliary_head=dict( 38 | type='FCNHead', 39 | in_channels=1024, 40 | in_index=2, 41 | channels=256, 42 | num_convs=1, 43 | concat_input=False, 44 | dropout_ratio=0.1, 45 | num_classes=19, 46 | norm_cfg=norm_cfg, 47 | align_corners=False, 48 | loss_decode=dict( 49 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 50 | # model training and testing settings 51 | train_cfg=dict(), 52 | test_cfg=dict(mode='whole')) 53 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/_base_/models/deeplabv3_unet_s5-d16.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | data_preprocessor = dict( 4 | type='SegDataPreProcessor', 5 | mean=[123.675, 116.28, 103.53], 6 | std=[58.395, 57.12, 57.375], 7 | bgr_to_rgb=True, 8 | pad_val=0, 9 | seg_pad_val=255) 10 | model = dict( 11 | type='EncoderDecoder', 12 | data_preprocessor=data_preprocessor, 13 | pretrained=None, 14 | backbone=dict( 15 | type='UNet', 16 | in_channels=3, 17 | base_channels=64, 18 | num_stages=5, 19 | strides=(1, 1, 1, 1, 1), 20 | enc_num_convs=(2, 2, 2, 2, 2), 21 | dec_num_convs=(2, 2, 2, 2), 22 | downsamples=(True, True, True, True), 23 | enc_dilations=(1, 1, 1, 1, 1), 24 | dec_dilations=(1, 1, 1, 1), 25 | with_cp=False, 26 | conv_cfg=None, 27 | norm_cfg=norm_cfg, 28 | act_cfg=dict(type='ReLU'), 29 | upsample_cfg=dict(type='InterpConv'), 30 | norm_eval=False), 31 | decode_head=dict( 32 | type='ASPPHead', 33 | in_channels=64, 34 | in_index=4, 35 | channels=16, 36 | dilations=(1, 12, 24, 36), 37 | dropout_ratio=0.1, 38 | num_classes=2, 39 | norm_cfg=norm_cfg, 40 | align_corners=False, 41 | loss_decode=dict( 42 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 43 | auxiliary_head=dict( 44 | type='FCNHead', 45 | in_channels=128, 46 | in_index=3, 47 | channels=64, 48 | num_convs=1, 49 | concat_input=False, 50 | dropout_ratio=0.1, 51 | num_classes=2, 52 | norm_cfg=norm_cfg, 53 | align_corners=False, 54 | loss_decode=dict( 55 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 56 | # model training and testing settings 57 | train_cfg=dict(), 58 | test_cfg=dict(mode='slide', crop_size=256, stride=170)) 59 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/_base_/models/deeplabv3plus_r50-d8.py: 
-------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | data_preprocessor = dict( 4 | type='SegDataPreProcessor', 5 | mean=[123.675, 116.28, 103.53], 6 | std=[58.395, 57.12, 57.375], 7 | bgr_to_rgb=True, 8 | pad_val=0, 9 | seg_pad_val=255) 10 | model = dict( 11 | type='EncoderDecoder', 12 | data_preprocessor=data_preprocessor, 13 | pretrained='open-mmlab://resnet50_v1c', 14 | backbone=dict( 15 | type='ResNetV1c', 16 | depth=50, 17 | num_stages=4, 18 | out_indices=(0, 1, 2, 3), 19 | dilations=(1, 1, 2, 4), 20 | strides=(1, 2, 1, 1), 21 | norm_cfg=norm_cfg, 22 | norm_eval=False, 23 | style='pytorch', 24 | contract_dilation=True), 25 | decode_head=dict( 26 | type='DepthwiseSeparableASPPHead', 27 | in_channels=2048, 28 | in_index=3, 29 | channels=512, 30 | dilations=(1, 12, 24, 36), 31 | c1_in_channels=256, 32 | c1_channels=48, 33 | dropout_ratio=0.1, 34 | num_classes=19, 35 | norm_cfg=norm_cfg, 36 | align_corners=False, 37 | loss_decode=dict( 38 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 39 | auxiliary_head=dict( 40 | type='FCNHead', 41 | in_channels=1024, 42 | in_index=2, 43 | channels=256, 44 | num_convs=1, 45 | concat_input=False, 46 | dropout_ratio=0.1, 47 | num_classes=19, 48 | norm_cfg=norm_cfg, 49 | align_corners=False, 50 | loss_decode=dict( 51 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 52 | # model training and testing settings 53 | train_cfg=dict(), 54 | test_cfg=dict(mode='whole')) 55 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/_base_/models/dmnet_r50-d8.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | data_preprocessor = dict( 4 | type='SegDataPreProcessor', 5 | mean=[123.675, 116.28, 103.53], 6 | std=[58.395, 57.12, 57.375], 7 | bgr_to_rgb=True, 8 | pad_val=0, 9 | seg_pad_val=255) 10 | model = dict( 11 | type='EncoderDecoder', 12 | data_preprocessor=data_preprocessor, 13 | pretrained='open-mmlab://resnet50_v1c', 14 | backbone=dict( 15 | type='ResNetV1c', 16 | depth=50, 17 | num_stages=4, 18 | out_indices=(0, 1, 2, 3), 19 | dilations=(1, 1, 2, 4), 20 | strides=(1, 2, 1, 1), 21 | norm_cfg=norm_cfg, 22 | norm_eval=False, 23 | style='pytorch', 24 | contract_dilation=True), 25 | decode_head=dict( 26 | type='DMHead', 27 | in_channels=2048, 28 | in_index=3, 29 | channels=512, 30 | filter_sizes=(1, 3, 5, 7), 31 | dropout_ratio=0.1, 32 | num_classes=19, 33 | norm_cfg=dict(type='SyncBN', requires_grad=True), 34 | align_corners=False, 35 | loss_decode=dict( 36 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 37 | auxiliary_head=dict( 38 | type='FCNHead', 39 | in_channels=1024, 40 | in_index=2, 41 | channels=256, 42 | num_convs=1, 43 | concat_input=False, 44 | dropout_ratio=0.1, 45 | num_classes=19, 46 | norm_cfg=norm_cfg, 47 | align_corners=False, 48 | loss_decode=dict( 49 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 50 | # model training and testing settings 51 | train_cfg=dict(), 52 | test_cfg=dict(mode='whole')) 53 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/_base_/models/dnl_r50-d8.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | 
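# NOTE: every fragment in this models/ directory repeats the same
# data_preprocessor: mean/std are the ImageNet RGB statistics (matching
# the ImageNet-pretrained checkpoints), `bgr_to_rgb=True` converts
# cv2-loaded BGR frames, and `seg_pad_val=255` pads labels with the
# ignore index so padded pixels never reach the loss.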
data_preprocessor = dict( 4 | type='SegDataPreProcessor', 5 | mean=[123.675, 116.28, 103.53], 6 | std=[58.395, 57.12, 57.375], 7 | bgr_to_rgb=True, 8 | pad_val=0, 9 | seg_pad_val=255) 10 | model = dict( 11 | type='EncoderDecoder', 12 | data_preprocessor=data_preprocessor, 13 | pretrained='open-mmlab://resnet50_v1c', 14 | backbone=dict( 15 | type='ResNetV1c', 16 | depth=50, 17 | num_stages=4, 18 | out_indices=(0, 1, 2, 3), 19 | dilations=(1, 1, 2, 4), 20 | strides=(1, 2, 1, 1), 21 | norm_cfg=norm_cfg, 22 | norm_eval=False, 23 | style='pytorch', 24 | contract_dilation=True), 25 | decode_head=dict( 26 | type='DNLHead', 27 | in_channels=2048, 28 | in_index=3, 29 | channels=512, 30 | dropout_ratio=0.1, 31 | reduction=2, 32 | use_scale=True, 33 | mode='embedded_gaussian', 34 | num_classes=19, 35 | norm_cfg=norm_cfg, 36 | align_corners=False, 37 | loss_decode=dict( 38 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 39 | auxiliary_head=dict( 40 | type='FCNHead', 41 | in_channels=1024, 42 | in_index=2, 43 | channels=256, 44 | num_convs=1, 45 | concat_input=False, 46 | dropout_ratio=0.1, 47 | num_classes=19, 48 | norm_cfg=norm_cfg, 49 | align_corners=False, 50 | loss_decode=dict( 51 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 52 | # model training and testing settings 53 | train_cfg=dict(), 54 | test_cfg=dict(mode='whole')) 55 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/_base_/models/dpt_vit-b16.py: -------------------------------------------------------------------------------- 1 | norm_cfg = dict(type='SyncBN', requires_grad=True) 2 | data_preprocessor = dict( 3 | type='SegDataPreProcessor', 4 | mean=[123.675, 116.28, 103.53], 5 | std=[58.395, 57.12, 57.375], 6 | bgr_to_rgb=True, 7 | pad_val=0, 8 | seg_pad_val=255) 9 | model = dict( 10 | type='EncoderDecoder', 11 | data_preprocessor=data_preprocessor, 12 | pretrained='pretrain/vit-b16_p16_224-80ecf9dd.pth', # noqa 13 | backbone=dict( 14 | type='VisionTransformer', 15 | img_size=224, 16 | embed_dims=768, 17 | num_layers=12, 18 | num_heads=12, 19 | out_indices=(2, 5, 8, 11), 20 | final_norm=False, 21 | with_cls_token=True, 22 | output_cls_token=True), 23 | decode_head=dict( 24 | type='DPTHead', 25 | in_channels=(768, 768, 768, 768), 26 | channels=256, 27 | embed_dims=768, 28 | post_process_channels=[96, 192, 384, 768], 29 | num_classes=150, 30 | readout_type='project', 31 | input_transform='multiple_select', 32 | in_index=(0, 1, 2, 3), 33 | norm_cfg=norm_cfg, 34 | loss_decode=dict( 35 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 36 | auxiliary_head=None, 37 | # model training and testing settings 38 | train_cfg=dict(), 39 | test_cfg=dict(mode='whole')) # yapf: disable 40 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/_base_/models/emanet_r50-d8.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | data_preprocessor = dict( 4 | type='SegDataPreProcessor', 5 | mean=[123.675, 116.28, 103.53], 6 | std=[58.395, 57.12, 57.375], 7 | bgr_to_rgb=True, 8 | pad_val=0, 9 | seg_pad_val=255) 10 | model = dict( 11 | type='EncoderDecoder', 12 | data_preprocessor=data_preprocessor, 13 | pretrained='open-mmlab://resnet50_v1c', 14 | backbone=dict( 15 | type='ResNetV1c', 16 | depth=50, 17 | num_stages=4, 18 | out_indices=(0, 1, 2, 3), 19 | dilations=(1, 1, 2, 4), 20 | 
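# NOTE: `SyncBN` assumes a distributed launch (it synchronises batch
# statistics across GPUs); for a quick single-GPU run the usual local
# override is, e.g.:
#   norm_cfg = dict(type='BN', requires_grad=True)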
strides=(1, 2, 1, 1), 21 | norm_cfg=norm_cfg, 22 | norm_eval=False, 23 | style='pytorch', 24 | contract_dilation=True), 25 | decode_head=dict( 26 | type='EMAHead', 27 | in_channels=2048, 28 | in_index=3, 29 | channels=256, 30 | ema_channels=512, 31 | num_bases=64, 32 | num_stages=3, 33 | momentum=0.1, 34 | dropout_ratio=0.1, 35 | num_classes=19, 36 | norm_cfg=norm_cfg, 37 | align_corners=False, 38 | loss_decode=dict( 39 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 40 | auxiliary_head=dict( 41 | type='FCNHead', 42 | in_channels=1024, 43 | in_index=2, 44 | channels=256, 45 | num_convs=1, 46 | concat_input=False, 47 | dropout_ratio=0.1, 48 | num_classes=19, 49 | norm_cfg=norm_cfg, 50 | align_corners=False, 51 | loss_decode=dict( 52 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 53 | # model training and testing settings 54 | train_cfg=dict(), 55 | test_cfg=dict(mode='whole')) 56 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/_base_/models/encnet_r50-d8.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | data_preprocessor = dict( 4 | type='SegDataPreProcessor', 5 | mean=[123.675, 116.28, 103.53], 6 | std=[58.395, 57.12, 57.375], 7 | bgr_to_rgb=True, 8 | pad_val=0, 9 | seg_pad_val=255) 10 | model = dict( 11 | type='EncoderDecoder', 12 | data_preprocessor=data_preprocessor, 13 | pretrained='open-mmlab://resnet50_v1c', 14 | backbone=dict( 15 | type='ResNetV1c', 16 | depth=50, 17 | num_stages=4, 18 | out_indices=(0, 1, 2, 3), 19 | dilations=(1, 1, 2, 4), 20 | strides=(1, 2, 1, 1), 21 | norm_cfg=norm_cfg, 22 | norm_eval=False, 23 | style='pytorch', 24 | contract_dilation=True), 25 | decode_head=dict( 26 | type='EncHead', 27 | in_channels=[512, 1024, 2048], 28 | in_index=(1, 2, 3), 29 | channels=512, 30 | num_codes=32, 31 | use_se_loss=True, 32 | add_lateral=False, 33 | dropout_ratio=0.1, 34 | num_classes=19, 35 | norm_cfg=norm_cfg, 36 | align_corners=False, 37 | loss_decode=dict( 38 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), 39 | loss_se_decode=dict( 40 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=0.2)), 41 | auxiliary_head=dict( 42 | type='FCNHead', 43 | in_channels=1024, 44 | in_index=2, 45 | channels=256, 46 | num_convs=1, 47 | concat_input=False, 48 | dropout_ratio=0.1, 49 | num_classes=19, 50 | norm_cfg=norm_cfg, 51 | align_corners=False, 52 | loss_decode=dict( 53 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 54 | # model training and testing settings 55 | train_cfg=dict(), 56 | test_cfg=dict(mode='whole')) 57 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/_base_/models/erfnet_fcn.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | data_preprocessor = dict( 4 | type='SegDataPreProcessor', 5 | mean=[123.675, 116.28, 103.53], 6 | std=[58.395, 57.12, 57.375], 7 | bgr_to_rgb=True, 8 | pad_val=0, 9 | seg_pad_val=255) 10 | model = dict( 11 | type='EncoderDecoder', 12 | data_preprocessor=data_preprocessor, 13 | pretrained=None, 14 | backbone=dict( 15 | type='ERFNet', 16 | in_channels=3, 17 | enc_downsample_channels=(16, 64, 128), 18 | enc_stage_non_bottlenecks=(5, 8), 19 | enc_non_bottleneck_dilations=(2, 4, 8, 16), 20 | enc_non_bottleneck_channels=(64, 128), 
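# NOTE: ERFNet is roughly symmetric around its bottleneck: the encoder
# downsamples through 16 -> 64 -> 128 channels (above) and the decoder
# upsamples back through 64 -> 16 (below), which is why the FCNHead in
# this file reads only 16 input channels at full resolution.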
21 | dec_upsample_channels=(64, 16), 22 | dec_stages_non_bottleneck=(2, 2), 23 | dec_non_bottleneck_channels=(64, 16), 24 | dropout_ratio=0.1, 25 | init_cfg=None), 26 | decode_head=dict( 27 | type='FCNHead', 28 | in_channels=16, 29 | channels=128, 30 | num_convs=1, 31 | concat_input=False, 32 | dropout_ratio=0.1, 33 | num_classes=19, 34 | norm_cfg=norm_cfg, 35 | align_corners=False, 36 | loss_decode=dict( 37 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 38 | # model training and testing settings 39 | train_cfg=dict(), 40 | test_cfg=dict(mode='whole')) 41 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/_base_/models/fast_scnn.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True, momentum=0.01) 3 | data_preprocessor = dict( 4 | type='SegDataPreProcessor', 5 | mean=[123.675, 116.28, 103.53], 6 | std=[58.395, 57.12, 57.375], 7 | bgr_to_rgb=True, 8 | pad_val=0, 9 | seg_pad_val=255) 10 | model = dict( 11 | type='EncoderDecoder', 12 | data_preprocessor=data_preprocessor, 13 | backbone=dict( 14 | type='FastSCNN', 15 | downsample_dw_channels=(32, 48), 16 | global_in_channels=64, 17 | global_block_channels=(64, 96, 128), 18 | global_block_strides=(2, 2, 1), 19 | global_out_channels=128, 20 | higher_in_channels=64, 21 | lower_in_channels=128, 22 | fusion_out_channels=128, 23 | out_indices=(0, 1, 2), 24 | norm_cfg=norm_cfg, 25 | align_corners=False), 26 | decode_head=dict( 27 | type='DepthwiseSeparableFCNHead', 28 | in_channels=128, 29 | channels=128, 30 | concat_input=False, 31 | num_classes=19, 32 | in_index=-1, 33 | norm_cfg=norm_cfg, 34 | align_corners=False, 35 | loss_decode=dict( 36 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1)), 37 | auxiliary_head=[ 38 | dict( 39 | type='FCNHead', 40 | in_channels=128, 41 | channels=32, 42 | num_convs=1, 43 | num_classes=19, 44 | in_index=-2, 45 | norm_cfg=norm_cfg, 46 | concat_input=False, 47 | align_corners=False, 48 | loss_decode=dict( 49 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=0.4)), 50 | dict( 51 | type='FCNHead', 52 | in_channels=64, 53 | channels=32, 54 | num_convs=1, 55 | num_classes=19, 56 | in_index=-3, 57 | norm_cfg=norm_cfg, 58 | concat_input=False, 59 | align_corners=False, 60 | loss_decode=dict( 61 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=0.4)), 62 | ], 63 | # model training and testing settings 64 | train_cfg=dict(), 65 | test_cfg=dict(mode='whole')) 66 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/_base_/models/fastfcn_r50-d32_jpu_psp.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | data_preprocessor = dict( 4 | type='SegDataPreProcessor', 5 | mean=[123.675, 116.28, 103.53], 6 | std=[58.395, 57.12, 57.375], 7 | bgr_to_rgb=True, 8 | pad_val=0, 9 | seg_pad_val=255) 10 | model = dict( 11 | type='EncoderDecoder', 12 | data_preprocessor=data_preprocessor, 13 | pretrained='open-mmlab://resnet50_v1c', 14 | backbone=dict( 15 | type='ResNetV1c', 16 | depth=50, 17 | num_stages=4, 18 | dilations=(1, 1, 2, 4), 19 | strides=(1, 2, 2, 2), 20 | out_indices=(1, 2, 3), 21 | norm_cfg=norm_cfg, 22 | norm_eval=False, 23 | style='pytorch', 24 | contract_dilation=True), 25 | neck=dict( 26 | type='JPU', 27 | in_channels=(512, 1024, 2048), 28 | 
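# NOTE: FastFCN keeps a plain strided backbone (strides=(1, 2, 2, 2)
# above, no dilation blow-up) and lets this JPU neck fuse the stride
# 8/16/32 maps back into one stride-8 feature via the parallel dilated
# (1, 2, 4, 8) convs below; that is the paper's substitute for an
# expensive dilated-ResNet backbone.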
mid_channels=512, 29 | start_level=0, 30 | end_level=-1, 31 | dilations=(1, 2, 4, 8), 32 | align_corners=False, 33 | norm_cfg=norm_cfg), 34 | decode_head=dict( 35 | type='PSPHead', 36 | in_channels=2048, 37 | in_index=2, 38 | channels=512, 39 | pool_scales=(1, 2, 3, 6), 40 | dropout_ratio=0.1, 41 | num_classes=19, 42 | norm_cfg=norm_cfg, 43 | align_corners=False, 44 | loss_decode=dict( 45 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 46 | auxiliary_head=dict( 47 | type='FCNHead', 48 | in_channels=1024, 49 | in_index=1, 50 | channels=256, 51 | num_convs=1, 52 | concat_input=False, 53 | dropout_ratio=0.1, 54 | num_classes=19, 55 | norm_cfg=norm_cfg, 56 | align_corners=False, 57 | loss_decode=dict( 58 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 59 | # model training and testing settings 60 | train_cfg=dict(), 61 | test_cfg=dict(mode='whole')) 62 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/_base_/models/fcn_hr18.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | data_preprocessor = dict( 4 | type='SegDataPreProcessor', 5 | mean=[123.675, 116.28, 103.53], 6 | std=[58.395, 57.12, 57.375], 7 | bgr_to_rgb=True, 8 | pad_val=0, 9 | seg_pad_val=255) 10 | model = dict( 11 | type='EncoderDecoder', 12 | data_preprocessor=data_preprocessor, 13 | pretrained='open-mmlab://msra/hrnetv2_w18', 14 | backbone=dict( 15 | type='HRNet', 16 | norm_cfg=norm_cfg, 17 | norm_eval=False, 18 | extra=dict( 19 | stage1=dict( 20 | num_modules=1, 21 | num_branches=1, 22 | block='BOTTLENECK', 23 | num_blocks=(4, ), 24 | num_channels=(64, )), 25 | stage2=dict( 26 | num_modules=1, 27 | num_branches=2, 28 | block='BASIC', 29 | num_blocks=(4, 4), 30 | num_channels=(18, 36)), 31 | stage3=dict( 32 | num_modules=4, 33 | num_branches=3, 34 | block='BASIC', 35 | num_blocks=(4, 4, 4), 36 | num_channels=(18, 36, 72)), 37 | stage4=dict( 38 | num_modules=3, 39 | num_branches=4, 40 | block='BASIC', 41 | num_blocks=(4, 4, 4, 4), 42 | num_channels=(18, 36, 72, 144)))), 43 | decode_head=dict( 44 | type='FCNHead', 45 | in_channels=[18, 36, 72, 144], 46 | in_index=(0, 1, 2, 3), 47 | channels=sum([18, 36, 72, 144]), 48 | input_transform='resize_concat', 49 | kernel_size=1, 50 | num_convs=1, 51 | concat_input=False, 52 | dropout_ratio=-1, 53 | num_classes=19, 54 | norm_cfg=norm_cfg, 55 | align_corners=False, 56 | loss_decode=dict( 57 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 58 | # model training and testing settings 59 | train_cfg=dict(), 60 | test_cfg=dict(mode='whole')) 61 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/_base_/models/fcn_r50-d8.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | data_preprocessor = dict( 4 | type='SegDataPreProcessor', 5 | mean=[123.675, 116.28, 103.53], 6 | std=[58.395, 57.12, 57.375], 7 | bgr_to_rgb=True, 8 | pad_val=0, 9 | seg_pad_val=255) 10 | model = dict( 11 | type='EncoderDecoder', 12 | data_preprocessor=data_preprocessor, 13 | pretrained='open-mmlab://resnet50_v1c', 14 | backbone=dict( 15 | type='ResNetV1c', 16 | depth=50, 17 | num_stages=4, 18 | out_indices=(0, 1, 2, 3), 19 | dilations=(1, 1, 2, 4), 20 | strides=(1, 2, 1, 1), 21 | norm_cfg=norm_cfg, 22 | norm_eval=False, 23 | 
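# NOTE: `norm_eval=False` keeps BatchNorm running statistics updating
# while fine-tuning; flipping it to True freezes the running mean/var,
# a common choice when the per-GPU batch is very small.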
style='pytorch', 24 | contract_dilation=True), 25 | decode_head=dict( 26 | type='FCNHead', 27 | in_channels=2048, 28 | in_index=3, 29 | channels=512, 30 | num_convs=2, 31 | concat_input=True, 32 | dropout_ratio=0.1, 33 | num_classes=19, 34 | norm_cfg=norm_cfg, 35 | align_corners=False, 36 | loss_decode=dict( 37 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 38 | auxiliary_head=dict( 39 | type='FCNHead', 40 | in_channels=1024, 41 | in_index=2, 42 | channels=256, 43 | num_convs=1, 44 | concat_input=False, 45 | dropout_ratio=0.1, 46 | num_classes=19, 47 | norm_cfg=norm_cfg, 48 | align_corners=False, 49 | loss_decode=dict( 50 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 51 | # model training and testing settings 52 | train_cfg=dict(), 53 | test_cfg=dict(mode='whole')) 54 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/_base_/models/fcn_unet_s5-d16.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | data_preprocessor = dict( 4 | type='SegDataPreProcessor', 5 | mean=[123.675, 116.28, 103.53], 6 | std=[58.395, 57.12, 57.375], 7 | bgr_to_rgb=True, 8 | pad_val=0, 9 | seg_pad_val=255) 10 | model = dict( 11 | type='EncoderDecoder', 12 | data_preprocessor=data_preprocessor, 13 | pretrained=None, 14 | backbone=dict( 15 | type='UNet', 16 | in_channels=3, 17 | base_channels=64, 18 | num_stages=5, 19 | strides=(1, 1, 1, 1, 1), 20 | enc_num_convs=(2, 2, 2, 2, 2), 21 | dec_num_convs=(2, 2, 2, 2), 22 | downsamples=(True, True, True, True), 23 | enc_dilations=(1, 1, 1, 1, 1), 24 | dec_dilations=(1, 1, 1, 1), 25 | with_cp=False, 26 | conv_cfg=None, 27 | norm_cfg=norm_cfg, 28 | act_cfg=dict(type='ReLU'), 29 | upsample_cfg=dict(type='InterpConv'), 30 | norm_eval=False), 31 | decode_head=dict( 32 | type='FCNHead', 33 | in_channels=64, 34 | in_index=4, 35 | channels=64, 36 | num_convs=1, 37 | concat_input=False, 38 | dropout_ratio=0.1, 39 | num_classes=2, 40 | norm_cfg=norm_cfg, 41 | align_corners=False, 42 | loss_decode=dict( 43 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 44 | auxiliary_head=dict( 45 | type='FCNHead', 46 | in_channels=128, 47 | in_index=3, 48 | channels=64, 49 | num_convs=1, 50 | concat_input=False, 51 | dropout_ratio=0.1, 52 | num_classes=2, 53 | norm_cfg=norm_cfg, 54 | align_corners=False, 55 | loss_decode=dict( 56 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 57 | # model training and testing settings 58 | train_cfg=dict(), 59 | test_cfg=dict(mode='slide', crop_size=256, stride=170)) 60 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/_base_/models/fpn_poolformer_s12.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | checkpoint_file = 'https://download.openmmlab.com/mmclassification/v0/poolformer/poolformer-s12_3rdparty_32xb128_in1k_20220414-f8d83051.pth' # noqa 4 | # TODO: delete custom_imports after mmpretrain supports auto import 5 | # please install mmpretrain >= 1.0.0rc7 6 | # import mmpretrain.models to trigger register_module in mmpretrain 7 | custom_imports = dict( 8 | imports=['mmpretrain.models'], allow_failed_imports=False) 9 | data_preprocessor = dict( 10 | type='SegDataPreProcessor', 11 | mean=[123.675, 116.28, 103.53], 12 | std=[58.395, 57.12, 57.375], 
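# NOTE: the backbone here comes from mmpretrain (registered through
# `custom_imports` above), and `prefix='backbone.'` in its init_cfg
# keeps only the matching checkpoint keys, conceptually:
#   state = {k[len('backbone.'):]: v
#            for k, v in ckpt['state_dict'].items()
#            if k.startswith('backbone.')}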
13 | bgr_to_rgb=True, 14 | pad_val=0, 15 | seg_pad_val=255) 16 | model = dict( 17 | type='EncoderDecoder', 18 | data_preprocessor=data_preprocessor, 19 | backbone=dict( 20 | type='mmpretrain.PoolFormer', 21 | arch='s12', 22 | init_cfg=dict( 23 | type='Pretrained', checkpoint=checkpoint_file, prefix='backbone.'), 24 | in_patch_size=7, 25 | in_stride=4, 26 | in_pad=2, 27 | down_patch_size=3, 28 | down_stride=2, 29 | down_pad=1, 30 | drop_rate=0., 31 | drop_path_rate=0., 32 | out_indices=(0, 2, 4, 6), 33 | frozen_stages=0, 34 | ), 35 | neck=dict( 36 | type='FPN', 37 | in_channels=[256, 512, 1024, 2048], 38 | out_channels=256, 39 | num_outs=4), 40 | decode_head=dict( 41 | type='FPNHead', 42 | in_channels=[256, 256, 256, 256], 43 | in_index=[0, 1, 2, 3], 44 | feature_strides=[4, 8, 16, 32], 45 | channels=128, 46 | dropout_ratio=0.1, 47 | num_classes=19, 48 | norm_cfg=norm_cfg, 49 | align_corners=False, 50 | loss_decode=dict( 51 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 52 | # model training and testing settings 53 | train_cfg=dict(), 54 | test_cfg=dict(mode='whole')) 55 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/_base_/models/fpn_r50.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | data_preprocessor = dict( 4 | type='SegDataPreProcessor', 5 | mean=[123.675, 116.28, 103.53], 6 | std=[58.395, 57.12, 57.375], 7 | bgr_to_rgb=True, 8 | pad_val=0, 9 | seg_pad_val=255) 10 | model = dict( 11 | type='EncoderDecoder', 12 | data_preprocessor=data_preprocessor, 13 | pretrained='open-mmlab://resnet50_v1c', 14 | backbone=dict( 15 | type='ResNetV1c', 16 | depth=50, 17 | num_stages=4, 18 | out_indices=(0, 1, 2, 3), 19 | dilations=(1, 1, 1, 1), 20 | strides=(1, 2, 2, 2), 21 | norm_cfg=norm_cfg, 22 | norm_eval=False, 23 | style='pytorch', 24 | contract_dilation=True), 25 | neck=dict( 26 | type='FPN', 27 | in_channels=[256, 512, 1024, 2048], 28 | out_channels=256, 29 | num_outs=4), 30 | decode_head=dict( 31 | type='FPNHead', 32 | in_channels=[256, 256, 256, 256], 33 | in_index=[0, 1, 2, 3], 34 | feature_strides=[4, 8, 16, 32], 35 | channels=128, 36 | dropout_ratio=0.1, 37 | num_classes=19, 38 | norm_cfg=norm_cfg, 39 | align_corners=False, 40 | loss_decode=dict( 41 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 42 | # model training and testing settings 43 | train_cfg=dict(), 44 | test_cfg=dict(mode='whole')) 45 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/_base_/models/gcnet_r50-d8.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | data_preprocessor = dict( 4 | type='SegDataPreProcessor', 5 | mean=[123.675, 116.28, 103.53], 6 | std=[58.395, 57.12, 57.375], 7 | bgr_to_rgb=True, 8 | pad_val=0, 9 | seg_pad_val=255) 10 | model = dict( 11 | type='EncoderDecoder', 12 | data_preprocessor=data_preprocessor, 13 | pretrained='open-mmlab://resnet50_v1c', 14 | backbone=dict( 15 | type='ResNetV1c', 16 | depth=50, 17 | num_stages=4, 18 | out_indices=(0, 1, 2, 3), 19 | dilations=(1, 1, 2, 4), 20 | strides=(1, 2, 1, 1), 21 | norm_cfg=norm_cfg, 22 | norm_eval=False, 23 | style='pytorch', 24 | contract_dilation=True), 25 | decode_head=dict( 26 | type='GCHead', 27 | in_channels=2048, 28 | in_index=3, 29 | channels=512, 30 | 
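# NOTE: the GCHead below squeezes its global-context transform to
# channels * ratio = 512 * 0.25 = 128 hidden channels, the bottleneck
# design from the GCNet paper.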
ratio=1 / 4., 31 | pooling_type='att', 32 | fusion_types=('channel_add', ), 33 | dropout_ratio=0.1, 34 | num_classes=19, 35 | norm_cfg=norm_cfg, 36 | align_corners=False, 37 | loss_decode=dict( 38 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 39 | auxiliary_head=dict( 40 | type='FCNHead', 41 | in_channels=1024, 42 | in_index=2, 43 | channels=256, 44 | num_convs=1, 45 | concat_input=False, 46 | dropout_ratio=0.1, 47 | num_classes=19, 48 | norm_cfg=norm_cfg, 49 | align_corners=False, 50 | loss_decode=dict( 51 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 52 | # model training and testing settings 53 | train_cfg=dict(), 54 | test_cfg=dict(mode='whole')) 55 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/_base_/models/isanet_r50-d8.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | data_preprocessor = dict( 4 | type='SegDataPreProcessor', 5 | mean=[123.675, 116.28, 103.53], 6 | std=[58.395, 57.12, 57.375], 7 | bgr_to_rgb=True, 8 | pad_val=0, 9 | seg_pad_val=255) 10 | model = dict( 11 | type='EncoderDecoder', 12 | data_preprocessor=data_preprocessor, 13 | pretrained='open-mmlab://resnet50_v1c', 14 | backbone=dict( 15 | type='ResNetV1c', 16 | depth=50, 17 | num_stages=4, 18 | out_indices=(0, 1, 2, 3), 19 | dilations=(1, 1, 2, 4), 20 | strides=(1, 2, 1, 1), 21 | norm_cfg=norm_cfg, 22 | norm_eval=False, 23 | style='pytorch', 24 | contract_dilation=True), 25 | decode_head=dict( 26 | type='ISAHead', 27 | in_channels=2048, 28 | in_index=3, 29 | channels=512, 30 | isa_channels=256, 31 | down_factor=(8, 8), 32 | dropout_ratio=0.1, 33 | num_classes=19, 34 | norm_cfg=norm_cfg, 35 | align_corners=False, 36 | loss_decode=dict( 37 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 38 | auxiliary_head=dict( 39 | type='FCNHead', 40 | in_channels=1024, 41 | in_index=2, 42 | channels=256, 43 | num_convs=1, 44 | concat_input=False, 45 | dropout_ratio=0.1, 46 | num_classes=19, 47 | norm_cfg=norm_cfg, 48 | align_corners=False, 49 | loss_decode=dict( 50 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 51 | # model training and testing settings 52 | train_cfg=dict(), 53 | test_cfg=dict(mode='whole')) 54 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/_base_/models/lraspp_m-v3-d8.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', eps=0.001, requires_grad=True) 3 | data_preprocessor = dict( 4 | type='SegDataPreProcessor', 5 | mean=[123.675, 116.28, 103.53], 6 | std=[58.395, 57.12, 57.375], 7 | bgr_to_rgb=True, 8 | pad_val=0, 9 | seg_pad_val=255) 10 | model = dict( 11 | type='EncoderDecoder', 12 | data_preprocessor=data_preprocessor, 13 | backbone=dict( 14 | type='MobileNetV3', 15 | arch='large', 16 | out_indices=(1, 3, 16), 17 | norm_cfg=norm_cfg), 18 | decode_head=dict( 19 | type='LRASPPHead', 20 | in_channels=(16, 24, 960), 21 | in_index=(0, 1, 2), 22 | channels=128, 23 | input_transform='multiple_select', 24 | dropout_ratio=0.1, 25 | num_classes=19, 26 | norm_cfg=norm_cfg, 27 | act_cfg=dict(type='ReLU'), 28 | align_corners=False, 29 | loss_decode=dict( 30 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 31 | # model training and testing settings 32 | train_cfg=dict(), 33 | test_cfg=dict(mode='whole')) 
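None of these `_base_` fragments is trained directly: a top-level config lists one model file, one dataset file, a schedule and `default_runtime.py` under `_base_`, and mmengine assembles them. A minimal sketch of materializing two fragments by hand, assuming an mmsegmentation 1.x install and a working directory of `2DVMamba/segmentation` so the relative paths resolve (the dataset line additionally assumes `data/potsdam` exists on disk):

```python
# Hedged usage sketch, not repo code: build a model and a dataset
# straight from the _base_ fragments in this directory tree.
from mmengine.config import Config

from mmseg.registry import DATASETS, MODELS
from mmseg.utils import register_all_modules

register_all_modules()  # populate the registries behind every type='...' lookup

model_cfg = Config.fromfile('configs/_base_/models/lraspp_m-v3-d8.py')
model = MODELS.build(model_cfg.model)  # EncoderDecoder: MobileNetV3 + LRASPPHead

data_cfg = Config.fromfile('configs/_base_/datasets/potsdam.py')
dataset = DATASETS.build(data_cfg.train_dataloader.dataset)  # needs data/potsdam
print(type(model).__name__, len(dataset))
```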
34 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/_base_/models/nonlocal_r50-d8.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | data_preprocessor = dict( 4 | type='SegDataPreProcessor', 5 | mean=[123.675, 116.28, 103.53], 6 | std=[58.395, 57.12, 57.375], 7 | bgr_to_rgb=True, 8 | pad_val=0, 9 | seg_pad_val=255) 10 | model = dict( 11 | type='EncoderDecoder', 12 | data_preprocessor=data_preprocessor, 13 | pretrained='open-mmlab://resnet50_v1c', 14 | backbone=dict( 15 | type='ResNetV1c', 16 | depth=50, 17 | num_stages=4, 18 | out_indices=(0, 1, 2, 3), 19 | dilations=(1, 1, 2, 4), 20 | strides=(1, 2, 1, 1), 21 | norm_cfg=norm_cfg, 22 | norm_eval=False, 23 | style='pytorch', 24 | contract_dilation=True), 25 | decode_head=dict( 26 | type='NLHead', 27 | in_channels=2048, 28 | in_index=3, 29 | channels=512, 30 | dropout_ratio=0.1, 31 | reduction=2, 32 | use_scale=True, 33 | mode='embedded_gaussian', 34 | num_classes=19, 35 | norm_cfg=norm_cfg, 36 | align_corners=False, 37 | loss_decode=dict( 38 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 39 | auxiliary_head=dict( 40 | type='FCNHead', 41 | in_channels=1024, 42 | in_index=2, 43 | channels=256, 44 | num_convs=1, 45 | concat_input=False, 46 | dropout_ratio=0.1, 47 | num_classes=19, 48 | norm_cfg=norm_cfg, 49 | align_corners=False, 50 | loss_decode=dict( 51 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 52 | # model training and testing settings 53 | train_cfg=dict(), 54 | test_cfg=dict(mode='whole')) 55 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/_base_/models/ocrnet_r50-d8.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | data_preprocessor = dict( 4 | type='SegDataPreProcessor', 5 | mean=[123.675, 116.28, 103.53], 6 | std=[58.395, 57.12, 57.375], 7 | bgr_to_rgb=True, 8 | pad_val=0, 9 | seg_pad_val=255) 10 | model = dict( 11 | type='CascadeEncoderDecoder', 12 | data_preprocessor=data_preprocessor, 13 | num_stages=2, 14 | pretrained='open-mmlab://resnet50_v1c', 15 | backbone=dict( 16 | type='ResNetV1c', 17 | depth=50, 18 | num_stages=4, 19 | out_indices=(0, 1, 2, 3), 20 | dilations=(1, 1, 2, 4), 21 | strides=(1, 2, 1, 1), 22 | norm_cfg=norm_cfg, 23 | norm_eval=False, 24 | style='pytorch', 25 | contract_dilation=True), 26 | decode_head=[ 27 | dict( 28 | type='FCNHead', 29 | in_channels=1024, 30 | in_index=2, 31 | channels=256, 32 | num_convs=1, 33 | concat_input=False, 34 | dropout_ratio=0.1, 35 | num_classes=19, 36 | norm_cfg=norm_cfg, 37 | align_corners=False, 38 | loss_decode=dict( 39 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 40 | dict( 41 | type='OCRHead', 42 | in_channels=2048, 43 | in_index=3, 44 | channels=512, 45 | ocr_channels=256, 46 | dropout_ratio=0.1, 47 | num_classes=19, 48 | norm_cfg=norm_cfg, 49 | align_corners=False, 50 | loss_decode=dict( 51 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)) 52 | ], 53 | # model training and testing settings 54 | train_cfg=dict(), 55 | test_cfg=dict(mode='whole')) 56 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/_base_/models/pointrend_r50.py: 
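# NOTE: ocrnet_r50-d8.py (above) and pointrend_r50.py (below) both use
# type='CascadeEncoderDecoder' with num_stages=2: `decode_head` is a
# list whose stages run in order, each stage refining the previous
# stage's prediction (FCN -> OCR context pooling, FPN -> PointRend point
# refinement), and each stage adds its own weighted loss.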
-------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | data_preprocessor = dict( 4 | type='SegDataPreProcessor', 5 | mean=[123.675, 116.28, 103.53], 6 | std=[58.395, 57.12, 57.375], 7 | bgr_to_rgb=True, 8 | pad_val=0, 9 | seg_pad_val=255) 10 | model = dict( 11 | type='CascadeEncoderDecoder', 12 | data_preprocessor=data_preprocessor, 13 | num_stages=2, 14 | pretrained='open-mmlab://resnet50_v1c', 15 | backbone=dict( 16 | type='ResNetV1c', 17 | depth=50, 18 | num_stages=4, 19 | out_indices=(0, 1, 2, 3), 20 | dilations=(1, 1, 1, 1), 21 | strides=(1, 2, 2, 2), 22 | norm_cfg=norm_cfg, 23 | norm_eval=False, 24 | style='pytorch', 25 | contract_dilation=True), 26 | neck=dict( 27 | type='FPN', 28 | in_channels=[256, 512, 1024, 2048], 29 | out_channels=256, 30 | num_outs=4), 31 | decode_head=[ 32 | dict( 33 | type='FPNHead', 34 | in_channels=[256, 256, 256, 256], 35 | in_index=[0, 1, 2, 3], 36 | feature_strides=[4, 8, 16, 32], 37 | channels=128, 38 | dropout_ratio=-1, 39 | num_classes=19, 40 | norm_cfg=norm_cfg, 41 | align_corners=False, 42 | loss_decode=dict( 43 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 44 | dict( 45 | type='PointHead', 46 | in_channels=[256], 47 | in_index=[0], 48 | channels=256, 49 | num_fcs=3, 50 | coarse_pred_each_layer=True, 51 | dropout_ratio=-1, 52 | num_classes=19, 53 | align_corners=False, 54 | loss_decode=dict( 55 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)) 56 | ], 57 | # model training and testing settings 58 | train_cfg=dict( 59 | num_points=2048, oversample_ratio=3, importance_sample_ratio=0.75), 60 | test_cfg=dict( 61 | mode='whole', 62 | subdivision_steps=2, 63 | subdivision_num_points=8196, 64 | scale_factor=2)) 65 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/_base_/models/psanet_r50-d8.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | data_preprocessor = dict( 4 | type='SegDataPreProcessor', 5 | mean=[123.675, 116.28, 103.53], 6 | std=[58.395, 57.12, 57.375], 7 | bgr_to_rgb=True, 8 | pad_val=0, 9 | seg_pad_val=255) 10 | model = dict( 11 | type='EncoderDecoder', 12 | data_preprocessor=data_preprocessor, 13 | pretrained='open-mmlab://resnet50_v1c', 14 | backbone=dict( 15 | type='ResNetV1c', 16 | depth=50, 17 | num_stages=4, 18 | out_indices=(0, 1, 2, 3), 19 | dilations=(1, 1, 2, 4), 20 | strides=(1, 2, 1, 1), 21 | norm_cfg=norm_cfg, 22 | norm_eval=False, 23 | style='pytorch', 24 | contract_dilation=True), 25 | decode_head=dict( 26 | type='PSAHead', 27 | in_channels=2048, 28 | in_index=3, 29 | channels=512, 30 | mask_size=(97, 97), 31 | psa_type='bi-direction', 32 | compact=False, 33 | shrink_factor=2, 34 | normalization_factor=1.0, 35 | psa_softmax=True, 36 | dropout_ratio=0.1, 37 | num_classes=19, 38 | norm_cfg=norm_cfg, 39 | align_corners=False, 40 | loss_decode=dict( 41 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 42 | auxiliary_head=dict( 43 | type='FCNHead', 44 | in_channels=1024, 45 | in_index=2, 46 | channels=256, 47 | num_convs=1, 48 | concat_input=False, 49 | dropout_ratio=0.1, 50 | num_classes=19, 51 | norm_cfg=norm_cfg, 52 | align_corners=False, 53 | loss_decode=dict( 54 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 55 | # model training and testing settings 56 | train_cfg=dict(), 57 | 
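# NOTE: `mode='whole'` below runs a single full-image forward at test
# time; the UNet fragments above use `mode='slide'` instead, scanning
# overlapping crops and averaging logits where windows overlap.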
test_cfg=dict(mode='whole')) 58 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/_base_/models/pspnet_r50-d8.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | data_preprocessor = dict( 4 | type='SegDataPreProcessor', 5 | mean=[123.675, 116.28, 103.53], 6 | std=[58.395, 57.12, 57.375], 7 | bgr_to_rgb=True, 8 | pad_val=0, 9 | seg_pad_val=255) 10 | model = dict( 11 | type='EncoderDecoder', 12 | data_preprocessor=data_preprocessor, 13 | pretrained='open-mmlab://resnet50_v1c', 14 | backbone=dict( 15 | type='ResNetV1c', 16 | depth=50, 17 | num_stages=4, 18 | out_indices=(0, 1, 2, 3), 19 | dilations=(1, 1, 2, 4), 20 | strides=(1, 2, 1, 1), 21 | norm_cfg=norm_cfg, 22 | norm_eval=False, 23 | style='pytorch', 24 | contract_dilation=True), 25 | decode_head=dict( 26 | type='PSPHead', 27 | in_channels=2048, 28 | in_index=3, 29 | channels=512, 30 | pool_scales=(1, 2, 3, 6), 31 | dropout_ratio=0.1, 32 | num_classes=19, 33 | norm_cfg=norm_cfg, 34 | align_corners=False, 35 | loss_decode=dict( 36 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 37 | auxiliary_head=dict( 38 | type='FCNHead', 39 | in_channels=1024, 40 | in_index=2, 41 | channels=256, 42 | num_convs=1, 43 | concat_input=False, 44 | dropout_ratio=0.1, 45 | num_classes=19, 46 | norm_cfg=norm_cfg, 47 | align_corners=False, 48 | loss_decode=dict( 49 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 50 | # model training and testing settings 51 | train_cfg=dict(), 52 | test_cfg=dict(mode='whole')) 53 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/_base_/models/pspnet_unet_s5-d16.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | data_preprocessor = dict( 4 | type='SegDataPreProcessor', 5 | mean=[123.675, 116.28, 103.53], 6 | std=[58.395, 57.12, 57.375], 7 | bgr_to_rgb=True, 8 | pad_val=0, 9 | seg_pad_val=255) 10 | model = dict( 11 | type='EncoderDecoder', 12 | data_preprocessor=data_preprocessor, 13 | pretrained=None, 14 | backbone=dict( 15 | type='UNet', 16 | in_channels=3, 17 | base_channels=64, 18 | num_stages=5, 19 | strides=(1, 1, 1, 1, 1), 20 | enc_num_convs=(2, 2, 2, 2, 2), 21 | dec_num_convs=(2, 2, 2, 2), 22 | downsamples=(True, True, True, True), 23 | enc_dilations=(1, 1, 1, 1, 1), 24 | dec_dilations=(1, 1, 1, 1), 25 | with_cp=False, 26 | conv_cfg=None, 27 | norm_cfg=norm_cfg, 28 | act_cfg=dict(type='ReLU'), 29 | upsample_cfg=dict(type='InterpConv'), 30 | norm_eval=False), 31 | decode_head=dict( 32 | type='PSPHead', 33 | in_channels=64, 34 | in_index=4, 35 | channels=16, 36 | pool_scales=(1, 2, 3, 6), 37 | dropout_ratio=0.1, 38 | num_classes=2, 39 | norm_cfg=norm_cfg, 40 | align_corners=False, 41 | loss_decode=dict( 42 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 43 | auxiliary_head=dict( 44 | type='FCNHead', 45 | in_channels=128, 46 | in_index=3, 47 | channels=64, 48 | num_convs=1, 49 | concat_input=False, 50 | dropout_ratio=0.1, 51 | num_classes=2, 52 | norm_cfg=norm_cfg, 53 | align_corners=False, 54 | loss_decode=dict( 55 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 56 | # model training and testing settings 57 | train_cfg=dict(), 58 | test_cfg=dict(mode='slide', crop_size=256, stride=170)) 59 | 
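The `test_cfg` entries in these model fragments switch between `mode='whole'` (one forward pass over the full image) and `mode='slide'` (overlapping crops whose logits are averaged), as in the UNet config's `crop_size=256, stride=170` just above. Below is a minimal sketch of what sliding-window inference amounts to; `model` is a placeholder callable returning `(1, num_classes, h, w)` logits, and this illustrates the idea rather than mmsegmentation's actual `slide_inference`.

# Sketch of the sliding-window inference implied by
# test_cfg=dict(mode='slide', crop_size=256, stride=170).
import torch

def slide_inference(model, img, num_classes, crop=256, stride=170):
    _, _, h, w = img.shape
    logits = img.new_zeros((1, num_classes, h, w))
    count = img.new_zeros((1, 1, h, w))
    # enough windows per axis that the image border is always covered
    h_grids = max(h - crop + stride - 1, 0) // stride + 1
    w_grids = max(w - crop + stride - 1, 0) // stride + 1
    for gy in range(h_grids):
        for gx in range(w_grids):
            y1 = min(gy * stride, max(h - crop, 0))
            x1 = min(gx * stride, max(w - crop, 0))
            y2, x2 = min(y1 + crop, h), min(x1 + crop, w)
            logits[:, :, y1:y2, x1:x2] += model(img[:, :, y1:y2, x1:x2])
            count[:, :, y1:y2, x1:x2] += 1
    return logits / count  # average logits where windows overlap

if __name__ == '__main__':
    net = lambda x: torch.zeros(1, 2, x.shape[2], x.shape[3])  # dummy model
    out = slide_inference(net, torch.rand(1, 3, 512, 512), num_classes=2)
    print(out.shape)  # torch.Size([1, 2, 512, 512])

Because the stride (170) is smaller than the crop (256), adjacent windows overlap and the averaged logits smooth seams at window borders.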
-------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/_base_/models/segformer_mit-b0.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | data_preprocessor = dict( 4 | type='SegDataPreProcessor', 5 | mean=[123.675, 116.28, 103.53], 6 | std=[58.395, 57.12, 57.375], 7 | bgr_to_rgb=True, 8 | pad_val=0, 9 | seg_pad_val=255) 10 | model = dict( 11 | type='EncoderDecoder', 12 | data_preprocessor=data_preprocessor, 13 | pretrained=None, 14 | backbone=dict( 15 | type='MixVisionTransformer', 16 | in_channels=3, 17 | embed_dims=32, 18 | num_stages=4, 19 | num_layers=[2, 2, 2, 2], 20 | num_heads=[1, 2, 5, 8], 21 | patch_sizes=[7, 3, 3, 3], 22 | sr_ratios=[8, 4, 2, 1], 23 | out_indices=(0, 1, 2, 3), 24 | mlp_ratio=4, 25 | qkv_bias=True, 26 | drop_rate=0.0, 27 | attn_drop_rate=0.0, 28 | drop_path_rate=0.1), 29 | decode_head=dict( 30 | type='SegformerHead', 31 | in_channels=[32, 64, 160, 256], 32 | in_index=[0, 1, 2, 3], 33 | channels=256, 34 | dropout_ratio=0.1, 35 | num_classes=19, 36 | norm_cfg=norm_cfg, 37 | align_corners=False, 38 | loss_decode=dict( 39 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 40 | # model training and testing settings 41 | train_cfg=dict(), 42 | test_cfg=dict(mode='whole')) 43 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/_base_/models/segmenter_vit-b16_mask.py: -------------------------------------------------------------------------------- 1 | checkpoint = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/segmenter/vit_base_p16_384_20220308-96dfe169.pth' # noqa 2 | # model settings 3 | backbone_norm_cfg = dict(type='LN', eps=1e-6, requires_grad=True) 4 | data_preprocessor = dict( 5 | type='SegDataPreProcessor', 6 | mean=[127.5, 127.5, 127.5], 7 | std=[127.5, 127.5, 127.5], 8 | bgr_to_rgb=True, 9 | pad_val=0, 10 | seg_pad_val=255) 11 | model = dict( 12 | type='EncoderDecoder', 13 | data_preprocessor=data_preprocessor, 14 | pretrained=checkpoint, 15 | backbone=dict( 16 | type='VisionTransformer', 17 | img_size=(512, 512), 18 | patch_size=16, 19 | in_channels=3, 20 | embed_dims=768, 21 | num_layers=12, 22 | num_heads=12, 23 | drop_path_rate=0.1, 24 | attn_drop_rate=0.0, 25 | drop_rate=0.0, 26 | final_norm=True, 27 | norm_cfg=backbone_norm_cfg, 28 | with_cls_token=True, 29 | interpolate_mode='bicubic', 30 | ), 31 | decode_head=dict( 32 | type='SegmenterMaskTransformerHead', 33 | in_channels=768, 34 | channels=768, 35 | num_classes=150, 36 | num_layers=2, 37 | num_heads=12, 38 | embed_dims=768, 39 | dropout_ratio=0.0, 40 | loss_decode=dict( 41 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), 42 | ), 43 | test_cfg=dict(mode='slide', crop_size=(512, 512), stride=(480, 480)), 44 | ) 45 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/_base_/models/twins_pcpvt-s_fpn.py: -------------------------------------------------------------------------------- 1 | checkpoint = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/twins/pcpvt_small_20220308-e638c41c.pth' # noqa 2 | 3 | # model settings 4 | backbone_norm_cfg = dict(type='LN') 5 | norm_cfg = dict(type='SyncBN', requires_grad=True) 6 | data_preprocessor = dict( 7 | type='SegDataPreProcessor', 8 | mean=[123.675, 116.28, 103.53], 9 | std=[58.395, 57.12, 57.375], 10 | bgr_to_rgb=True, 
11 | pad_val=0, 12 | seg_pad_val=255) 13 | model = dict( 14 | type='EncoderDecoder', 15 | data_preprocessor=data_preprocessor, 16 | backbone=dict( 17 | type='PCPVT', 18 | init_cfg=dict(type='Pretrained', checkpoint=checkpoint), 19 | in_channels=3, 20 | embed_dims=[64, 128, 320, 512], 21 | num_heads=[1, 2, 5, 8], 22 | patch_sizes=[4, 2, 2, 2], 23 | strides=[4, 2, 2, 2], 24 | mlp_ratios=[8, 8, 4, 4], 25 | out_indices=(0, 1, 2, 3), 26 | qkv_bias=True, 27 | norm_cfg=backbone_norm_cfg, 28 | depths=[3, 4, 6, 3], 29 | sr_ratios=[8, 4, 2, 1], 30 | norm_after_stage=False, 31 | drop_rate=0.0, 32 | attn_drop_rate=0., 33 | drop_path_rate=0.2), 34 | neck=dict( 35 | type='FPN', 36 | in_channels=[64, 128, 320, 512], 37 | out_channels=256, 38 | num_outs=4), 39 | decode_head=dict( 40 | type='FPNHead', 41 | in_channels=[256, 256, 256, 256], 42 | in_index=[0, 1, 2, 3], 43 | feature_strides=[4, 8, 16, 32], 44 | channels=128, 45 | dropout_ratio=0.1, 46 | num_classes=150, 47 | norm_cfg=norm_cfg, 48 | align_corners=False, 49 | loss_decode=dict( 50 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 51 | # model training and testing settings 52 | train_cfg=dict(), 53 | test_cfg=dict(mode='whole')) 54 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/_base_/models/twins_pcpvt-s_upernet.py: -------------------------------------------------------------------------------- 1 | checkpoint = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/twins/pcpvt_small_20220308-e638c41c.pth' # noqa 2 | 3 | # model settings 4 | backbone_norm_cfg = dict(type='LN') 5 | norm_cfg = dict(type='SyncBN', requires_grad=True) 6 | data_preprocessor = dict( 7 | type='SegDataPreProcessor', 8 | mean=[123.675, 116.28, 103.53], 9 | std=[58.395, 57.12, 57.375], 10 | bgr_to_rgb=True, 11 | pad_val=0, 12 | seg_pad_val=255) 13 | model = dict( 14 | type='EncoderDecoder', 15 | data_preprocessor=data_preprocessor, 16 | backbone=dict( 17 | type='PCPVT', 18 | init_cfg=dict(type='Pretrained', checkpoint=checkpoint), 19 | in_channels=3, 20 | embed_dims=[64, 128, 320, 512], 21 | num_heads=[1, 2, 5, 8], 22 | patch_sizes=[4, 2, 2, 2], 23 | strides=[4, 2, 2, 2], 24 | mlp_ratios=[8, 8, 4, 4], 25 | out_indices=(0, 1, 2, 3), 26 | qkv_bias=True, 27 | norm_cfg=backbone_norm_cfg, 28 | depths=[3, 4, 6, 3], 29 | sr_ratios=[8, 4, 2, 1], 30 | norm_after_stage=False, 31 | drop_rate=0.0, 32 | attn_drop_rate=0., 33 | drop_path_rate=0.2), 34 | decode_head=dict( 35 | type='UPerHead', 36 | in_channels=[64, 128, 320, 512], 37 | in_index=[0, 1, 2, 3], 38 | pool_scales=(1, 2, 3, 6), 39 | channels=512, 40 | dropout_ratio=0.1, 41 | num_classes=150, 42 | norm_cfg=norm_cfg, 43 | align_corners=False, 44 | loss_decode=dict( 45 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 46 | auxiliary_head=dict( 47 | type='FCNHead', 48 | in_channels=320, 49 | in_index=2, 50 | channels=256, 51 | num_convs=1, 52 | concat_input=False, 53 | dropout_ratio=0.1, 54 | num_classes=150, 55 | norm_cfg=norm_cfg, 56 | align_corners=False, 57 | loss_decode=dict( 58 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 59 | # model training and testing settings 60 | train_cfg=dict(), 61 | test_cfg=dict(mode='whole')) 62 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/_base_/models/upernet_beit.py: -------------------------------------------------------------------------------- 1 | norm_cfg = dict(type='SyncBN', requires_grad=True) 2 | 
data_preprocessor = dict( 3 | type='SegDataPreProcessor', 4 | mean=[123.675, 116.28, 103.53], 5 | std=[58.395, 57.12, 57.375], 6 | bgr_to_rgb=True, 7 | pad_val=0, 8 | seg_pad_val=255) 9 | model = dict( 10 | type='EncoderDecoder', 11 | data_preprocessor=data_preprocessor, 12 | pretrained=None, 13 | backbone=dict( 14 | type='BEiT', 15 | img_size=(640, 640), 16 | patch_size=16, 17 | in_channels=3, 18 | embed_dims=768, 19 | num_layers=12, 20 | num_heads=12, 21 | mlp_ratio=4, 22 | out_indices=(3, 5, 7, 11), 23 | qv_bias=True, 24 | attn_drop_rate=0.0, 25 | drop_path_rate=0.1, 26 | norm_cfg=dict(type='LN', eps=1e-6), 27 | act_cfg=dict(type='GELU'), 28 | norm_eval=False, 29 | init_values=0.1), 30 | neck=dict(type='Feature2Pyramid', embed_dim=768, rescales=[4, 2, 1, 0.5]), 31 | decode_head=dict( 32 | type='UPerHead', 33 | in_channels=[768, 768, 768, 768], 34 | in_index=[0, 1, 2, 3], 35 | pool_scales=(1, 2, 3, 6), 36 | channels=768, 37 | dropout_ratio=0.1, 38 | num_classes=150, 39 | norm_cfg=norm_cfg, 40 | align_corners=False, 41 | loss_decode=dict( 42 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 43 | auxiliary_head=dict( 44 | type='FCNHead', 45 | in_channels=768, 46 | in_index=2, 47 | channels=256, 48 | num_convs=1, 49 | concat_input=False, 50 | dropout_ratio=0.1, 51 | num_classes=150, 52 | norm_cfg=norm_cfg, 53 | align_corners=False, 54 | loss_decode=dict( 55 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 56 | # model training and testing settings 57 | train_cfg=dict(), 58 | test_cfg=dict(mode='whole')) 59 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/_base_/models/upernet_convnext.py: -------------------------------------------------------------------------------- 1 | norm_cfg = dict(type='SyncBN', requires_grad=True) 2 | custom_imports = dict(imports='mmpretrain.models', allow_failed_imports=False) 3 | checkpoint_file = 'https://download.openmmlab.com/mmclassification/v0/convnext/downstream/convnext-base_3rdparty_32xb128-noema_in1k_20220301-2a0ee547.pth' # noqa 4 | data_preprocessor = dict( 5 | type='SegDataPreProcessor', 6 | mean=[123.675, 116.28, 103.53], 7 | std=[58.395, 57.12, 57.375], 8 | bgr_to_rgb=True, 9 | pad_val=0, 10 | seg_pad_val=255) 11 | model = dict( 12 | type='EncoderDecoder', 13 | data_preprocessor=data_preprocessor, 14 | pretrained=None, 15 | backbone=dict( 16 | type='mmpretrain.ConvNeXt', 17 | arch='base', 18 | out_indices=[0, 1, 2, 3], 19 | drop_path_rate=0.4, 20 | layer_scale_init_value=1.0, 21 | gap_before_final_norm=False, 22 | init_cfg=dict( 23 | type='Pretrained', checkpoint=checkpoint_file, 24 | prefix='backbone.')), 25 | decode_head=dict( 26 | type='UPerHead', 27 | in_channels=[128, 256, 512, 1024], 28 | in_index=[0, 1, 2, 3], 29 | pool_scales=(1, 2, 3, 6), 30 | channels=512, 31 | dropout_ratio=0.1, 32 | num_classes=19, 33 | norm_cfg=norm_cfg, 34 | align_corners=False, 35 | loss_decode=dict( 36 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 37 | auxiliary_head=dict( 38 | type='FCNHead', 39 | in_channels=384, 40 | in_index=2, 41 | channels=256, 42 | num_convs=1, 43 | concat_input=False, 44 | dropout_ratio=0.1, 45 | num_classes=19, 46 | norm_cfg=norm_cfg, 47 | align_corners=False, 48 | loss_decode=dict( 49 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 50 | # model training and testing settings 51 | train_cfg=dict(), 52 | test_cfg=dict(mode='whole')) 53 | 
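Every model fragment in this directory repeats the same `SegDataPreProcessor` block. Its `mean`/`std` are the ImageNet RGB statistics scaled by 255 ((0.485, 0.456, 0.406) and (0.229, 0.224, 0.225)), `bgr_to_rgb=True` reorders channels from OpenCV's BGR loading, and `seg_pad_val=255` pads label maps with the ignore index. A hand-rolled sketch of the numeric effect, not mmseg's implementation:

# What the repeated SegDataPreProcessor block amounts to, numerically.
import numpy as np

MEAN = np.array([123.675, 116.28, 103.53])  # ImageNet RGB means * 255
STD = np.array([58.395, 57.12, 57.375])     # ImageNet RGB stds  * 255

def preprocess(img_bgr: np.ndarray) -> np.ndarray:
    """img_bgr: HxWx3 uint8 as loaded by cv2; returns float32 CHW."""
    img = img_bgr[..., ::-1].astype(np.float32)  # bgr_to_rgb=True
    img = (img - MEAN) / STD                     # per-channel normalization
    return img.transpose(2, 0, 1)                # HWC -> CHW for the backbone

demo = (np.random.rand(4, 4, 3) * 255).astype(np.uint8)
print(preprocess(demo).shape)  # (3, 4, 4)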
-------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/_base_/models/upernet_mae.py: -------------------------------------------------------------------------------- 1 | norm_cfg = dict(type='SyncBN', requires_grad=True) 2 | data_preprocessor = dict( 3 | type='SegDataPreProcessor', 4 | mean=[123.675, 116.28, 103.53], 5 | std=[58.395, 57.12, 57.375], 6 | bgr_to_rgb=True, 7 | pad_val=0, 8 | seg_pad_val=255) 9 | model = dict( 10 | type='EncoderDecoder', 11 | data_preprocessor=data_preprocessor, 12 | pretrained=None, 13 | backbone=dict( 14 | type='MAE', 15 | img_size=(640, 640), 16 | patch_size=16, 17 | in_channels=3, 18 | embed_dims=768, 19 | num_layers=12, 20 | num_heads=12, 21 | mlp_ratio=4, 22 | out_indices=(3, 5, 7, 11), 23 | attn_drop_rate=0.0, 24 | drop_path_rate=0.1, 25 | norm_cfg=dict(type='LN', eps=1e-6), 26 | act_cfg=dict(type='GELU'), 27 | norm_eval=False, 28 | init_values=0.1), 29 | neck=dict(type='Feature2Pyramid', embed_dim=768, rescales=[4, 2, 1, 0.5]), 30 | decode_head=dict( 31 | type='UPerHead', 32 | in_channels=[384, 384, 384, 384], 33 | in_index=[0, 1, 2, 3], 34 | pool_scales=(1, 2, 3, 6), 35 | channels=512, 36 | dropout_ratio=0.1, 37 | num_classes=19, 38 | norm_cfg=norm_cfg, 39 | align_corners=False, 40 | loss_decode=dict( 41 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 42 | auxiliary_head=dict( 43 | type='FCNHead', 44 | in_channels=384, 45 | in_index=2, 46 | channels=256, 47 | num_convs=1, 48 | concat_input=False, 49 | dropout_ratio=0.1, 50 | num_classes=19, 51 | norm_cfg=norm_cfg, 52 | align_corners=False, 53 | loss_decode=dict( 54 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 55 | # model training and testing settings 56 | train_cfg=dict(), 57 | test_cfg=dict(mode='whole')) 58 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/_base_/models/upernet_r50.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | data_preprocessor = dict( 4 | type='SegDataPreProcessor', 5 | mean=[123.675, 116.28, 103.53], 6 | std=[58.395, 57.12, 57.375], 7 | bgr_to_rgb=True, 8 | pad_val=0, 9 | seg_pad_val=255) 10 | model = dict( 11 | type='EncoderDecoder', 12 | data_preprocessor=data_preprocessor, 13 | pretrained='open-mmlab://resnet50_v1c', 14 | backbone=dict( 15 | type='ResNetV1c', 16 | depth=50, 17 | num_stages=4, 18 | out_indices=(0, 1, 2, 3), 19 | dilations=(1, 1, 1, 1), 20 | strides=(1, 2, 2, 2), 21 | norm_cfg=norm_cfg, 22 | norm_eval=False, 23 | style='pytorch', 24 | contract_dilation=True), 25 | decode_head=dict( 26 | type='UPerHead', 27 | in_channels=[256, 512, 1024, 2048], 28 | in_index=[0, 1, 2, 3], 29 | pool_scales=(1, 2, 3, 6), 30 | channels=512, 31 | dropout_ratio=0.1, 32 | num_classes=19, 33 | norm_cfg=norm_cfg, 34 | align_corners=False, 35 | loss_decode=dict( 36 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 37 | auxiliary_head=dict( 38 | type='FCNHead', 39 | in_channels=1024, 40 | in_index=2, 41 | channels=256, 42 | num_convs=1, 43 | concat_input=False, 44 | dropout_ratio=0.1, 45 | num_classes=19, 46 | norm_cfg=norm_cfg, 47 | align_corners=False, 48 | loss_decode=dict( 49 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 50 | # model training and testing settings 51 | train_cfg=dict(), 52 | test_cfg=dict(mode='whole')) 53 | 
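These `_base_/models/*.py` files are fragments, not runnable configs: downstream files list them in `_base_` and mmengine merges everything into one nested dict. A usage sketch; the path below points at one of the derived configs later in this section and is illustrative only:

# Load a composed config and inspect the merged model dict.
from mmengine.config import Config

cfg = Config.fromfile(
    'configs/upernet/upernet_r50_4xb4-160k_ade20k-512x512.py')
# The derived file only sets num_classes and the crop size; everything else
# (backbone, heads, losses) comes from _base_/models/upernet_r50.py.
print(cfg.model.decode_head.type)         # 'UPerHead'
print(cfg.model.decode_head.num_classes)  # 150 (overriding the base's 19)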
-------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/_base_/models/upernet_swin.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | backbone_norm_cfg = dict(type='LN', requires_grad=True) 4 | data_preprocessor = dict( 5 | type='SegDataPreProcessor', 6 | mean=[123.675, 116.28, 103.53], 7 | std=[58.395, 57.12, 57.375], 8 | bgr_to_rgb=True, 9 | pad_val=0, 10 | seg_pad_val=255) 11 | model = dict( 12 | type='EncoderDecoder', 13 | data_preprocessor=data_preprocessor, 14 | pretrained=None, 15 | backbone=dict( 16 | type='SwinTransformer', 17 | pretrain_img_size=224, 18 | embed_dims=96, 19 | patch_size=4, 20 | window_size=7, 21 | mlp_ratio=4, 22 | depths=[2, 2, 6, 2], 23 | num_heads=[3, 6, 12, 24], 24 | strides=(4, 2, 2, 2), 25 | out_indices=(0, 1, 2, 3), 26 | qkv_bias=True, 27 | qk_scale=None, 28 | patch_norm=True, 29 | drop_rate=0., 30 | attn_drop_rate=0., 31 | drop_path_rate=0.3, 32 | use_abs_pos_embed=False, 33 | act_cfg=dict(type='GELU'), 34 | norm_cfg=backbone_norm_cfg), 35 | decode_head=dict( 36 | type='UPerHead', 37 | in_channels=[96, 192, 384, 768], 38 | in_index=[0, 1, 2, 3], 39 | pool_scales=(1, 2, 3, 6), 40 | channels=512, 41 | dropout_ratio=0.1, 42 | num_classes=19, 43 | norm_cfg=norm_cfg, 44 | align_corners=False, 45 | loss_decode=dict( 46 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 47 | auxiliary_head=dict( 48 | type='FCNHead', 49 | in_channels=384, 50 | in_index=2, 51 | channels=256, 52 | num_convs=1, 53 | concat_input=False, 54 | dropout_ratio=0.1, 55 | num_classes=19, 56 | norm_cfg=norm_cfg, 57 | align_corners=False, 58 | loss_decode=dict( 59 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 60 | # model training and testing settings 61 | train_cfg=dict(), 62 | test_cfg=dict(mode='whole')) 63 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/_base_/models/upernet_vit-b16_ln_mln.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | data_preprocessor = dict( 4 | type='SegDataPreProcessor', 5 | mean=[123.675, 116.28, 103.53], 6 | std=[58.395, 57.12, 57.375], 7 | bgr_to_rgb=True, 8 | pad_val=0, 9 | seg_pad_val=255) 10 | model = dict( 11 | type='EncoderDecoder', 12 | data_preprocessor=data_preprocessor, 13 | pretrained='pretrain/jx_vit_base_p16_224-80ecf9dd.pth', 14 | backbone=dict( 15 | type='VisionTransformer', 16 | img_size=(512, 512), 17 | patch_size=16, 18 | in_channels=3, 19 | embed_dims=768, 20 | num_layers=12, 21 | num_heads=12, 22 | mlp_ratio=4, 23 | out_indices=(2, 5, 8, 11), 24 | qkv_bias=True, 25 | drop_rate=0.0, 26 | attn_drop_rate=0.0, 27 | drop_path_rate=0.0, 28 | with_cls_token=True, 29 | norm_cfg=dict(type='LN', eps=1e-6), 30 | act_cfg=dict(type='GELU'), 31 | norm_eval=False, 32 | interpolate_mode='bicubic'), 33 | neck=dict( 34 | type='MultiLevelNeck', 35 | in_channels=[768, 768, 768, 768], 36 | out_channels=768, 37 | scales=[4, 2, 1, 0.5]), 38 | decode_head=dict( 39 | type='UPerHead', 40 | in_channels=[768, 768, 768, 768], 41 | in_index=[0, 1, 2, 3], 42 | pool_scales=(1, 2, 3, 6), 43 | channels=512, 44 | dropout_ratio=0.1, 45 | num_classes=19, 46 | norm_cfg=norm_cfg, 47 | align_corners=False, 48 | loss_decode=dict( 49 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 50 | 
auxiliary_head=dict( 51 | type='FCNHead', 52 | in_channels=768, 53 | in_index=3, 54 | channels=256, 55 | num_convs=1, 56 | concat_input=False, 57 | dropout_ratio=0.1, 58 | num_classes=19, 59 | norm_cfg=norm_cfg, 60 | align_corners=False, 61 | loss_decode=dict( 62 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 63 | # model training and testing settings 64 | train_cfg=dict(), 65 | test_cfg=dict(mode='whole')) # yapf: disable 66 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/_base_/schedules/schedule_160k.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005) 3 | optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer, clip_grad=None) 4 | # learning policy 5 | param_scheduler = [ 6 | dict( 7 | type='PolyLR', 8 | eta_min=1e-4, 9 | power=0.9, 10 | begin=0, 11 | end=160000, 12 | by_epoch=False) 13 | ] 14 | # training schedule for 160k 15 | train_cfg = dict( 16 | type='IterBasedTrainLoop', max_iters=160000, val_interval=16000) 17 | val_cfg = dict(type='ValLoop') 18 | test_cfg = dict(type='TestLoop') 19 | default_hooks = dict( 20 | timer=dict(type='IterTimerHook'), 21 | logger=dict(type='LoggerHook', interval=50, log_metric_by_epoch=False), 22 | param_scheduler=dict(type='ParamSchedulerHook'), 23 | checkpoint=dict(type='CheckpointHook', by_epoch=False, interval=16000), 24 | sampler_seed=dict(type='DistSamplerSeedHook'), 25 | visualization=dict(type='SegVisualizationHook')) 26 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/_base_/schedules/schedule_20k.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005) 3 | optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer, clip_grad=None) 4 | # learning policy 5 | param_scheduler = [ 6 | dict( 7 | type='PolyLR', 8 | eta_min=1e-4, 9 | power=0.9, 10 | begin=0, 11 | end=20000, 12 | by_epoch=False) 13 | ] 14 | # training schedule for 20k 15 | train_cfg = dict(type='IterBasedTrainLoop', max_iters=20000, val_interval=2000) 16 | val_cfg = dict(type='ValLoop') 17 | test_cfg = dict(type='TestLoop') 18 | default_hooks = dict( 19 | timer=dict(type='IterTimerHook'), 20 | logger=dict(type='LoggerHook', interval=50, log_metric_by_epoch=False), 21 | param_scheduler=dict(type='ParamSchedulerHook'), 22 | checkpoint=dict(type='CheckpointHook', by_epoch=False, interval=2000), 23 | sampler_seed=dict(type='DistSamplerSeedHook'), 24 | visualization=dict(type='SegVisualizationHook')) 25 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/_base_/schedules/schedule_240k.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005) 3 | optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer, clip_grad=None) 4 | # learning policy 5 | param_scheduler = [ 6 | dict( 7 | type='PolyLR', 8 | eta_min=1e-4, 9 | power=0.9, 10 | begin=0, 11 | end=240000, 12 | by_epoch=False) 13 | ] 14 | # training schedule for 240k 15 | train_cfg = dict( 16 | type='IterBasedTrainLoop', max_iters=240000, val_interval=24000) 17 | val_cfg = dict(type='ValLoop') 18 | test_cfg = dict(type='TestLoop') 19 | default_hooks = dict( 20 
| timer=dict(type='IterTimerHook'), 21 | logger=dict(type='LoggerHook', interval=50, log_metric_by_epoch=False), 22 | param_scheduler=dict(type='ParamSchedulerHook'), 23 | checkpoint=dict(type='CheckpointHook', by_epoch=False, interval=24000), 24 | sampler_seed=dict(type='DistSamplerSeedHook'), 25 | visualization=dict(type='SegVisualizationHook')) 26 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/_base_/schedules/schedule_25k.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optimizer = dict(type='AdamW', lr=0.001, weight_decay=0.1) 3 | optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer, clip_grad=None) 4 | # learning policy 5 | param_scheduler = [ 6 | dict( 7 | type='LinearLR', start_factor=3e-2, begin=0, end=12000, 8 | by_epoch=False), 9 | dict( 10 | type='PolyLRRatio', 11 | eta_min_ratio=3e-2, 12 | power=0.9, 13 | begin=12000, 14 | end=24000, 15 | by_epoch=False), 16 | dict(type='ConstantLR', by_epoch=False, factor=1, begin=24000, end=25000) 17 | ] 18 | # training schedule for 25k 19 | train_cfg = dict(type='IterBasedTrainLoop', max_iters=25000, val_interval=1000) 20 | val_cfg = dict(type='ValLoop') 21 | test_cfg = dict(type='TestLoop') 22 | default_hooks = dict( 23 | timer=dict(type='IterTimerHook'), 24 | logger=dict(type='LoggerHook', interval=50, log_metric_by_epoch=False), 25 | param_scheduler=dict(type='ParamSchedulerHook'), 26 | checkpoint=dict(type='CheckpointHook', by_epoch=False, interval=2000), 27 | sampler_seed=dict(type='DistSamplerSeedHook'), 28 | visualization=dict(type='SegVisualizationHook')) 29 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/_base_/schedules/schedule_320k.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005) 3 | optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer, clip_grad=None) 4 | # learning policy 5 | param_scheduler = [ 6 | dict( 7 | type='PolyLR', 8 | eta_min=1e-4, 9 | power=0.9, 10 | begin=0, 11 | end=320000, 12 | by_epoch=False) 13 | ] 14 | # training schedule for 320k 15 | train_cfg = dict( 16 | type='IterBasedTrainLoop', max_iters=320000, val_interval=32000) 17 | val_cfg = dict(type='ValLoop') 18 | test_cfg = dict(type='TestLoop') 19 | default_hooks = dict( 20 | timer=dict(type='IterTimerHook'), 21 | logger=dict(type='LoggerHook', interval=50, log_metric_by_epoch=False), 22 | param_scheduler=dict(type='ParamSchedulerHook'), 23 | checkpoint=dict(type='CheckpointHook', by_epoch=False, interval=32000), 24 | sampler_seed=dict(type='DistSamplerSeedHook'), 25 | visualization=dict(type='SegVisualizationHook')) 26 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/_base_/schedules/schedule_40k.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005) 3 | optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer, clip_grad=None) 4 | # learning policy 5 | param_scheduler = [ 6 | dict( 7 | type='PolyLR', 8 | eta_min=1e-4, 9 | power=0.9, 10 | begin=0, 11 | end=40000, 12 | by_epoch=False) 13 | ] 14 | # training schedule for 40k 15 | train_cfg = dict(type='IterBasedTrainLoop', max_iters=40000, val_interval=4000) 16 | val_cfg = dict(type='ValLoop') 
17 | test_cfg = dict(type='TestLoop') 18 | default_hooks = dict( 19 | timer=dict(type='IterTimerHook'), 20 | logger=dict(type='LoggerHook', interval=50, log_metric_by_epoch=False), 21 | param_scheduler=dict(type='ParamSchedulerHook'), 22 | checkpoint=dict(type='CheckpointHook', by_epoch=False, interval=4000), 23 | sampler_seed=dict(type='DistSamplerSeedHook'), 24 | visualization=dict(type='SegVisualizationHook')) 25 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/_base_/schedules/schedule_80k.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005) 3 | optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer, clip_grad=None) 4 | # learning policy 5 | param_scheduler = [ 6 | dict( 7 | type='PolyLR', 8 | eta_min=1e-4, 9 | power=0.9, 10 | begin=0, 11 | end=80000, 12 | by_epoch=False) 13 | ] 14 | # training schedule for 80k 15 | train_cfg = dict(type='IterBasedTrainLoop', max_iters=80000, val_interval=8000) 16 | val_cfg = dict(type='ValLoop') 17 | test_cfg = dict(type='TestLoop') 18 | default_hooks = dict( 19 | timer=dict(type='IterTimerHook'), 20 | logger=dict(type='LoggerHook', interval=50, log_metric_by_epoch=False), 21 | param_scheduler=dict(type='ParamSchedulerHook'), 22 | checkpoint=dict(type='CheckpointHook', by_epoch=False, interval=8000), 23 | sampler_seed=dict(type='DistSamplerSeedHook'), 24 | visualization=dict(type='SegVisualizationHook')) 25 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/convnext/convnext-base_upernet_8xb2-amp-160k_ade20k-512x512.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/models/upernet_convnext.py', '../_base_/datasets/ade20k.py', 3 | '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' 4 | ] 5 | crop_size = (512, 512) 6 | data_preprocessor = dict(size=crop_size) 7 | model = dict( 8 | data_preprocessor=data_preprocessor, 9 | decode_head=dict(in_channels=[128, 256, 512, 1024], num_classes=150), 10 | auxiliary_head=dict(in_channels=512, num_classes=150), 11 | test_cfg=dict(mode='slide', crop_size=crop_size, stride=(341, 341)), 12 | ) 13 | 14 | optim_wrapper = dict( 15 | _delete_=True, 16 | type='AmpOptimWrapper', 17 | optimizer=dict( 18 | type='AdamW', lr=0.0001, betas=(0.9, 0.999), weight_decay=0.05), 19 | paramwise_cfg={ 20 | 'decay_rate': 0.9, 21 | 'decay_type': 'stage_wise', 22 | 'num_layers': 12 23 | }, 24 | constructor='LearningRateDecayOptimizerConstructor', 25 | loss_scale='dynamic') 26 | 27 | param_scheduler = [ 28 | dict( 29 | type='LinearLR', start_factor=1e-6, by_epoch=False, begin=0, end=1500), 30 | dict( 31 | type='PolyLR', 32 | power=1.0, 33 | begin=1500, 34 | end=160000, 35 | eta_min=0.0, 36 | by_epoch=False, 37 | ) 38 | ] 39 | 40 | # By default, models are trained on 8 GPUs with 2 images per GPU 41 | train_dataloader = dict(batch_size=2) 42 | val_dataloader = dict(batch_size=1) 43 | test_dataloader = val_dataloader 44 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/convnext/convnext-base_upernet_8xb2-amp-160k_ade20k-640x640.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/models/upernet_convnext.py', 3 | '../_base_/datasets/ade20k_640x640.py', '../_base_/default_runtime.py', 4 
| '../_base_/schedules/schedule_160k.py' 5 | ] 6 | crop_size = (640, 640) 7 | data_preprocessor = dict(size=crop_size) 8 | checkpoint_file = 'https://download.openmmlab.com/mmclassification/v0/convnext/downstream/convnext-base_3rdparty_in21k_20220301-262fd037.pth' # noqa 9 | model = dict( 10 | data_preprocessor=data_preprocessor, 11 | backbone=dict( 12 | type='mmpretrain.ConvNeXt', 13 | arch='base', 14 | out_indices=[0, 1, 2, 3], 15 | drop_path_rate=0.4, 16 | layer_scale_init_value=1.0, 17 | gap_before_final_norm=False, 18 | init_cfg=dict( 19 | type='Pretrained', checkpoint=checkpoint_file, 20 | prefix='backbone.')), 21 | decode_head=dict( 22 | in_channels=[128, 256, 512, 1024], 23 | num_classes=150, 24 | ), 25 | auxiliary_head=dict(in_channels=512, num_classes=150), 26 | test_cfg=dict(mode='slide', crop_size=crop_size, stride=(426, 426)), 27 | ) 28 | 29 | optim_wrapper = dict( 30 | _delete_=True, 31 | type='AmpOptimWrapper', 32 | optimizer=dict( 33 | type='AdamW', lr=0.0001, betas=(0.9, 0.999), weight_decay=0.05), 34 | paramwise_cfg={ 35 | 'decay_rate': 0.9, 36 | 'decay_type': 'stage_wise', 37 | 'num_layers': 12 38 | }, 39 | constructor='LearningRateDecayOptimizerConstructor', 40 | loss_scale='dynamic') 41 | 42 | param_scheduler = [ 43 | dict( 44 | type='LinearLR', start_factor=1e-6, by_epoch=False, begin=0, end=1500), 45 | dict( 46 | type='PolyLR', 47 | power=1.0, 48 | begin=1500, 49 | end=160000, 50 | eta_min=0.0, 51 | by_epoch=False, 52 | ) 53 | ] 54 | 55 | # By default, models are trained on 8 GPUs with 2 images per GPU 56 | train_dataloader = dict(batch_size=2) 57 | val_dataloader = dict(batch_size=1) 58 | test_dataloader = val_dataloader 59 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/convnext/convnext-large_upernet_8xb2-amp-160k_ade20k-640x640.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/models/upernet_convnext.py', 3 | '../_base_/datasets/ade20k_640x640.py', '../_base_/default_runtime.py', 4 | '../_base_/schedules/schedule_160k.py' 5 | ] 6 | crop_size = (640, 640) 7 | data_preprocessor = dict(size=crop_size) 8 | checkpoint_file = 'https://download.openmmlab.com/mmclassification/v0/convnext/downstream/convnext-large_3rdparty_in21k_20220301-e6e0ea0a.pth' # noqa 9 | model = dict( 10 | data_preprocessor=data_preprocessor, 11 | backbone=dict( 12 | type='mmpretrain.ConvNeXt', 13 | arch='large', 14 | out_indices=[0, 1, 2, 3], 15 | drop_path_rate=0.4, 16 | layer_scale_init_value=1.0, 17 | gap_before_final_norm=False, 18 | init_cfg=dict( 19 | type='Pretrained', checkpoint=checkpoint_file, 20 | prefix='backbone.')), 21 | decode_head=dict( 22 | in_channels=[192, 384, 768, 1536], 23 | num_classes=150, 24 | ), 25 | auxiliary_head=dict(in_channels=768, num_classes=150), 26 | test_cfg=dict(mode='slide', crop_size=crop_size, stride=(426, 426)), 27 | ) 28 | 29 | optim_wrapper = dict( 30 | _delete_=True, 31 | type='AmpOptimWrapper', 32 | optimizer=dict( 33 | type='AdamW', lr=0.0001, betas=(0.9, 0.999), weight_decay=0.05), 34 | paramwise_cfg={ 35 | 'decay_rate': 0.9, 36 | 'decay_type': 'stage_wise', 37 | 'num_layers': 12 38 | }, 39 | constructor='LearningRateDecayOptimizerConstructor', 40 | loss_scale='dynamic') 41 | 42 | param_scheduler = [ 43 | dict( 44 | type='LinearLR', start_factor=1e-6, by_epoch=False, begin=0, end=1500), 45 | dict( 46 | type='PolyLR', 47 | power=1.0, 48 | begin=1500, 49 | end=160000, 50 | eta_min=0.0, 51 | by_epoch=False, 52 | ) 53 | ] 54 
| 55 | # By default, models are trained on 8 GPUs with 2 images per GPU 56 | train_dataloader = dict(batch_size=2) 57 | val_dataloader = dict(batch_size=1) 58 | test_dataloader = val_dataloader 59 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/convnext/convnext-small_upernet_8xb2-amp-160k_ade20k-512x512.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/models/upernet_convnext.py', '../_base_/datasets/ade20k.py', 3 | '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' 4 | ] 5 | crop_size = (512, 512) 6 | data_preprocessor = dict(size=crop_size) 7 | checkpoint_file = 'https://download.openmmlab.com/mmclassification/v0/convnext/downstream/convnext-small_3rdparty_32xb128-noema_in1k_20220301-303e75e3.pth' # noqa 8 | model = dict( 9 | data_preprocessor=data_preprocessor, 10 | backbone=dict( 11 | type='mmpretrain.ConvNeXt', 12 | arch='small', 13 | out_indices=[0, 1, 2, 3], 14 | drop_path_rate=0.3, 15 | layer_scale_init_value=1.0, 16 | gap_before_final_norm=False, 17 | init_cfg=dict( 18 | type='Pretrained', checkpoint=checkpoint_file, 19 | prefix='backbone.')), 20 | decode_head=dict( 21 | in_channels=[96, 192, 384, 768], 22 | num_classes=150, 23 | ), 24 | auxiliary_head=dict(in_channels=384, num_classes=150), 25 | test_cfg=dict(mode='slide', crop_size=crop_size, stride=(341, 341)), 26 | ) 27 | 28 | optim_wrapper = dict( 29 | _delete_=True, 30 | type='AmpOptimWrapper', 31 | optimizer=dict( 32 | type='AdamW', lr=0.0001, betas=(0.9, 0.999), weight_decay=0.05), 33 | paramwise_cfg={ 34 | 'decay_rate': 0.9, 35 | 'decay_type': 'stage_wise', 36 | 'num_layers': 12 37 | }, 38 | constructor='LearningRateDecayOptimizerConstructor', 39 | loss_scale='dynamic') 40 | 41 | param_scheduler = [ 42 | dict( 43 | type='LinearLR', start_factor=1e-6, by_epoch=False, begin=0, end=1500), 44 | dict( 45 | type='PolyLR', 46 | power=1.0, 47 | begin=1500, 48 | end=160000, 49 | eta_min=0.0, 50 | by_epoch=False, 51 | ) 52 | ] 53 | 54 | # By default, models are trained on 8 GPUs with 2 images per GPU 55 | train_dataloader = dict(batch_size=2) 56 | val_dataloader = dict(batch_size=1) 57 | test_dataloader = val_dataloader 58 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/convnext/convnext-tiny_upernet_8xb2-amp-160k_ade20k-512x512.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/models/upernet_convnext.py', '../_base_/datasets/ade20k.py', 3 | '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' 4 | ] 5 | crop_size = (512, 512) 6 | data_preprocessor = dict(size=crop_size) 7 | checkpoint_file = 'https://download.openmmlab.com/mmclassification/v0/convnext/downstream/convnext-tiny_3rdparty_32xb128-noema_in1k_20220301-795e9634.pth' # noqa 8 | model = dict( 9 | data_preprocessor=data_preprocessor, 10 | backbone=dict( 11 | type='mmpretrain.ConvNeXt', 12 | arch='tiny', 13 | out_indices=[0, 1, 2, 3], 14 | drop_path_rate=0.4, 15 | layer_scale_init_value=1.0, 16 | gap_before_final_norm=False, 17 | init_cfg=dict( 18 | type='Pretrained', checkpoint=checkpoint_file, 19 | prefix='backbone.')), 20 | decode_head=dict( 21 | in_channels=[96, 192, 384, 768], 22 | num_classes=150, 23 | ), 24 | auxiliary_head=dict(in_channels=384, num_classes=150), 25 | test_cfg=dict(mode='slide', crop_size=crop_size, stride=(341, 341)), 26 | ) 27 | 28 | optim_wrapper = dict( 
29 | _delete_=True, 30 | type='AmpOptimWrapper', 31 | optimizer=dict( 32 | type='AdamW', lr=0.0001, betas=(0.9, 0.999), weight_decay=0.05), 33 | paramwise_cfg={ 34 | 'decay_rate': 0.9, 35 | 'decay_type': 'stage_wise', 36 | 'num_layers': 6 37 | }, 38 | constructor='LearningRateDecayOptimizerConstructor', 39 | loss_scale='dynamic') 40 | 41 | param_scheduler = [ 42 | dict( 43 | type='LinearLR', start_factor=1e-6, by_epoch=False, begin=0, end=1500), 44 | dict( 45 | type='PolyLR', 46 | power=1.0, 47 | begin=1500, 48 | end=160000, 49 | eta_min=0.0, 50 | by_epoch=False, 51 | ) 52 | ] 53 | 54 | # By default, models are trained on 8 GPUs with 2 images per GPU 55 | train_dataloader = dict(batch_size=2) 56 | val_dataloader = dict(batch_size=1) 57 | test_dataloader = val_dataloader 58 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/convnext/convnext-xlarge_upernet_8xb2-amp-160k_ade20k-640x640.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/models/upernet_convnext.py', 3 | '../_base_/datasets/ade20k_640x640.py', '../_base_/default_runtime.py', 4 | '../_base_/schedules/schedule_160k.py' 5 | ] 6 | crop_size = (640, 640) 7 | data_preprocessor = dict(size=crop_size) 8 | checkpoint_file = 'https://download.openmmlab.com/mmclassification/v0/convnext/downstream/convnext-xlarge_3rdparty_in21k_20220301-08aa5ddc.pth' # noqa 9 | model = dict( 10 | data_preprocessor=data_preprocessor, 11 | backbone=dict( 12 | type='mmpretrain.ConvNeXt', 13 | arch='xlarge', 14 | out_indices=[0, 1, 2, 3], 15 | drop_path_rate=0.4, 16 | layer_scale_init_value=1.0, 17 | gap_before_final_norm=False, 18 | init_cfg=dict( 19 | type='Pretrained', checkpoint=checkpoint_file, 20 | prefix='backbone.')), 21 | decode_head=dict( 22 | in_channels=[256, 512, 1024, 2048], 23 | num_classes=150, 24 | ), 25 | auxiliary_head=dict(in_channels=1024, num_classes=150), 26 | test_cfg=dict(mode='slide', crop_size=crop_size, stride=(426, 426)), 27 | ) 28 | 29 | optim_wrapper = dict( 30 | _delete_=True, 31 | type='AmpOptimWrapper', 32 | optimizer=dict( 33 | type='AdamW', lr=0.00008, betas=(0.9, 0.999), weight_decay=0.05), 34 | paramwise_cfg={ 35 | 'decay_rate': 0.9, 36 | 'decay_type': 'stage_wise', 37 | 'num_layers': 12 38 | }, 39 | constructor='LearningRateDecayOptimizerConstructor', 40 | loss_scale='dynamic') 41 | 42 | param_scheduler = [ 43 | dict( 44 | type='LinearLR', start_factor=1e-6, by_epoch=False, begin=0, end=1500), 45 | dict( 46 | type='PolyLR', 47 | power=1.0, 48 | begin=1500, 49 | end=160000, 50 | eta_min=0.0, 51 | by_epoch=False, 52 | ) 53 | ] 54 | 55 | # By default, models are trained on 8 GPUs with 2 images per GPU 56 | train_dataloader = dict(batch_size=2) 57 | val_dataloader = dict(batch_size=1) 58 | test_dataloader = val_dataloader 59 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/swin/swin-base-patch4-window12-in1k-384x384-pre_upernet_8xb2-160k_ade20k-512x512.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | 'swin-tiny-patch4-window7-in1k-pre_upernet_8xb2-160k_ade20k-512x512.py' 3 | ] 4 | checkpoint_file = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/swin/swin_base_patch4_window12_384_20220317-55b0104a.pth' # noqa 5 | model = dict( 6 | backbone=dict( 7 | init_cfg=dict(type='Pretrained', checkpoint=checkpoint_file), 8 | pretrain_img_size=384, 9 | embed_dims=128, 10 | 
depths=[2, 2, 18, 2], 11 | num_heads=[4, 8, 16, 32], 12 | window_size=12), 13 | decode_head=dict(in_channels=[128, 256, 512, 1024], num_classes=150), 14 | auxiliary_head=dict(in_channels=512, num_classes=150)) 15 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/swin/swin-base-patch4-window12-in22k-384x384-pre_upernet_8xb2-160k_ade20k-512x512.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | './swin-base-patch4-window12-in1k-384x384-pre_upernet_8xb2-160k_ade20k-512x512.py' # noqa 3 | ] 4 | checkpoint_file = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/swin/swin_base_patch4_window12_384_22k_20220317-e5c09f74.pth' # noqa 5 | model = dict( 6 | backbone=dict( 7 | init_cfg=dict(type='Pretrained', checkpoint=checkpoint_file))) 8 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/swin/swin-base-patch4-window7-in1k-pre_upernet_8xb2-160k_ade20k-512x512.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | './swin-tiny-patch4-window7-in1k-pre_upernet_8xb2-160k_ade20k-512x512.py' 3 | ] 4 | checkpoint_file = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/swin/swin_base_patch4_window7_224_20220317-e9b98025.pth' # noqa 5 | model = dict( 6 | backbone=dict( 7 | init_cfg=dict(type='Pretrained', checkpoint=checkpoint_file), 8 | embed_dims=128, 9 | depths=[2, 2, 18, 2], 10 | num_heads=[4, 8, 16, 32]), 11 | decode_head=dict(in_channels=[128, 256, 512, 1024], num_classes=150), 12 | auxiliary_head=dict(in_channels=512, num_classes=150)) 13 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/swin/swin-base-patch4-window7-in22k-pre_upernet_8xb2-160k_ade20k-512x512.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | './swin-base-patch4-window7-in1k-pre_upernet_8xb2-160k_ade20k-512x512.py' 3 | ] 4 | checkpoint_file = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/swin/swin_base_patch4_window7_224_22k_20220317-4f79f7c0.pth' # noqa 5 | model = dict( 6 | backbone=dict( 7 | init_cfg=dict(type='Pretrained', checkpoint=checkpoint_file))) 8 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/swin/swin-large-patch4-window12-in22k-384x384-pre_upernet_8xb2-160k_ade20k-512x512.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | 'swin-large-patch4-window7-in22k-pre_upernet_' 3 | '8xb2-160k_ade20k-512x512.py' 4 | ] 5 | checkpoint_file = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/swin/swin_large_patch4_window12_384_22k_20220412-6580f57d.pth' # noqa 6 | model = dict( 7 | backbone=dict( 8 | init_cfg=dict(type='Pretrained', checkpoint=checkpoint_file), 9 | pretrain_img_size=384, 10 | window_size=12)) 11 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/swin/swin-large-patch4-window7-in22k-pre_upernet_8xb2-160k_ade20k-512x512.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | 'swin-tiny-patch4-window7-in1k-pre_upernet_8xb2-160k_' 3 | 'ade20k-512x512.py' 4 | ] 5 | checkpoint_file = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/swin/swin_large_patch4_window7_224_22k_20220412-aeecf2aa.pth' # 
noqa 6 | model = dict( 7 | backbone=dict( 8 | init_cfg=dict(type='Pretrained', checkpoint=checkpoint_file), 9 | pretrain_img_size=224, 10 | embed_dims=192, 11 | depths=[2, 2, 18, 2], 12 | num_heads=[6, 12, 24, 48], 13 | window_size=7), 14 | decode_head=dict(in_channels=[192, 384, 768, 1536], num_classes=150), 15 | auxiliary_head=dict(in_channels=768, num_classes=150)) 16 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/swin/swin-small-patch4-window7-in1k-pre_upernet_8xb2-160k_ade20k-512x512.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | './swin-tiny-patch4-window7-in1k-pre_upernet_8xb2-160k_ade20k-512x512.py' 3 | ] 4 | checkpoint_file = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/swin/swin_small_patch4_window7_224_20220317-7ba6d6dd.pth' # noqa 5 | model = dict( 6 | backbone=dict( 7 | init_cfg=dict(type='Pretrained', checkpoint=checkpoint_file), 8 | depths=[2, 2, 18, 2]), 9 | decode_head=dict(in_channels=[96, 192, 384, 768], num_classes=150), 10 | auxiliary_head=dict(in_channels=384, num_classes=150)) 11 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/swin/swin-tiny-patch4-window7-in1k-pre_upernet_8xb2-160k_ade20k-512x512.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/models/upernet_swin.py', '../_base_/datasets/ade20k.py', 3 | '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' 4 | ] 5 | crop_size = (512, 512) 6 | data_preprocessor = dict(size=crop_size) 7 | checkpoint_file = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/swin/swin_tiny_patch4_window7_224_20220317-1cdeb081.pth' # noqa 8 | model = dict( 9 | data_preprocessor=data_preprocessor, 10 | backbone=dict( 11 | init_cfg=dict(type='Pretrained', checkpoint=checkpoint_file), 12 | embed_dims=96, 13 | depths=[2, 2, 6, 2], 14 | num_heads=[3, 6, 12, 24], 15 | window_size=7, 16 | use_abs_pos_embed=False, 17 | drop_path_rate=0.3, 18 | patch_norm=True), 19 | decode_head=dict(in_channels=[96, 192, 384, 768], num_classes=150), 20 | auxiliary_head=dict(in_channels=384, num_classes=150)) 21 | 22 | # AdamW optimizer, no weight decay for position embedding & layer norm 23 | # in backbone 24 | optim_wrapper = dict( 25 | _delete_=True, 26 | type='OptimWrapper', 27 | optimizer=dict( 28 | type='AdamW', lr=0.00006, betas=(0.9, 0.999), weight_decay=0.01), 29 | paramwise_cfg=dict( 30 | custom_keys={ 31 | 'absolute_pos_embed': dict(decay_mult=0.), 32 | 'relative_position_bias_table': dict(decay_mult=0.), 33 | 'norm': dict(decay_mult=0.) 
34 | })) 35 | 36 | param_scheduler = [ 37 | dict( 38 | type='LinearLR', start_factor=1e-6, by_epoch=False, begin=0, end=1500), 39 | dict( 40 | type='PolyLR', 41 | eta_min=0.0, 42 | power=1.0, 43 | begin=1500, 44 | end=160000, 45 | by_epoch=False, 46 | ) 47 | ] 48 | 49 | # By default, models are trained on 8 GPUs with 2 images per GPU 50 | train_dataloader = dict(batch_size=2) 51 | val_dataloader = dict(batch_size=1) 52 | test_dataloader = val_dataloader 53 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/swin/swin-tiny-patch4-window7_upernet_1xb8-20k_levir-256x256.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/models/upernet_swin.py', '../_base_/datasets/levir_256x256.py', 3 | '../_base_/default_runtime.py', '../_base_/schedules/schedule_20k.py' 4 | ] 5 | crop_size = (256, 256) 6 | norm_cfg = dict(type='BN', requires_grad=True) 7 | data_preprocessor = dict( 8 | size=crop_size, 9 | type='SegDataPreProcessor', 10 | mean=[123.675, 116.28, 103.53, 123.675, 116.28, 103.53], 11 | std=[58.395, 57.12, 57.375, 58.395, 57.12, 57.375]) 12 | 13 | model = dict( 14 | data_preprocessor=data_preprocessor, 15 | backbone=dict( 16 | in_channels=6, 17 | embed_dims=96, 18 | depths=[2, 2, 6, 2], 19 | num_heads=[3, 6, 12, 24], 20 | window_size=7, 21 | use_abs_pos_embed=False, 22 | drop_path_rate=0.3, 23 | patch_norm=True), 24 | decode_head=dict(in_channels=[96, 192, 384, 768], num_classes=2), 25 | auxiliary_head=dict(in_channels=384, num_classes=2)) 26 | 27 | # AdamW optimizer, no weight decay for position embedding & layer norm 28 | # in backbone 29 | optim_wrapper = dict( 30 | _delete_=True, 31 | type='OptimWrapper', 32 | optimizer=dict( 33 | type='AdamW', lr=0.00006, betas=(0.9, 0.999), weight_decay=0.01), 34 | paramwise_cfg=dict( 35 | custom_keys={ 36 | 'absolute_pos_embed': dict(decay_mult=0.), 37 | 'relative_position_bias_table': dict(decay_mult=0.), 38 | 'norm': dict(decay_mult=0.) 
39 | })) 40 | 41 | param_scheduler = [ 42 | dict( 43 | type='LinearLR', start_factor=1e-6, by_epoch=False, begin=0, end=1500), 44 | dict( 45 | type='PolyLR', 46 | eta_min=0.0, 47 | power=1.0, 48 | begin=1500, 49 | end=20000, 50 | by_epoch=False, 51 | ) 52 | ] 53 | 54 | train_dataloader = dict(batch_size=4) 55 | val_dataloader = dict(batch_size=1) 56 | test_dataloader = val_dataloader 57 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/upernet/upernet_r101_4xb2-40k_cityscapes-512x1024.py: -------------------------------------------------------------------------------- 1 | _base_ = './upernet_r50_4xb2-40k_cityscapes-512x1024.py' 2 | model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) 3 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/upernet/upernet_r101_4xb2-40k_cityscapes-769x769.py: -------------------------------------------------------------------------------- 1 | _base_ = './upernet_r50_4xb2-40k_cityscapes-769x769.py' 2 | model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) 3 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/upernet/upernet_r101_4xb2-80k_cityscapes-512x1024.py: -------------------------------------------------------------------------------- 1 | _base_ = './upernet_r50_4xb2-80k_cityscapes-512x1024.py' 2 | model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) 3 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/upernet/upernet_r101_4xb2-80k_cityscapes-769x769.py: -------------------------------------------------------------------------------- 1 | _base_ = './upernet_r50_4xb2-80k_cityscapes-769x769.py' 2 | model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) 3 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/upernet/upernet_r101_4xb4-160k_ade20k-512x512.py: -------------------------------------------------------------------------------- 1 | _base_ = './upernet_r50_4xb4-160k_ade20k-512x512.py' 2 | model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) 3 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/upernet/upernet_r101_4xb4-20k_voc12aug-512x512.py: -------------------------------------------------------------------------------- 1 | _base_ = './upernet_r50_4xb4-20k_voc12aug-512x512.py' 2 | model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) 3 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/upernet/upernet_r101_4xb4-40k_voc12aug-512x512.py: -------------------------------------------------------------------------------- 1 | _base_ = './upernet_r50_4xb4-40k_voc12aug-512x512.py' 2 | model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) 3 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/upernet/upernet_r101_4xb4-80k_ade20k-512x512.py: -------------------------------------------------------------------------------- 1 | _base_ = './upernet_r50_4xb4-80k_ade20k-512x512.py' 2 | model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) 3 | 
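The ResNet-101 variants above are pure overrides: each inherits an r50 config via `_base_` and replaces only `pretrained` and `backbone.depth`. A rough sketch of the dict-merge rule this relies on (mmengine's actual merge also honors `_delete_=True`, which the convnext configs use to swap the whole `optim_wrapper`):

# Simplified version of the recursive config merge behind _base_ inheritance.
def merge(base: dict, override: dict) -> dict:
    out = dict(base)
    for k, v in override.items():
        if isinstance(v, dict) and isinstance(out.get(k), dict):
            out[k] = merge(out[k], v)  # recurse into nested dicts
        else:
            out[k] = v                 # scalars and lists replace outright
    return out

base = dict(pretrained='open-mmlab://resnet50_v1c',
            backbone=dict(type='ResNetV1c', depth=50, num_stages=4))
override = dict(pretrained='open-mmlab://resnet101_v1c',
                backbone=dict(depth=101))
print(merge(base, override)['backbone'])
# {'type': 'ResNetV1c', 'depth': 101, 'num_stages': 4}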
-------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/upernet/upernet_r18_4xb2-40k_cityscapes-512x1024.py: -------------------------------------------------------------------------------- 1 | _base_ = './upernet_r50_4xb2-40k_cityscapes-512x1024.py' 2 | model = dict( 3 | pretrained='open-mmlab://resnet18_v1c', 4 | backbone=dict(depth=18), 5 | decode_head=dict(in_channels=[64, 128, 256, 512]), 6 | auxiliary_head=dict(in_channels=256)) 7 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/upernet/upernet_r18_4xb2-80k_cityscapes-512x1024.py: -------------------------------------------------------------------------------- 1 | _base_ = './upernet_r50_4xb2-80k_cityscapes-512x1024.py' 2 | model = dict( 3 | pretrained='open-mmlab://resnet18_v1c', 4 | backbone=dict(depth=18), 5 | decode_head=dict(in_channels=[64, 128, 256, 512]), 6 | auxiliary_head=dict(in_channels=256)) 7 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/upernet/upernet_r18_4xb4-160k_ade20k-512x512.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/models/upernet_r50.py', '../_base_/datasets/ade20k.py', 3 | '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' 4 | ] 5 | model = dict( 6 | pretrained='open-mmlab://resnet18_v1c', 7 | backbone=dict(depth=18), 8 | decode_head=dict(in_channels=[64, 128, 256, 512], num_classes=150), 9 | auxiliary_head=dict(in_channels=256, num_classes=150)) 10 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/upernet/upernet_r18_4xb4-20k_voc12aug-512x512.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/models/upernet_r50.py', 3 | '../_base_/datasets/pascal_voc12_aug.py', '../_base_/default_runtime.py', 4 | '../_base_/schedules/schedule_20k.py' 5 | ] 6 | model = dict( 7 | pretrained='open-mmlab://resnet18_v1c', 8 | backbone=dict(depth=18), 9 | decode_head=dict(in_channels=[64, 128, 256, 512], num_classes=21), 10 | auxiliary_head=dict(in_channels=256, num_classes=21)) 11 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/upernet/upernet_r18_4xb4-40k_voc12aug-512x512.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/models/upernet_r50.py', 3 | '../_base_/datasets/pascal_voc12_aug.py', '../_base_/default_runtime.py', 4 | '../_base_/schedules/schedule_40k.py' 5 | ] 6 | model = dict( 7 | pretrained='open-mmlab://resnet18_v1c', 8 | backbone=dict(depth=18), 9 | decode_head=dict(in_channels=[64, 128, 256, 512], num_classes=21), 10 | auxiliary_head=dict(in_channels=256, num_classes=21)) 11 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/upernet/upernet_r18_4xb4-80k_ade20k-512x512.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/models/upernet_r50.py', '../_base_/datasets/ade20k.py', 3 | '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' 4 | ] 5 | model = dict( 6 | pretrained='open-mmlab://resnet18_v1c', 7 | backbone=dict(depth=18), 8 | decode_head=dict(in_channels=[64, 128, 256, 512], num_classes=150), 9 | auxiliary_head=dict(in_channels=256, 
num_classes=150)) 10 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/upernet/upernet_r50_4xb2-40k_cityscapes-512x1024.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/models/upernet_r50.py', '../_base_/datasets/cityscapes.py', 3 | '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' 4 | ] 5 | crop_size = (512, 1024) 6 | data_preprocessor = dict(size=crop_size) 7 | model = dict(data_preprocessor=data_preprocessor) 8 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/upernet/upernet_r50_4xb2-40k_cityscapes-769x769.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/models/upernet_r50.py', 3 | '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', 4 | '../_base_/schedules/schedule_40k.py' 5 | ] 6 | crop_size = (769, 769) 7 | data_preprocessor = dict(size=crop_size) 8 | model = dict( 9 | data_preprocessor=data_preprocessor, 10 | decode_head=dict(align_corners=True), 11 | auxiliary_head=dict(align_corners=True), 12 | test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513))) 13 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/upernet/upernet_r50_4xb2-80k_cityscapes-512x1024.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/models/upernet_r50.py', '../_base_/datasets/cityscapes.py', 3 | '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' 4 | ] 5 | crop_size = (512, 1024) 6 | data_preprocessor = dict(size=crop_size) 7 | model = dict(data_preprocessor=data_preprocessor) 8 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/upernet/upernet_r50_4xb2-80k_cityscapes-769x769.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/models/upernet_r50.py', 3 | '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', 4 | '../_base_/schedules/schedule_80k.py' 5 | ] 6 | crop_size = (769, 769) 7 | data_preprocessor = dict(size=crop_size) 8 | model = dict( 9 | data_preprocessor=data_preprocessor, 10 | decode_head=dict(align_corners=True), 11 | auxiliary_head=dict(align_corners=True), 12 | test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513))) 13 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/upernet/upernet_r50_4xb4-160k_ade20k-512x512.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/models/upernet_r50.py', '../_base_/datasets/ade20k.py', 3 | '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' 4 | ] 5 | crop_size = (512, 512) 6 | data_preprocessor = dict(size=crop_size) 7 | model = dict( 8 | data_preprocessor=data_preprocessor, 9 | decode_head=dict(num_classes=150), 10 | auxiliary_head=dict(num_classes=150)) 11 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/upernet/upernet_r50_4xb4-20k_voc12aug-512x512.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/models/upernet_r50.py', 3 | '../_base_/datasets/pascal_voc12_aug.py', 
'../_base_/default_runtime.py', 4 | '../_base_/schedules/schedule_20k.py' 5 | ] 6 | crop_size = (512, 512) 7 | data_preprocessor = dict(size=crop_size) 8 | model = dict( 9 | data_preprocessor=data_preprocessor, 10 | decode_head=dict(num_classes=21), 11 | auxiliary_head=dict(num_classes=21)) 12 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/upernet/upernet_r50_4xb4-40k_voc12aug-512x512.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/models/upernet_r50.py', 3 | '../_base_/datasets/pascal_voc12_aug.py', '../_base_/default_runtime.py', 4 | '../_base_/schedules/schedule_40k.py' 5 | ] 6 | crop_size = (512, 512) 7 | data_preprocessor = dict(size=crop_size) 8 | model = dict( 9 | data_preprocessor=data_preprocessor, 10 | decode_head=dict(num_classes=21), 11 | auxiliary_head=dict(num_classes=21)) 12 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/upernet/upernet_r50_4xb4-80k_ade20k-512x512.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/models/upernet_r50.py', '../_base_/datasets/ade20k.py', 3 | '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' 4 | ] 5 | crop_size = (512, 512) 6 | data_preprocessor = dict(size=crop_size) 7 | model = dict( 8 | data_preprocessor=data_preprocessor, 9 | decode_head=dict(num_classes=150), 10 | auxiliary_head=dict(num_classes=150)) 11 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/vit/vit_deit-b16-ln_mln_upernet_8xb2-160k_ade20k-512x512.py: -------------------------------------------------------------------------------- 1 | _base_ = './vit_vit-b16_mln_upernet_8xb2-160k_ade20k-512x512.py' 2 | 3 | model = dict( 4 | pretrained='pretrain/deit_base_patch16_224-b5f2ef4d.pth', 5 | backbone=dict(drop_path_rate=0.1, final_norm=True)) 6 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/vit/vit_deit-b16_mln_upernet_8xb2-160k_ade20k-512x512.py: -------------------------------------------------------------------------------- 1 | _base_ = './vit_vit-b16_mln_upernet_8xb2-160k_ade20k-512x512.py' 2 | 3 | model = dict( 4 | pretrained='pretrain/deit_base_patch16_224-b5f2ef4d.pth', 5 | backbone=dict(drop_path_rate=0.1), 6 | ) 7 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/vit/vit_deit-b16_upernet_8xb2-160k_ade20k-512x512.py: -------------------------------------------------------------------------------- 1 | _base_ = './vit_vit-b16_mln_upernet_8xb2-160k_ade20k-512x512.py' 2 | 3 | model = dict( 4 | pretrained='pretrain/deit_base_patch16_224-b5f2ef4d.pth', 5 | backbone=dict(drop_path_rate=0.1), 6 | neck=None) 7 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/vit/vit_deit-b16_upernet_8xb2-80k_ade20k-512x512.py: -------------------------------------------------------------------------------- 1 | _base_ = './vit_vit-b16_mln_upernet_8xb2-80k_ade20k-512x512.py' 2 | 3 | model = dict( 4 | pretrained='pretrain/deit_base_patch16_224-b5f2ef4d.pth', 5 | backbone=dict(drop_path_rate=0.1), 6 | neck=None) 7 | -------------------------------------------------------------------------------- 
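Every `_base_ = '...'` config above is a thin override: mmengine resolves the base file recursively and merges the child's dicts on top, so the DeiT variants only swap the pretrained checkpoint and a few backbone fields while inheriting everything else from the ViT-B/16 base. A minimal sketch of inspecting a merged config (the mmengine `Config` API is standard; the printed values follow from the config shown above):

```python
from mmengine.config import Config

# `_base_` is resolved recursively; child keys override base keys.
cfg = Config.fromfile(
    'configs/vit/vit_deit-b16_mln_upernet_8xb2-160k_ade20k-512x512.py')
print(cfg.model.pretrained)               # pretrain/deit_base_patch16_224-b5f2ef4d.pth
print(cfg.model.backbone.drop_path_rate)  # 0.1, set by this override
```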
/2DVMamba/segmentation/configs/vit/vit_deit-s16-ln_mln_upernet_8xb2-160k_ade20k-512x512.py: -------------------------------------------------------------------------------- 1 | _base_ = './vit_vit-b16_mln_upernet_8xb2-160k_ade20k-512x512.py' 2 | 3 | model = dict( 4 | pretrained='pretrain/deit_small_patch16_224-cd65a155.pth', 5 | backbone=dict( 6 | num_heads=6, embed_dims=384, drop_path_rate=0.1, final_norm=True), 7 | decode_head=dict(num_classes=150, in_channels=[384, 384, 384, 384]), 8 | neck=dict(in_channels=[384, 384, 384, 384], out_channels=384), 9 | auxiliary_head=dict(num_classes=150, in_channels=384)) 10 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/vit/vit_deit-s16_mln_upernet_8xb2-160k_ade20k-512x512.py: -------------------------------------------------------------------------------- 1 | _base_ = './vit_vit-b16_mln_upernet_8xb2-160k_ade20k-512x512.py' 2 | 3 | model = dict( 4 | pretrained='pretrain/deit_small_patch16_224-cd65a155.pth', 5 | backbone=dict(num_heads=6, embed_dims=384, drop_path_rate=0.1), 6 | decode_head=dict(num_classes=150, in_channels=[384, 384, 384, 384]), 7 | neck=dict(in_channels=[384, 384, 384, 384], out_channels=384), 8 | auxiliary_head=dict(num_classes=150, in_channels=384)) 9 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/vit/vit_deit-s16_upernet_8xb2-160k_ade20k-512x512.py: -------------------------------------------------------------------------------- 1 | _base_ = './vit_vit-b16_mln_upernet_8xb2-80k_ade20k-512x512.py' 2 | 3 | model = dict( 4 | pretrained='pretrain/deit_small_patch16_224-cd65a155.pth', 5 | backbone=dict(num_heads=6, embed_dims=384, drop_path_rate=0.1), 6 | decode_head=dict(num_classes=150, in_channels=[384, 384, 384, 384]), 7 | neck=None, 8 | auxiliary_head=dict(num_classes=150, in_channels=384)) 9 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/vit/vit_deit-s16_upernet_8xb2-80k_ade20k-512x512.py: -------------------------------------------------------------------------------- 1 | _base_ = './vit_vit-b16_mln_upernet_8xb2-80k_ade20k-512x512.py' 2 | 3 | model = dict( 4 | pretrained='pretrain/deit_small_patch16_224-cd65a155.pth', 5 | backbone=dict(num_heads=6, embed_dims=384, drop_path_rate=0.1), 6 | decode_head=dict(num_classes=150, in_channels=[384, 384, 384, 384]), 7 | neck=None, 8 | auxiliary_head=dict(num_classes=150, in_channels=384)) 9 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/vit/vit_vit-b16-ln_mln_upernet_8xb2-160k_ade20k-512x512.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/models/upernet_vit-b16_ln_mln.py', 3 | '../_base_/datasets/ade20k.py', '../_base_/default_runtime.py', 4 | '../_base_/schedules/schedule_160k.py' 5 | ] 6 | crop_size = (512, 512) 7 | data_preprocessor = dict(size=crop_size) 8 | model = dict( 9 | data_preprocessor=data_preprocessor, 10 | pretrained='pretrain/vit_base_patch16_224.pth', 11 | backbone=dict(drop_path_rate=0.1, final_norm=True), 12 | decode_head=dict(num_classes=150), 13 | auxiliary_head=dict(num_classes=150)) 14 | 15 | # AdamW optimizer, no weight decay for position embedding & layer norm 16 | # in backbone 17 | optim_wrapper = dict( 18 | _delete_=True, 19 | type='OptimWrapper', 20 | optimizer=dict( 21 | type='AdamW', lr=0.00006, betas=(0.9, 0.999), 
weight_decay=0.01), 22 | paramwise_cfg=dict( 23 | custom_keys={ 24 | 'pos_embed': dict(decay_mult=0.), 25 | 'cls_token': dict(decay_mult=0.), 26 | 'norm': dict(decay_mult=0.) 27 | })) 28 | 29 | param_scheduler = [ 30 | dict( 31 | type='LinearLR', start_factor=1e-6, by_epoch=False, begin=0, end=1500), 32 | dict( 33 | type='PolyLR', 34 | eta_min=0.0, 35 | power=1.0, 36 | begin=1500, 37 | end=160000, 38 | by_epoch=False, 39 | ) 40 | ] 41 | 42 | # By default, models are trained on 8 GPUs with 2 images per GPU 43 | train_dataloader = dict(batch_size=2) 44 | val_dataloader = dict(batch_size=1) 45 | test_dataloader = val_dataloader 46 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/vit/vit_vit-b16_mln_upernet_8xb2-160k_ade20k-512x512.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/models/upernet_vit-b16_ln_mln.py', 3 | '../_base_/datasets/ade20k.py', '../_base_/default_runtime.py', 4 | '../_base_/schedules/schedule_160k.py' 5 | ] 6 | crop_size = (512, 512) 7 | data_preprocessor = dict(size=crop_size) 8 | model = dict( 9 | data_preprocessor=data_preprocessor, 10 | pretrained='pretrain/vit_base_patch16_224.pth', 11 | decode_head=dict(num_classes=150), 12 | auxiliary_head=dict(num_classes=150)) 13 | 14 | # AdamW optimizer, no weight decay for position embedding & layer norm 15 | # in backbone 16 | optim_wrapper = dict( 17 | _delete_=True, 18 | type='OptimWrapper', 19 | optimizer=dict( 20 | type='AdamW', lr=0.00006, betas=(0.9, 0.999), weight_decay=0.01), 21 | paramwise_cfg=dict( 22 | custom_keys={ 23 | 'pos_embed': dict(decay_mult=0.), 24 | 'cls_token': dict(decay_mult=0.), 25 | 'norm': dict(decay_mult=0.) 26 | })) 27 | 28 | param_scheduler = [ 29 | dict( 30 | type='LinearLR', start_factor=1e-6, by_epoch=False, begin=0, end=1500), 31 | dict( 32 | type='PolyLR', 33 | eta_min=0.0, 34 | power=1.0, 35 | begin=1500, 36 | end=160000, 37 | by_epoch=False, 38 | ) 39 | ] 40 | 41 | # By default, models are trained on 8 GPUs with 2 images per GPU 42 | train_dataloader = dict(batch_size=2) 43 | val_dataloader = dict(batch_size=1) 44 | test_dataloader = val_dataloader 45 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/vit/vit_vit-b16_mln_upernet_8xb2-80k_ade20k-512x512.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/models/upernet_vit-b16_ln_mln.py', 3 | '../_base_/datasets/ade20k.py', '../_base_/default_runtime.py', 4 | '../_base_/schedules/schedule_80k.py' 5 | ] 6 | crop_size = (512, 512) 7 | data_preprocessor = dict(size=crop_size) 8 | model = dict( 9 | data_preprocessor=data_preprocessor, 10 | pretrained='pretrain/vit_base_patch16_224.pth', 11 | decode_head=dict(num_classes=150), 12 | auxiliary_head=dict(num_classes=150)) 13 | 14 | # AdamW optimizer, no weight decay for position embedding & layer norm 15 | # in backbone 16 | optim_wrapper = dict( 17 | _delete_=True, 18 | type='OptimWrapper', 19 | optimizer=dict( 20 | type='AdamW', lr=0.00006, betas=(0.9, 0.999), weight_decay=0.01), 21 | paramwise_cfg=dict( 22 | custom_keys={ 23 | 'pos_embed': dict(decay_mult=0.), 24 | 'cls_token': dict(decay_mult=0.), 25 | 'norm': dict(decay_mult=0.) 
26 | })) 27 | 28 | param_scheduler = [ 29 | dict( 30 | type='LinearLR', start_factor=1e-6, by_epoch=False, begin=0, end=1500), 31 | dict( 32 | type='PolyLR', 33 | eta_min=0.0, 34 | power=1.0, 35 | begin=1500, 36 | end=80000, 37 | by_epoch=False, 38 | ) 39 | ] 40 | 41 | # By default, models are trained on 8 GPUs with 2 images per GPU 42 | train_dataloader = dict(batch_size=2) 43 | val_dataloader = dict(batch_size=1) 44 | test_dataloader = val_dataloader 45 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/configs/vssm_2d/upernet_vssm_2d_4xb4-160k_ade20k-512x512_tiny.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../swin/swin-tiny-patch4-window7-in1k-pre_upernet_8xb2-160k_ade20k-512x512.py' 3 | ] 4 | # checkpoint_path = ('/scratch/KurcGroup/jingwei/result/v2dmamba/vmamba/' 5 | # 'vmambav2v_2d_tiny_224/vssm1_tiny_0230s/20241023030251/' 6 | # 'ckpt_epoch_269.pth') # noqa 7 | # checkpoint_path = ('/gpfs/scratch/jingwezhang/checkpoint/v2dmamba/tiny_1k/ckpt_epoch_269.pth') # noqa 8 | checkpoint_path = ('') 9 | model = dict( 10 | backbone=dict( 11 | type='MM_VSSM', 12 | out_indices=(0, 1, 2, 3), 13 | pretrained=checkpoint_path, # here is the path 14 | # copied from classification/configs/vssm/vssm_tiny_224.yaml 15 | dims=96, 16 | # depths=(2, 2, 5, 2), 17 | depths=(2, 2, 8, 2), 18 | ssm_d_state=1, 19 | ssm_dt_rank="auto", 20 | # ssm_ratio=2.0, 21 | ssm_ratio=1.0, 22 | ssm_conv=3, 23 | ssm_conv_bias=False, 24 | forward_type="v05_noz", # v3_noz, 25 | mlp_ratio=4.0, 26 | downsample_version="v3", 27 | patchembed_version="v2", 28 | drop_path_rate=0.2, 29 | norm_layer="ln2d", 30 | use_v2d=True 31 | ),) 32 | train_dataloader = dict(batch_size=4) # as gpus=4 33 | 34 | # default_hooks = dict( 35 | # timer=dict(type='IterTimerHook'), 36 | # logger=dict(type='LoggerHook', interval=50, log_metric_by_epoch=False), 37 | # param_scheduler=dict(type='ParamSchedulerHook'), 38 | # checkpoint=dict(type='CheckpointHook', by_epoch=False, interval=2000), 39 | # sampler_seed=dict(type='DistSamplerSeedHook'), 40 | # visualization=dict(type='SegVisualizationHook', draw=True, interval=1)) 41 | 42 | 43 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/model.py: -------------------------------------------------------------------------------- 1 | import os 2 | from functools import partial 3 | from typing import Callable 4 | 5 | import torch 6 | from torch import nn 7 | from torch.utils import checkpoint 8 | 9 | from mmengine.model import BaseModule 10 | from mmdet.registry import MODELS as MODELS_MMDET 11 | from mmseg.registry import MODELS as MODELS_MMSEG 12 | 13 | def import_abspy(name="models", path="classification/"): 14 | import sys 15 | import importlib 16 | path = os.path.abspath(path) 17 | assert os.path.isdir(path) 18 | sys.path.insert(0, path) 19 | module = importlib.import_module(name) 20 | sys.path.pop(0) 21 | return module 22 | 23 | build = import_abspy( 24 | "models", 25 | os.path.join(os.path.dirname(os.path.abspath(__file__)), "../classification/"), 26 | ) 27 | Backbone_VSSM: nn.Module = build.vmamba.Backbone_VSSM 28 | 29 | @MODELS_MMSEG.register_module() 30 | @MODELS_MMDET.register_module() 31 | class MM_VSSM(BaseModule, Backbone_VSSM): 32 | def __init__(self, *args, **kwargs): 33 | BaseModule.__init__(self) 34 | Backbone_VSSM.__init__(self, *args, **kwargs) 35 | 36 | 
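`model.py` above is the glue between the classification repo and MMSegmentation: it imports `Backbone_VSSM` from `classification/models` and registers it as `MM_VSSM` in both the mmseg and mmdet model registries, which is why the readme below notes that `import model` had to be added to `tools/train.py` and `tools/test.py`. A hedged sketch of building the registered backbone by hand (keyword arguments are trimmed from the vssm_2d config above; unlisted ones are assumed to have defaults):

```python
import model  # noqa: F401  # side effect: registers MM_VSSM in the registries
from mmseg.registry import MODELS

# Build the backbone from a config dict, as the mmengine runner does internally.
backbone = MODELS.build(dict(
    type='MM_VSSM',
    out_indices=(0, 1, 2, 3),
    pretrained='',            # path to a classification checkpoint, if any
    dims=96,
    depths=(2, 2, 8, 2),
    use_v2d=True,
))
```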
-------------------------------------------------------------------------------- /2DVMamba/segmentation/readme.md: -------------------------------------------------------------------------------- 1 | ## origins 2 | `configs/` and `tools/` are copied from https://github.com/open-mmlab/mmsegmentation, `version 1.2.2` 3 | 4 | ## modifications 5 | `import model` is added at `tools/train.py#13` 6 | `import model` is added at `tools/test.py#8` 7 | 8 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/tools/dataset_converters/cityscapes.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import argparse 3 | import os.path as osp 4 | 5 | from cityscapesscripts.preparation.json2labelImg import json2labelImg 6 | from mmengine.utils import (mkdir_or_exist, scandir, track_parallel_progress, 7 | track_progress) 8 | 9 | 10 | def convert_json_to_label(json_file): 11 | label_file = json_file.replace('_polygons.json', '_labelTrainIds.png') 12 | json2labelImg(json_file, label_file, 'trainIds') 13 | 14 | 15 | def parse_args(): 16 | parser = argparse.ArgumentParser( 17 | description='Convert Cityscapes annotations to TrainIds') 18 | parser.add_argument('cityscapes_path', help='cityscapes data path') 19 | parser.add_argument('--gt-dir', default='gtFine', type=str) 20 | parser.add_argument('-o', '--out-dir', help='output path') 21 | parser.add_argument( 22 | '--nproc', default=1, type=int, help='number of processes') 23 | args = parser.parse_args() 24 | return args 25 | 26 | 27 | def main(): 28 | args = parse_args() 29 | cityscapes_path = args.cityscapes_path 30 | out_dir = args.out_dir if args.out_dir else cityscapes_path 31 | mkdir_or_exist(out_dir) 32 | 33 | gt_dir = osp.join(cityscapes_path, args.gt_dir) 34 | 35 | poly_files = [] 36 | for poly in scandir(gt_dir, '_polygons.json', recursive=True): 37 | poly_file = osp.join(gt_dir, poly) 38 | poly_files.append(poly_file) 39 | if args.nproc > 1: 40 | track_parallel_progress(convert_json_to_label, poly_files, args.nproc) 41 | else: 42 | track_progress(convert_json_to_label, poly_files) 43 | 44 | split_names = ['train', 'val', 'test'] 45 | 46 | for split in split_names: 47 | filenames = [] 48 | for poly in scandir( 49 | osp.join(gt_dir, split), '_polygons.json', recursive=True): 50 | filenames.append(poly.replace('_gtFine_polygons.json', '')) 51 | with open(osp.join(out_dir, f'{split}.txt'), 'w') as f: 52 | f.writelines(name + '\n' for name in filenames) 53 | 54 | 55 | if __name__ == '__main__': 56 | main() 57 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/tools/dist_test.sh: -------------------------------------------------------------------------------- 1 | CONFIG=$1 2 | CHECKPOINT=$2 3 | GPUS=$3 4 | NNODES=${NNODES:-1} 5 | NODE_RANK=${NODE_RANK:-0} 6 | PORT=${PORT:-29500} 7 | MASTER_ADDR=${MASTER_ADDR:-"127.0.0.1"} 8 | 9 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 10 | python -m torch.distributed.launch \ 11 | --nnodes=$NNODES \ 12 | --node_rank=$NODE_RANK \ 13 | --master_addr=$MASTER_ADDR \ 14 | --nproc_per_node=$GPUS \ 15 | --master_port=$PORT \ 16 | $(dirname "$0")/test.py \ 17 | $CONFIG \ 18 | $CHECKPOINT \ 19 | --launcher pytorch \ 20 | ${@:4} 21 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/tools/dist_train.sh: -------------------------------------------------------------------------------- 1 | 
CONFIG=$1 2 | GPUS=$2 3 | NNODES=${NNODES:-1} 4 | NODE_RANK=${NODE_RANK:-0} 5 | PORT=${PORT:-29500} 6 | MASTER_ADDR=${MASTER_ADDR:-"127.0.0.1"} 7 | 8 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 9 | python -m torch.distributed.launch \ 10 | --nnodes=$NNODES \ 11 | --node_rank=$NODE_RANK \ 12 | --master_addr=$MASTER_ADDR \ 13 | --nproc_per_node=$GPUS \ 14 | --master_port=$PORT \ 15 | $(dirname "$0")/train.py \ 16 | $CONFIG \ 17 | --launcher pytorch ${@:3} 18 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/tools/misc/publish_model.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import argparse 3 | import subprocess 4 | from hashlib import sha256 5 | 6 | import torch 7 | 8 | BLOCK_SIZE = 128 * 1024 9 | 10 | 11 | def parse_args(): 12 | parser = argparse.ArgumentParser( 13 | description='Process a checkpoint to be published') 14 | parser.add_argument('in_file', help='input checkpoint filename') 15 | parser.add_argument('out_file', help='output checkpoint filename') 16 | args = parser.parse_args() 17 | return args 18 | 19 | 20 | def sha256sum(filename: str) -> str: 21 | """Compute SHA256 message digest from a file.""" 22 | hash_func = sha256() 23 | byte_array = bytearray(BLOCK_SIZE) 24 | memory_view = memoryview(byte_array) 25 | with open(filename, 'rb', buffering=0) as file: 26 | for block in iter(lambda: file.readinto(memory_view), 0): 27 | hash_func.update(memory_view[:block]) 28 | return hash_func.hexdigest() 29 | 30 | 31 | def process_checkpoint(in_file, out_file): 32 | checkpoint = torch.load(in_file, map_location='cpu') 33 | # remove optimizer for smaller file size 34 | if 'optimizer' in checkpoint: 35 | del checkpoint['optimizer'] 36 | # if it is necessary to remove some sensitive data in checkpoint['meta'], 37 | # add the code here. 38 | torch.save(checkpoint, out_file) 39 | sha = sha256sum(out_file)  # hash the published file, not the input 40 | final_file = out_file.removesuffix('.pth') + f'-{sha[:8]}.pth'  # rstrip('.pth') would strip characters, not the suffix 41 | subprocess.Popen(['mv', out_file, final_file]) 42 | 43 | 44 | def main(): 45 | args = parse_args() 46 | process_checkpoint(args.in_file, args.out_file) 47 | 48 | 49 | if __name__ == '__main__': 50 | main() 51 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/tools/model_converters/beit2mmseg.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
2 | import argparse 3 | import os.path as osp 4 | from collections import OrderedDict 5 | 6 | import mmengine 7 | import torch 8 | from mmengine.runner import CheckpointLoader 9 | 10 | 11 | def convert_beit(ckpt): 12 | new_ckpt = OrderedDict() 13 | 14 | for k, v in ckpt.items(): 15 | if k.startswith('patch_embed'): 16 | new_key = k.replace('patch_embed.proj', 'patch_embed.projection') 17 | new_ckpt[new_key] = v 18 | elif k.startswith('blocks'):  # elif, so patch_embed keys are not re-added by the else branch 19 | new_key = k.replace('blocks', 'layers') 20 | if 'norm' in new_key: 21 | new_key = new_key.replace('norm', 'ln') 22 | elif 'mlp.fc1' in new_key: 23 | new_key = new_key.replace('mlp.fc1', 'ffn.layers.0.0') 24 | elif 'mlp.fc2' in new_key: 25 | new_key = new_key.replace('mlp.fc2', 'ffn.layers.1') 26 | new_ckpt[new_key] = v 27 | else: 28 | new_key = k 29 | new_ckpt[new_key] = v 30 | 31 | return new_ckpt 32 | 33 | 34 | def main(): 35 | parser = argparse.ArgumentParser( 36 | description='Convert keys in official pretrained beit models to ' 37 | 'MMSegmentation style.') 38 | parser.add_argument('src', help='src model path or url') 39 | # The dst path must be a full path of the new checkpoint. 40 | parser.add_argument('dst', help='save path') 41 | args = parser.parse_args() 42 | 43 | checkpoint = CheckpointLoader.load_checkpoint(args.src, map_location='cpu') 44 | if 'state_dict' in checkpoint: 45 | state_dict = checkpoint['state_dict'] 46 | elif 'model' in checkpoint: 47 | state_dict = checkpoint['model'] 48 | else: 49 | state_dict = checkpoint 50 | weight = convert_beit(state_dict) 51 | mmengine.mkdir_or_exist(osp.dirname(args.dst)) 52 | torch.save(weight, args.dst) 53 | 54 | 55 | if __name__ == '__main__': 56 | main() 57 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/tools/model_converters/vit2mmseg.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import argparse 3 | import os.path as osp 4 | from collections import OrderedDict 5 | 6 | import mmengine 7 | import torch 8 | from mmengine.runner import CheckpointLoader 9 | 10 | 11 | def convert_vit(ckpt): 12 | 13 | new_ckpt = OrderedDict() 14 | 15 | for k, v in ckpt.items(): 16 | if k.startswith('head'): 17 | continue 18 | if k.startswith('norm'): 19 | new_k = k.replace('norm.', 'ln1.') 20 | elif k.startswith('patch_embed'): 21 | if 'proj' in k: 22 | new_k = k.replace('proj', 'projection') 23 | else: 24 | new_k = k 25 | elif k.startswith('blocks'): 26 | if 'norm' in k: 27 | new_k = k.replace('norm', 'ln') 28 | elif 'mlp.fc1' in k: 29 | new_k = k.replace('mlp.fc1', 'ffn.layers.0.0') 30 | elif 'mlp.fc2' in k: 31 | new_k = k.replace('mlp.fc2', 'ffn.layers.1') 32 | elif 'attn.qkv' in k: 33 | new_k = k.replace('attn.qkv.', 'attn.attn.in_proj_') 34 | elif 'attn.proj' in k: 35 | new_k = k.replace('attn.proj', 'attn.attn.out_proj') 36 | else: 37 | new_k = k 38 | new_k = new_k.replace('blocks.', 'layers.') 39 | else: 40 | new_k = k 41 | new_ckpt[new_k] = v 42 | 43 | return new_ckpt 44 | 45 | 46 | def main(): 47 | parser = argparse.ArgumentParser( 48 | description='Convert keys in timm pretrained vit models to ' 49 | 'MMSegmentation style.') 50 | parser.add_argument('src', help='src model path or url') 51 | # The dst path must be a full path of the new checkpoint. 
52 | parser.add_argument('dst', help='save path') 53 | args = parser.parse_args() 54 | 55 | checkpoint = CheckpointLoader.load_checkpoint(args.src, map_location='cpu') 56 | if 'state_dict' in checkpoint: 57 | # timm checkpoint 58 | state_dict = checkpoint['state_dict'] 59 | elif 'model' in checkpoint: 60 | # deit checkpoint 61 | state_dict = checkpoint['model'] 62 | else: 63 | state_dict = checkpoint 64 | weight = convert_vit(state_dict) 65 | mmengine.mkdir_or_exist(osp.dirname(args.dst)) 66 | torch.save(weight, args.dst) 67 | 68 | 69 | if __name__ == '__main__': 70 | main() 71 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/tools/slurm_test.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -x 4 | 5 | PARTITION=$1 6 | JOB_NAME=$2 7 | CONFIG=$3 8 | CHECKPOINT=$4 9 | GPUS=${GPUS:-4} 10 | GPUS_PER_NODE=${GPUS_PER_NODE:-4} 11 | CPUS_PER_TASK=${CPUS_PER_TASK:-5} 12 | PY_ARGS=${@:5} 13 | SRUN_ARGS=${SRUN_ARGS:-""} 14 | 15 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 16 | srun -p ${PARTITION} \ 17 | --job-name=${JOB_NAME} \ 18 | --gres=gpu:${GPUS_PER_NODE} \ 19 | --ntasks=${GPUS} \ 20 | --ntasks-per-node=${GPUS_PER_NODE} \ 21 | --cpus-per-task=${CPUS_PER_TASK} \ 22 | --kill-on-bad-exit=1 \ 23 | ${SRUN_ARGS} \ 24 | python -u tools/test.py ${CONFIG} ${CHECKPOINT} --launcher="slurm" ${PY_ARGS} 25 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/tools/slurm_train.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -x 4 | 5 | PARTITION=$1 6 | JOB_NAME=$2 7 | CONFIG=$3 8 | GPUS=${GPUS:-4} 9 | GPUS_PER_NODE=${GPUS_PER_NODE:-4} 10 | CPUS_PER_TASK=${CPUS_PER_TASK:-5} 11 | SRUN_ARGS=${SRUN_ARGS:-""} 12 | PY_ARGS=${@:4} 13 | 14 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 15 | srun -p ${PARTITION} \ 16 | --job-name=${JOB_NAME} \ 17 | --gres=gpu:${GPUS_PER_NODE} \ 18 | --ntasks=${GPUS} \ 19 | --ntasks-per-node=${GPUS_PER_NODE} \ 20 | --cpus-per-task=${CPUS_PER_TASK} \ 21 | --kill-on-bad-exit=1 \ 22 | ${SRUN_ARGS} \ 23 | python -u tools/train.py ${CONFIG} --launcher="slurm" ${PY_ARGS} 24 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/tools/torchserve/mmseg_handler.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import base64 3 | import os 4 | 5 | import cv2 6 | import mmcv 7 | import torch 8 | from mmengine.model.utils import revert_sync_batchnorm 9 | from ts.torch_handler.base_handler import BaseHandler 10 | 11 | from mmseg.apis import inference_model, init_model 12 | 13 | 14 | class MMsegHandler(BaseHandler): 15 | 16 | def initialize(self, context): 17 | properties = context.system_properties 18 | self.map_location = 'cuda' if torch.cuda.is_available() else 'cpu' 19 | self.device = torch.device(self.map_location + ':' + 20 | str(properties.get('gpu_id')) if torch.cuda. 
21 | is_available() else self.map_location) 22 | self.manifest = context.manifest 23 | 24 | model_dir = properties.get('model_dir') 25 | serialized_file = self.manifest['model']['serializedFile'] 26 | checkpoint = os.path.join(model_dir, serialized_file) 27 | self.config_file = os.path.join(model_dir, 'config.py') 28 | 29 | self.model = init_model(self.config_file, checkpoint, self.device) 30 | self.model = revert_sync_batchnorm(self.model) 31 | self.initialized = True 32 | 33 | def preprocess(self, data): 34 | images = [] 35 | 36 | for row in data: 37 | image = row.get('data') or row.get('body') 38 | if isinstance(image, str): 39 | image = base64.b64decode(image) 40 | image = mmcv.imfrombytes(image) 41 | images.append(image) 42 | 43 | return images 44 | 45 | def inference(self, data, *args, **kwargs): 46 | results = [inference_model(self.model, img) for img in data] 47 | return results 48 | 49 | def postprocess(self, data): 50 | output = [] 51 | 52 | for image_result in data: 53 | _, buffer = cv2.imencode('.png', image_result[0].astype('uint8')) 54 | content = buffer.tobytes() 55 | output.append(content) 56 | return output 57 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/tools/torchserve/test_torchserve.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from argparse import ArgumentParser 3 | from io import BytesIO 4 | 5 | import matplotlib.pyplot as plt 6 | import mmcv 7 | import requests 8 | 9 | from mmseg.apis import inference_model, init_model 10 | 11 | 12 | def parse_args(): 13 | parser = ArgumentParser( 14 | description='Compare results of torchserve and pytorch, ' 15 | 'and visualize them.') 16 | parser.add_argument('img', help='Image file') 17 | parser.add_argument('config', help='Config file') 18 | parser.add_argument('checkpoint', help='Checkpoint file') 19 | parser.add_argument('model_name', help='The model name in the server') 20 | parser.add_argument( 21 | '--inference-addr', 22 | default='127.0.0.1:8080', 23 | help='Address and port of the inference server') 24 | parser.add_argument( 25 | '--result-image', 26 | type=str, 27 | default=None, 28 | help='save server output in result-image') 29 | parser.add_argument( 30 | '--device', default='cuda:0', help='Device used for inference') 31 | 32 | args = parser.parse_args() 33 | return args 34 | 35 | 36 | def main(args): 37 | url = 'http://' + args.inference_addr + '/predictions/' + args.model_name 38 | with open(args.img, 'rb') as image: 39 | tmp_res = requests.post(url, image) 40 | content = tmp_res.content 41 | if args.result_image: 42 | with open(args.result_image, 'wb') as out_image: 43 | out_image.write(content) 44 | plt.imshow(mmcv.imread(args.result_image, 'grayscale')) 45 | plt.show() 46 | else: 47 | plt.imshow(plt.imread(BytesIO(content))) 48 | plt.show() 49 | model = init_model(args.config, args.checkpoint, args.device) 50 | image = mmcv.imread(args.img) 51 | result = inference_model(model, image) 52 | plt.imshow(result[0]) 53 | plt.show() 54 | 55 | 56 | if __name__ == '__main__': 57 | args = parse_args() 58 | main(args) 59 | -------------------------------------------------------------------------------- /2DVMamba/segmentation/vis/vis_seg.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from mmseg.apis import init_model, inference_model, show_result_pyplot 4 | 5 | 6 | # config_path = 
'configs/vssm1/upernet_vssm_4xb4-160k_ade20k-512x512_tiny.py' 7 | # checkpoint_path = ('/scratch/KurcGroup/jingwei/gpfs/checkpoint/vmamba/' 8 | # 'upernet_vssm_4xb4-160k_ade20k-512x512_tiny_s_iter_160000.pth') 9 | config_path = 'configs/vssm_2d/upernet_vssm_2d_4xb4-160k_ade20k-512x512_tiny.py' 10 | checkpoint_path = ('/gpfs/scratch/jingwezhang/result/v2dmamba/v2dmamba_fix/v2dmamba_t' 11 | '/segmentation/iter_160000.pth') 12 | img_path = 'demo/demo.png' 13 | validation_path = ('/scratch/KurcGroup/jingwei/Projects/VMamba/segmentation/' 14 | 'data/ade/ADEChallengeData2016/images/validation') 15 | out_dir = '/gpfs/scratch/jingwezhang/result/v2dmamba/v2dmamba_fix/v2dmamba_t/segmentation/' 16 | try: 17 | import segmentation.model 18 | except ImportError: 19 | import model 20 | 21 | if __name__ == '__main__': 22 | image_filenames = os.listdir(validation_path) 23 | 24 | 25 | # build the model from a config file and a checkpoint file 26 | model = init_model(config_path, checkpoint_path, device='cuda:0') 27 | 28 | # inference on given image 29 | result = inference_model(model, img_path) 30 | 31 | # # display the segmentation result 32 | # vis_image = show_result_pyplot(model, img_path, result) 33 | 34 | # save the visualization result; the output image will be written to `work_dirs/result.png` 35 | vis_image = show_result_pyplot(model, img_path, result, out_file='work_dirs/result.png') 36 | 37 | # # Modify the time of displaying images, note that 0 is the special value that means "forever" 38 | # vis_image = show_result_pyplot(model, img_path, result, wait_time=5) -------------------------------------------------------------------------------- /cuda_kernel/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | if [ -d "build" ]; then 4 | rm -r build 5 | fi 6 | 7 | mkdir build 8 | #cmake -DCMAKE_BUILD_TYPE=Release -DPython_ROOT_DIR=/opt/conda -DCUDA_ARCHS="70;75;80" -DBOUNDARY_CHECK=1 -DNAN_SMEM_CHECK=1 -DNAN_GRAD_CHECK=1 -B build 9 | #cmake -DCMAKE_BUILD_TYPE=Release -DPython_ROOT_DIR="/home/jzhang/Dev/anaconda3_2023/envs/vmamba" -DCUDA_ARCHS="70;75;80" -DBOUNDARY_CHECK=1 -B build 10 | cmake -DCMAKE_BUILD_TYPE=Release -DPython_ROOT_DIR="/opt/conda" -DCUDA_ARCHS="70;75;80" -DOUTPUT_DIRECTORY=../../v2dmamba_scan -B build 11 | 12 | #cmake -DCMAKE_BUILD_TYPE=Release -DPython_ROOT_DIR="/home/jzhang/Dev/anaconda3_2023/envs/vmamba" -DCUDA_ARCHS="70;75;80" -DOUTPUT_DIRECTORY=../v2dmamba_scan -B build 13 | 14 | cmake --build build -- -j32 -------------------------------------------------------------------------------- /cuda_kernel/include/scan/commons.h: -------------------------------------------------------------------------------- 1 | #ifndef NDMAMBA_COMMONS_H 2 | #define NDMAMBA_COMMONS_H 3 | 4 | 5 | namespace ndmamba 6 | { 7 | 8 | enum ScanDir : int 9 | { 10 | kHorizontal = 0, 11 | kVertical = 1, 12 | kHorizontalReversed = 2, 13 | kVerticalReversed = 3, 14 | }; 15 | 16 | } // namespace ndmamba 17 | 18 | 19 | #endif // NDMAMBA_COMMONS_H -------------------------------------------------------------------------------- /cuda_kernel/include/selective_scan/global.cuh: -------------------------------------------------------------------------------- 1 | #ifndef NDMAMBA_GLOBAL_CUH 2 | #define NDMAMBA_GLOBAL_CUH 3 | 4 | 5 | namespace ndmamba 6 | { 7 | 8 | // Actual span across input data per block. 
9 | inline constexpr int kMaxDimPerBlock = 32; 10 | 11 | } // namespace ndmamba 12 | 13 | 14 | #endif // NDMAMBA_GLOBAL_CUH 15 | -------------------------------------------------------------------------------- /cuda_kernel/include/selective_scan/static_switch.cuh: -------------------------------------------------------------------------------- 1 | // Inspired by https://github.com/NVIDIA/DALI/blob/main/include/dali/core/static_switch.h 2 | // and https://github.com/pytorch/pytorch/blob/master/aten/src/ATen/Dispatch.h 3 | 4 | #pragma once 5 | 6 | /// @param COND - a boolean expression to switch by 7 | /// @param CONST_NAME - a name given for the constexpr bool variable. 8 | /// @param ... - code to execute for true and false 9 | /// 10 | /// Usage: 11 | /// ``` 12 | /// BOOL_SWITCH(flag, BoolConst, [&] { 13 | /// some_function(...); 14 | /// }); 15 | /// ``` 16 | #define BOOL_SWITCH(COND, CONST_NAME, ...) \ 17 | [&] { \ 18 | if (COND) { \ 19 | constexpr bool CONST_NAME = true; \ 20 | return __VA_ARGS__(); \ 21 | } else { \ 22 | constexpr bool CONST_NAME = false; \ 23 | return __VA_ARGS__(); \ 24 | } \ 25 | }() 26 | -------------------------------------------------------------------------------- /cuda_kernel/src/pscan.cu: -------------------------------------------------------------------------------- 1 | #include <torch/extension.h>  // NOTE: angle-bracket includes and template arguments below were lost in extraction; reconstructed 2 | 3 | 4 | std::vector<at::Tensor> 5 | selective_scan_fwd(const at::Tensor & u, 6 | const at::Tensor & delta, 7 | const at::Tensor & A, 8 | const at::Tensor & B, 9 | const at::Tensor & C, 10 | const c10::optional<at::Tensor> & D_, 11 | const c10::optional<at::Tensor> & z_, 12 | const c10::optional<at::Tensor> & delta_bias_, 13 | bool delta_softplus, 14 | int height, 15 | int width, 16 | bool out_float = true); 17 | 18 | 19 | std::vector<at::Tensor> 20 | selective_scan_bwd(const at::Tensor & u, 21 | const at::Tensor & delta, 22 | const at::Tensor & A, 23 | const at::Tensor & B, 24 | const at::Tensor & C, 25 | const c10::optional<at::Tensor> & D_, 26 | const c10::optional<at::Tensor> & z_, 27 | const c10::optional<at::Tensor> & delta_bias_, 28 | const at::Tensor & dout, 29 | const c10::optional<at::Tensor> & x_, 30 | const c10::optional<at::Tensor> & out_, 31 | c10::optional<at::Tensor> & dz_, 32 | bool delta_softplus, 33 | bool recompute_out_z, 34 | int height, 35 | int width); 36 | 37 | 38 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) 39 | { 40 | using namespace pybind11::literals; 41 | 42 | m.def("fwd", 43 | &selective_scan_fwd, 44 | "Selective scan forward", 45 | "u"_a, 46 | "delta"_a, 47 | "A"_a, 48 | "B"_a, 49 | "C"_a, 50 | "D_"_a, 51 | "z_"_a, 52 | "delta_bias_"_a, 53 | "delta_softplus"_a, 54 | "height"_a, 55 | "width"_a, 56 | "out_float"_a = true); 57 | 58 | m.def("bwd", 59 | &selective_scan_bwd, 60 | "u"_a, 61 | "delta"_a, 62 | "A"_a, 63 | "B"_a, 64 | "C"_a, 65 | "D_"_a, 66 | "z_"_a, 67 | "delta_bias_"_a, 68 | "dout"_a, 69 | "x_"_a, 70 | "out_"_a, 71 | "dz_"_a, 72 | "delta_softplus"_a, 73 | "recompute_out_z"_a, 74 | "height"_a, 75 | "width"_a); 76 | } 77 | -------------------------------------------------------------------------------- /cuda_kernel/src/repo/test_002_warp.cu: -------------------------------------------------------------------------------- 1 | #include <cstdio>  // NOTE: system headers reconstructed; originals lost in extraction 2 | #include <cub/cub.cuh> 3 | #include <thrust/device_vector.h> 4 | #include <thrust/host_vector.h> 5 | 6 | #include "utils/cuda_utils.h" 7 | #include "scan/block_scan.cuh" 8 | 9 | 10 | template <typename T> 11 | __global__ void test() 12 | { 13 | using Scan = mamband::SegWarpScan<T>;  // NOTE: template arguments assumed (lost in extraction) 14 | typename Scan::TempStorage tempStorage; 15 | Scan scan(tempStorage); 16 | 17 | T input = cub::LaneId() / 8; 18 | T inclusiveOutput; 19 | T segmentAggregate; 20 | scan.InclusiveScan(input, inclusiveOutput, cub::Sum(), 
segmentAggregate); 21 | printf("lane %u inclusiveOutput = %f warpAggregate = %f\n", cub::LaneId(), inclusiveOutput, segmentAggregate); 22 | } 23 | 24 | 25 | int main() 26 | { 27 | // thrust::device_vector<float> d_out(8 * 16, 1.0f); 28 | test<float><<<1, 32>>>();  // NOTE: kernel template argument assumed 29 | CUDA_CHECK_LAST_ERROR(); 30 | CUDA_CHECK(cudaDeviceSynchronize()); 31 | 32 | return 0; 33 | } 34 | -------------------------------------------------------------------------------- /cuda_kernel/src/repo/test_003_warp_hw.cu: -------------------------------------------------------------------------------- 1 | #include <cstdio>  // NOTE: system headers reconstructed; originals lost in extraction 2 | #include <cub/cub.cuh> 3 | #include <thrust/device_vector.h> 4 | #include <thrust/host_vector.h> 5 | 6 | #include "utils/cuda_utils.h" 7 | #include "scan/block_scan.cuh" 8 | #include "scan/commons.h" 9 | 10 | 11 | 12 | template <typename T> 13 | struct ScanOp 14 | { 15 | __device__ __forceinline__ T operator()(const T & a, const T & b) = delete; 16 | }; 17 | 18 | 19 | template <> 20 | struct ScanOp<float2> 21 | { 22 | __device__ __forceinline__ float2 operator()(const float2 & a, const float2 & b) 23 | { 24 | return {a.x + b.x, a.y + b.y}; 25 | } 26 | }; 27 | 28 | 29 | __global__ void test() 30 | { 31 | using Scan = mamband::SegWarpScan<float2>;  // NOTE: template arguments assumed (lost in extraction) 32 | typename Scan::TempStorage tempStorage; 33 | Scan scan(tempStorage); 34 | 35 | ScanOp<float2> scanOp; 36 | 37 | float2 input; 38 | input.x = cub::LaneId() / 8; 39 | input.y = cub::LaneId() % 8; 40 | 41 | float2 segAgg; 42 | 43 | scan.InclusiveScan(input, input, scanOp, segAgg, mamband::kHorizontal); 44 | printf("lane %u inclusiveOutput = %f %f warpAggregate = %f %f\n", 45 | cub::LaneId(), 46 | input.x, input.y, 47 | segAgg.x, segAgg.y); 48 | 49 | printf("\n"); 50 | 51 | scan.InclusiveScan(input, input, scanOp, segAgg, mamband::kVertical); 52 | printf("lane %u inclusiveOutput = %f %f warpAggregate = %f %f\n", 53 | cub::LaneId(), 54 | input.x, input.y, 55 | segAgg.x, segAgg.y); 56 | } 57 | 58 | 59 | int main() 60 | { 61 | // thrust::device_vector<float> d_out(8 * 16, 1.0f); 62 | test<<<1, 32>>>(); 63 | CUDA_CHECK_LAST_ERROR(); 64 | CUDA_CHECK(cudaDeviceSynchronize()); 65 | 66 | return 0; 67 | } 68 | -------------------------------------------------------------------------------- /cuda_kernel/src/repo/test_005_block_prefix_callback_op.cu: -------------------------------------------------------------------------------- 1 | #include <cstdio>  // NOTE: system headers reconstructed; originals lost in extraction 2 | #include <cstdlib> 3 | 4 | #include <cub/cub.cuh> 5 | #include <cuda_runtime.h> 6 | #include <thrust/device_vector.h> 7 | #include <thrust/host_vector.h> 8 | 9 | #include "utils/cuda_utils.h" 10 | #include "scan/block_scan.cuh" 11 | #include "scan/commons.h" 12 | 13 | 14 | 15 | template <typename T> 16 | struct ScanOp 17 | { 18 | __device__ __forceinline__ T operator()(const T & a, const T & b) = delete; 19 | }; 20 | 21 | 22 | template <> 23 | struct ScanOp<float> 24 | { 25 | __device__ __forceinline__ float operator()(const float & a, const float & b) 26 | { 27 | return a + b; 28 | } 29 | }; 30 | 31 | 32 | template <> 33 | struct ScanOp<float2> 34 | { 35 | __device__ __forceinline__ float2 operator()(const float2 & a, const float2 & b) 36 | { 37 | return {a.x + b.x, a.y + b.y}; 38 | } 39 | }; 40 | 41 | 42 | template <typename T> 43 | struct BlockPrefixCallbackOp 44 | { 45 | __device__ BlockPrefixCallbackOp(T runningPrefix) : runningPrefix(runningPrefix) {} 46 | 47 | __device__ T operator()(T blockAggregate) 48 | { 49 | T oldPrefix = runningPrefix; 50 | runningPrefix = ScanOp<T>()(runningPrefix, blockAggregate); 51 | return oldPrefix; 52 | } 53 | 54 | T runningPrefix = 0; 55 | }; 56 | 57 | 58 | template <typename T> 59 | __global__ void scan() 60 | { 61 | using Scan = cub::BlockScan<T, 64>;  // NOTE: template arguments assumed to match the <<<1, 64>>> launch 62 | using BlockPrefixCallbackOp = BlockPrefixCallbackOp<T>; 63 | 64 | __shared__ typename Scan::TempStorage
tempStorage; 65 | Scan scan(tempStorage); 66 | 67 | BlockPrefixCallbackOp blockPrefixCallbackOp(0); 68 | 69 | T input = 1; 70 | scan.InclusiveScan(input, input, cub::Sum(), blockPrefixCallbackOp); 71 | 72 | input = 1; 73 | scan.InclusiveScan(input, input, cub::Sum(), blockPrefixCallbackOp); 74 | 75 | const int tid = blockIdx.x * blockDim.x + threadIdx.x; 76 | printf("tid %d input = %f\n", tid, input); 77 | 78 | if (tid == 0) 79 | { 80 | printf("blockPrefixCallbackOp.runningPrefix = %f\n", blockPrefixCallbackOp.runningPrefix); 81 | } 82 | } 83 | 84 | 85 | int main() 86 | { 87 | scan<float><<<1, 64>>>();  // NOTE: kernel template argument assumed 88 | CUDA_CHECK_LAST_ERROR(); 89 | CUDA_CHECK(cudaDeviceSynchronize()); 90 | 91 | return 0; 92 | } 93 | -------------------------------------------------------------------------------- /cuda_kernel/src/selective_scan/selective_scan_bwd_kernel_fp16.cu: -------------------------------------------------------------------------------- 1 | /****************************************************************************** 2 | * Copyright (c) 2023, Tri Dao. 3 | ******************************************************************************/ 4 | 5 | // Split into multiple files to compile in parallel 6 | 7 | #include "selective_scan/selective_scan_bwd_kernel.cuh" 8 | 9 | 10 | template void selective_scan_bwd_cuda<at::Half, float>(SSMParamsBwd & params, cudaStream_t stream);  // NOTE: explicit template arguments assumed (lost in extraction); out_float in pscan.cu suggests <input_t, output_t> pairs 11 | 12 | template void selective_scan_bwd_cuda<at::Half, at::Half>(SSMParamsBwd & params, cudaStream_t stream); 13 | -------------------------------------------------------------------------------- /cuda_kernel/src/selective_scan/selective_scan_bwd_kernel_fp32.cu: -------------------------------------------------------------------------------- 1 | /****************************************************************************** 2 | * Copyright (c) 2023, Tri Dao. 3 | ******************************************************************************/ 4 | 5 | // Split into multiple files to compile in parallel 6 | 7 | #include "selective_scan/selective_scan_bwd_kernel.cuh" 8 | 9 | 10 | template void selective_scan_bwd_cuda<float, float>(SSMParamsBwd & params, cudaStream_t stream);  // NOTE: explicit template arguments assumed (lost in extraction) 11 | 12 | -------------------------------------------------------------------------------- /cuda_kernel/src/selective_scan/selective_scan_fwd_kernel_fp16.cu: -------------------------------------------------------------------------------- 1 | /****************************************************************************** 2 | * Copyright (c) 2023, Tri Dao. 3 | ******************************************************************************/ 4 | 5 | // Split into multiple files to compile in parallel 6 | 7 | #include "selective_scan/selective_scan_fwd_kernel.cuh" 8 | 9 | 10 | template void selective_scan_fwd_cuda<at::Half, float>(SSMParamsBase & params, cudaStream_t stream);  // NOTE: explicit template arguments assumed (lost in extraction) 11 | 12 | template void selective_scan_fwd_cuda<at::Half, at::Half>(SSMParamsBase & params, cudaStream_t stream); 13 | -------------------------------------------------------------------------------- /cuda_kernel/src/selective_scan/selective_scan_fwd_kernel_fp32.cu: -------------------------------------------------------------------------------- 1 | /****************************************************************************** 2 | * Copyright (c) 2023, Tri Dao. 
3 | ******************************************************************************/ 4 | 5 | // Split into multiple files to compile in parallel 6 | 7 | #include "selective_scan/selective_scan_fwd_kernel.cuh" 8 | 9 | 10 | template void selective_scan_fwd_cuda<float, float>(SSMParamsBase & params, cudaStream_t stream);  // NOTE: explicit template arguments assumed (lost in extraction) 11 | -------------------------------------------------------------------------------- /misc/compare.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AtlasAnalyticsLab/2DMamba/803a7c16ce72d93888543de79afecb158e7a67a3/misc/compare.jpg -------------------------------------------------------------------------------- /misc/cuda.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AtlasAnalyticsLab/2DMamba/803a7c16ce72d93888543de79afecb158e7a67a3/misc/cuda.jpg -------------------------------------------------------------------------------- /misc/overview.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AtlasAnalyticsLab/2DMamba/803a7c16ce72d93888543de79afecb158e7a67a3/misc/overview.jpg -------------------------------------------------------------------------------- /misc/overview_github.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AtlasAnalyticsLab/2DMamba/803a7c16ce72d93888543de79afecb158e7a67a3/misc/overview_github.jpg -------------------------------------------------------------------------------- /v2dmamba_scan/__init__.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from .pscan import * 3 | --------------------------------------------------------------------------------
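`v2dmamba_scan/__init__.py` simply re-exports whatever the compiled `pscan` extension exposes (built by `cuda_kernel/build.sh`, note its `-DOUTPUT_DIRECTORY=../../v2dmamba_scan`), i.e. the `fwd`/`bwd` bindings declared in `cuda_kernel/src/pscan.cu`. A rough usage sketch of the forward entry point, going only by the pybind11 signature; the tensor shapes and layout are assumptions, not documented by the source:

```python
import torch
import v2dmamba_scan  # the compiled extension package

B, D, N, H, W = 1, 192, 1, 32, 32   # batch, channels, state size, 2D grid (assumed layout)
L = H * W                            # the 2D grid flattened into a sequence
u     = torch.randn(B, D, L, device='cuda')
delta = torch.randn(B, D, L, device='cuda')
A     = torch.randn(D, N, device='cuda')
B_mat = torch.randn(B, N, L, device='cuda')
C     = torch.randn(B, N, L, device='cuda')

# Positional args mirror the binding: D_, z_, delta_bias_ are optional (None),
# followed by delta_softplus, height, width, out_float. The return value is a
# list of tensors (the output plus saved intermediates, judging by the bwd
# signature's x_/out_ inputs).
outs = v2dmamba_scan.fwd(u, delta, A, B_mat, C, None, None, None,
                         True, H, W, True)
```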