├── LOGO_.jpg
├── Readme.md
├── classification
│   ├── ade_corruptions.py
│   ├── attacks
│   │   ├── __init__.py
│   │   ├── attack.py
│   │   ├── bim.py
│   │   ├── difgsm.py
│   │   ├── fgsm.py
│   │   ├── mifgsm.py
│   │   ├── pgd.py
│   │   ├── tifgsm.py
│   │   ├── tpgd.py
│   │   └── vmifgsm.py
│   ├── coco_corruptions.py
│   ├── datasets
│   │   ├── __init__.py
│   │   ├── dataset5k.py
│   │   ├── imagenet_b_dataset.py
│   │   ├── imagenet_dataset.py
│   │   └── imagenet_v2_dataset.py
│   ├── evaluate.py
│   ├── evaluate_scanline_infodrop.py
│   ├── generate_adv_images.py
│   ├── image_list.json
│   ├── imagecorruptions
│   │   ├── __init__.py
│   │   ├── corruptions.py
│   │   └── frost
│   │       ├── frost1.png
│   │       ├── frost2.png
│   │       ├── frost3.png
│   │       ├── frost4.jpg
│   │       ├── frost5.jpg
│   │       └── frost6.jpg
│   ├── imgnet_d2imgnet_id.txt
│   ├── inference.py
│   ├── inference_on_imagenet_c.py
│   ├── models
│   │   ├── __init__.py
│   │   ├── csm_triton.py
│   │   ├── csms6s.py
│   │   ├── vmamba.py
│   │   └── vmamba_checks.py
│   ├── scripts
│   │   ├── evaluate_transferability.sh
│   │   ├── gen_adv_images.sh
│   │   ├── get_adv_freq_results.sh
│   │   ├── random_patch_drop.sh
│   │   ├── salient_drop.sh
│   │   ├── scan_line_info_drop.sh
│   │   └── shuffle_image.sh
│   └── vit_models_ipvit
│       ├── __init__.py
│       ├── deit.py
│       ├── deit_ensemble.py
│       ├── deit_modified.py
│       ├── dino.py
│       ├── resnet.py
│       ├── t2t_vit.py
│       ├── t2t_vit_dense.py
│       ├── t2t_vit_ghost.py
│       ├── t2t_vit_se.py
│       ├── tnt.py
│       ├── token_performer.py
│       ├── token_transformer.py
│       ├── transformer_block.py
│       └── vit.py
├── detection
│   ├── __init__.py
│   ├── configs
│   │   ├── _base_
│   │   │   ├── datasets
│   │   │   │   ├── ade20k_instance.py
│   │   │   │   ├── ade20k_panoptic.py
│   │   │   │   ├── ade20k_semantic.py
│   │   │   │   ├── cityscapes_detection.py
│   │   │   │   ├── cityscapes_instance.py
│   │   │   │   ├── coco_caption.py
│   │   │   │   ├── coco_detection.py
│   │   │   │   ├── coco_instance.py
│   │   │   │   ├── coco_instance_semantic.py
│   │   │   │   ├── coco_panoptic.py
│   │   │   │   ├── coco_semantic.py
│   │   │   │   ├── deepfashion.py
│   │   │   │   ├── dsdl.py
│   │   │   │   ├── isaid_instance.py
│   │   │   │   ├── lvis_v0.5_instance.py
│   │   │   │   ├── lvis_v1_instance.py
│   │   │   │   ├── mot_challenge.py
│   │   │   │   ├── mot_challenge_det.py
│   │   │   │   ├── mot_challenge_reid.py
│   │   │   │   ├── objects365v1_detection.py
│   │   │   │   ├── objects365v2_detection.py
│   │   │   │   ├── openimages_detection.py
│   │   │   │   ├── refcoco+.py
│   │   │   │   ├── refcoco.py
│   │   │   │   ├── refcocog.py
│   │   │   │   ├── semi_coco_detection.py
│   │   │   │   ├── v3det.py
│   │   │   │   ├── voc0712.py
│   │   │   │   ├── wider_face.py
│   │   │   │   └── youtube_vis.py
│   │   │   ├── default_runtime.py
│   │   │   ├── models
│   │   │   │   ├── cascade-mask-rcnn_r50_fpn.py
│   │   │   │   ├── cascade-rcnn_r50_fpn.py
│   │   │   │   ├── fast-rcnn_r50_fpn.py
│   │   │   │   ├── faster-rcnn_r50-caffe-c4.py
│   │   │   │   ├── faster-rcnn_r50-caffe-dc5.py
│   │   │   │   ├── faster-rcnn_r50_fpn.py
│   │   │   │   ├── mask-rcnn_r50-caffe-c4.py
│   │   │   │   ├── mask-rcnn_r50_fpn.py
│   │   │   │   ├── retinanet_r50_fpn.py
│   │   │   │   ├── rpn_r50-caffe-c4.py
│   │   │   │   ├── rpn_r50_fpn.py
│   │   │   │   └── ssd300.py
│   │   │   └── schedules
│   │   │       ├── schedule_1x.py
│   │   │       ├── schedule_20e.py
│   │   │       └── schedule_2x.py
│   │   ├── convnext
│   │   │   ├── README.md
│   │   │   ├── cascade-mask-rcnn_convnext-s-p4-w7_fpn_4conv1fc-giou_amp-ms-crop-3x_coco.py
│   │   │   ├── cascade-mask-rcnn_convnext-t-p4-w7_fpn_4conv1fc-giou_amp-ms-crop-3x_coco.py
│   │   │   ├── mask-rcnn_convnext-t-p4-w7_fpn_amp-ms-crop-3x_coco.py
│   │   │   └── metafile.yml
│   │   ├── mask_rcnn
│   │   │   ├── README.md
│   │   │   ├── mask-rcnn_r101-caffe_fpn_1x_coco.py
│   │   │   ├── mask-rcnn_r101-caffe_fpn_ms-poly-3x_coco.py
│   │   │   ├── mask-rcnn_r101_fpn_1x_coco.py
│   │   │   ├── mask-rcnn_r101_fpn_2x_coco.py
│   │   │   ├── mask-rcnn_r101_fpn_8xb8-amp-lsj-200e_coco.py
│   │   │   ├── mask-rcnn_r101_fpn_ms-poly-3x_coco.py
│   │   │   ├── mask-rcnn_r18_fpn_8xb8-amp-lsj-200e_coco.py
│   │   │   ├── mask-rcnn_r50-caffe-c4_1x_coco.py
│   │   │   ├── mask-rcnn_r50-caffe_fpn_1x_coco.py
│   │   │   ├── mask-rcnn_r50-caffe_fpn_ms-1x_coco.py
│   │   │   ├── mask-rcnn_r50-caffe_fpn_ms-poly-1x_coco.py
│   │   │   ├── mask-rcnn_r50-caffe_fpn_ms-poly-2x_coco.py
│   │   │   ├── mask-rcnn_r50-caffe_fpn_ms-poly-3x_coco.py
│   │   │   ├── mask-rcnn_r50-caffe_fpn_poly-1x_coco_v1.py
│   │   │   ├── mask-rcnn_r50_fpn_1x-wandb_coco.py
│   │   │   ├── mask-rcnn_r50_fpn_1x_coco.py
│   │   │   ├── mask-rcnn_r50_fpn_2x_coco.py
│   │   │   ├── mask-rcnn_r50_fpn_8xb8-amp-lsj-200e_coco.py
│   │   │   ├── mask-rcnn_r50_fpn_amp-1x_coco.py
│   │   │   ├── mask-rcnn_r50_fpn_ms-poly-3x_coco.py
│   │   │   ├── mask-rcnn_r50_fpn_poly-1x_coco.py
│   │   │   ├── mask-rcnn_x101-32x4d_fpn_1x_coco.py
│   │   │   ├── mask-rcnn_x101-32x4d_fpn_2x_coco.py
│   │   │   ├── mask-rcnn_x101-32x4d_fpn_ms-poly-3x_coco.py
│   │   │   ├── mask-rcnn_x101-32x8d_fpn_1x_coco.py
│   │   │   ├── mask-rcnn_x101-32x8d_fpn_ms-poly-1x_coco.py
│   │   │   ├── mask-rcnn_x101-32x8d_fpn_ms-poly-3x_coco.py
│   │   │   ├── mask-rcnn_x101-64x4d_fpn_1x_coco.py
│   │   │   ├── mask-rcnn_x101-64x4d_fpn_2x_coco.py
│   │   │   ├── mask-rcnn_x101-64x4d_fpn_ms-poly_3x_coco.py
│   │   │   └── metafile.yml
│   │   ├── swin
│   │   │   ├── README.md
│   │   │   ├── mask-rcnn_swin-s-p4-w7_fpn_amp-ms-crop-3x_coco.py
│   │   │   ├── mask-rcnn_swin-t-p4-w7_fpn_1x_coco.py
│   │   │   ├── mask-rcnn_swin-t-p4-w7_fpn_amp-ms-crop-3x_coco.py
│   │   │   ├── mask-rcnn_swin-t-p4-w7_fpn_ms-crop-3x_coco.py
│   │   │   ├── metafile.yml
│   │   │   └── retinanet_swin-t-p4-w7_fpn_1x_coco.py
│   │   ├── vmamba
│   │   │   ├── mask_rcnn_vmamba_fpn_coco_base.py
│   │   │   ├── mask_rcnn_vmamba_fpn_coco_small.py
│   │   │   ├── mask_rcnn_vmamba_fpn_coco_small_ms_3x.py
│   │   │   ├── mask_rcnn_vmamba_fpn_coco_tiny.py
│   │   │   └── mask_rcnn_vmamba_fpn_coco_tiny_ms_3x.py
│   │   ├── vssm
│   │   │   ├── mask_rcnn_vssm_fpn_coco_base.py
│   │   │   ├── mask_rcnn_vssm_fpn_coco_small.py
│   │   │   ├── mask_rcnn_vssm_fpn_coco_small_ms_3x.py
│   │   │   ├── mask_rcnn_vssm_fpn_coco_tiny.py
│   │   │   └── mask_rcnn_vssm_fpn_coco_tiny_ms_3x.py
│   │   └── vssm1
│   │       ├── mask_rcnn_vssm_fpn_coco_base.py
│   │       ├── mask_rcnn_vssm_fpn_coco_small.py
│   │       ├── mask_rcnn_vssm_fpn_coco_small_ms_3x.py
│   │       ├── mask_rcnn_vssm_fpn_coco_tiny.py
│   │       ├── mask_rcnn_vssm_fpn_coco_tiny1.py
│   │       ├── mask_rcnn_vssm_fpn_coco_tiny1_ms_3x.py
│   │       └── mask_rcnn_vssm_fpn_coco_tiny_ms_3x.py
│   ├── model.py
│   ├── readme.md
│   └── tools
│       ├── analysis_tools
│       │   ├── analyze_logs.py
│       │   ├── analyze_results.py
│       │   ├── benchmark.py
│       │   ├── browse_dataset.py
│       │   ├── coco_error_analysis.py
│       │   ├── coco_occluded_separated_recall.py
│       │   ├── confusion_matrix.py
│       │   ├── eval_metric.py
│       │   ├── fuse_results.py
│       │   ├── get_flops.py
│       │   ├── mot
│       │   │   ├── browse_dataset.py
│       │   │   ├── dist_mot_search.sh
│       │   │   ├── mot_error_visualize.py
│       │   │   ├── mot_param_search.py
│       │   │   └── slurm_mot_search.sh
│       │   ├── optimize_anchors.py
│       │   ├── robustness_eval.py
│       │   └── test_robustness.py
│       ├── dataset_converters
│       │   ├── ade20k2coco.py
│       │   ├── cityscapes.py
│       │   ├── coco_stuff164k.py
│       │   ├── crowdhuman2coco.py
│       │   ├── images2coco.py
│       │   ├── mot2coco.py
│       │   ├── mot2reid.py
│       │   ├── pascal_voc.py
│       │   ├── prepare_coco_semantic_annos_from_panoptic_annos.py
│       │   ├── scripts
│       │   │   ├── preprocess_coco2017.sh
│       │   │   ├── preprocess_voc2007.sh
│       │   │   └── preprocess_voc2012.sh
│       │   └── youtubevis2coco.py
│       ├── deployment
│       │   ├── mmdet2torchserve.py
│       │   ├── mmdet_handler.py
│       │   └── test_torchserver.py
│       ├── dist_test.sh
│       ├── dist_test_tracking.sh
│       ├── dist_train.sh
│       ├── misc
│       │   ├── download_dataset.py
│       │   ├── gen_coco_panoptic_test_info.py
│       │   ├── get_crowdhuman_id_hw.py
│       │   ├── get_image_metas.py
│       │   ├── print_config.py
│       │   └── split_coco.py
│       ├── model_converters
│       │   ├── detectron2_to_mmdet.py
│       │   ├── detectron2pytorch.py
│       │   ├── detic_to_mmdet.py
│       │   ├── glip_to_mmdet.py
│       │   ├── groundingdino_to_mmdet.py
│       │   ├── publish_model.py
│       │   ├── regnet2mmdet.py
│       │   ├── selfsup2mmdet.py
│       │   ├── swinv1_to_mmdet.py
│       │   ├── upgrade_model_version.py
│       │   └── upgrade_ssd_version.py
│       ├── slurm_test.sh
│       ├── slurm_test_tracking.sh
│       ├── slurm_train.sh
│       ├── test.py
│       ├── test_tracking.py
│       └── train.py
├── kernels
│   └── selective_scan
│       ├── README.md
│       ├── csrc
│       │   └── selective_scan
│       │       ├── cub_extra.cuh
│       │       ├── cus
│       │       │   ├── selective_scan.cpp
│       │       │   ├── selective_scan_bwd_kernel.cuh
│       │       │   ├── selective_scan_core_bwd.cu
│       │       │   ├── selective_scan_core_fwd.cu
│       │       │   └── selective_scan_fwd_kernel.cuh
│       │       ├── cusndstate
│       │       │   ├── selective_scan_bwd_kernel_ndstate.cuh
│       │       │   ├── selective_scan_core_bwd.cu
│       │       │   ├── selective_scan_core_fwd.cu
│       │       │   ├── selective_scan_fwd_kernel_ndstate.cuh
│       │       │   ├── selective_scan_ndstate.cpp
│       │       │   └── selective_scan_ndstate.h
│       │       ├── cusnrow
│       │       │   ├── selective_scan_bwd_kernel_nrow.cuh
│       │       │   ├── selective_scan_core_bwd.cu
│       │       │   ├── selective_scan_core_bwd2.cu
│       │       │   ├── selective_scan_core_bwd3.cu
│       │       │   ├── selective_scan_core_bwd4.cu
│       │       │   ├── selective_scan_core_fwd.cu
│       │       │   ├── selective_scan_core_fwd2.cu
│       │       │   ├── selective_scan_core_fwd3.cu
│       │       │   ├── selective_scan_core_fwd4.cu
│       │       │   ├── selective_scan_fwd_kernel_nrow.cuh
│       │       │   └── selective_scan_nrow.cpp
│       │       ├── cusoflex
│       │       │   ├── selective_scan_bwd_kernel_oflex.cuh
│       │       │   ├── selective_scan_core_bwd.cu
│       │       │   ├── selective_scan_core_fwd.cu
│       │       │   ├── selective_scan_fwd_kernel_oflex.cuh
│       │       │   └── selective_scan_oflex.cpp
│       │       ├── reverse_scan.cuh
│       │       ├── selective_scan.h
│       │       ├── selective_scan_common.h
│       │       ├── static_switch.h
│       │       └── uninitialized_copy.cuh
│       ├── setup.py
│       └── test_selective_scan.py
├── pretrained_weights
│   └── .gitkeep
├── req.txt
└── segmentation
    ├── __init__.py
    ├── configs
    │   ├── _base_
    │   │   ├── datasets
    │   │   │   ├── ade20k.py
    │   │   │   ├── ade20k_640x640.py
    │   │   │   ├── bdd100k.py
    │   │   │   ├── chase_db1.py
    │   │   │   ├── cityscapes.py
    │   │   │   ├── cityscapes_1024x1024.py
    │   │   │   ├── cityscapes_768x768.py
    │   │   │   ├── cityscapes_769x769.py
    │   │   │   ├── cityscapes_832x832.py
    │   │   │   ├── coco-stuff10k.py
    │   │   │   ├── coco-stuff164k.py
    │   │   │   ├── drive.py
    │   │   │   ├── hrf.py
    │   │   │   ├── isaid.py
    │   │   │   ├── levir_256x256.py
    │   │   │   ├── loveda.py
    │   │   │   ├── mapillary_v1.py
    │   │   │   ├── mapillary_v1_65.py
    │   │   │   ├── mapillary_v2.py
    │   │   │   ├── nyu.py
    │   │   │   ├── nyu_512x512.py
    │   │   │   ├── pascal_context.py
    │   │   │   ├── pascal_context_59.py
    │   │   │   ├── pascal_voc12.py
    │   │   │   ├── pascal_voc12_aug.py
    │   │   │   ├── potsdam.py
    │   │   │   ├── refuge.py
    │   │   │   ├── stare.py
    │   │   │   ├── synapse.py
    │   │   │   └── vaihingen.py
    │   │   ├── default_runtime.py
    │   │   ├── models
    │   │   │   ├── ann_r50-d8.py
    │   │   │   ├── apcnet_r50-d8.py
    │   │   │   ├── bisenetv1_r18-d32.py
    │   │   │   ├── bisenetv2.py
    │   │   │   ├── ccnet_r50-d8.py
    │   │   │   ├── cgnet.py
    │   │   │   ├── danet_r50-d8.py
    │   │   │   ├── deeplabv3_r50-d8.py
    │   │   │   ├── deeplabv3_unet_s5-d16.py
    │   │   │   ├── deeplabv3plus_r50-d8.py
    │   │   │   ├── dmnet_r50-d8.py
    │   │   │   ├── dnl_r50-d8.py
    │   │   │   ├── dpt_vit-b16.py
    │   │   │   ├── emanet_r50-d8.py
    │   │   │   ├── encnet_r50-d8.py
    │   │   │   ├── erfnet_fcn.py
    │   │   │   ├── fast_scnn.py
    │   │   │   ├── fastfcn_r50-d32_jpu_psp.py
    │   │   │   ├── fcn_hr18.py
    │   │   │   ├── fcn_r50-d8.py
    │   │   │   ├── fcn_unet_s5-d16.py
    │   │   │   ├── fpn_poolformer_s12.py
    │   │   │   ├── fpn_r50.py
    │   │   │   ├── gcnet_r50-d8.py
    │   │   │   ├── icnet_r50-d8.py
    │   │   │   ├── isanet_r50-d8.py
    │   │   │   ├── lraspp_m-v3-d8.py
    │   │   │   ├── nonlocal_r50-d8.py
    │   │   │   ├── ocrnet_hr18.py
    │   │   │   ├── ocrnet_r50-d8.py
    │   │   │   ├── pointrend_r50.py
    │   │   │   ├── psanet_r50-d8.py
    │   │   │   ├── pspnet_r50-d8.py
    │   │   │   ├── pspnet_unet_s5-d16.py
    │   │   │   ├── san_vit-b16.py
    │   │   │   ├── segformer_mit-b0.py
    │   │   │   ├── segmenter_vit-b16_mask.py
    │   │   │   ├── setr_mla.py
    │   │   │   ├── setr_naive.py
    │   │   │   ├── setr_pup.py
    │   │   │   ├── stdc.py
    │   │   │   ├── twins_pcpvt-s_fpn.py
    │   │   │   ├── twins_pcpvt-s_upernet.py
    │   │   │   ├── upernet_beit.py
    │   │   │   ├── upernet_convnext.py
    │   │   │   ├── upernet_mae.py
    │   │   │   ├── upernet_r50.py
    │   │   │   ├── upernet_swin.py
    │   │   │   ├── upernet_vit-b16_ln_mln.py
    │   │   │   └── vpd_sd.py
    │   │   └── schedules
    │   │       ├── schedule_160k.py
    │   │       ├── schedule_20k.py
    │   │       ├── schedule_240k.py
    │   │       ├── schedule_25k.py
    │   │       ├── schedule_320k.py
    │   │       ├── schedule_40k.py
    │   │       └── schedule_80k.py
    │   ├── convnext
    │   │   ├── README.md
    │   │   ├── convnext-base_upernet_8xb2-amp-160k_ade20k-512x512.py
    │   │   ├── convnext-base_upernet_8xb2-amp-160k_ade20k-640x640.py
    │   │   ├── convnext-large_upernet_8xb2-amp-160k_ade20k-640x640.py
    │   │   ├── convnext-small_upernet_8xb2-amp-160k_ade20k-512x512.py
    │   │   ├── convnext-tiny_upernet_8xb2-amp-160k_ade20k-512x512.py
    │   │   ├── convnext-xlarge_upernet_8xb2-amp-160k_ade20k-640x640.py
    │   │   └── metafile.yaml
    │   ├── swin
    │   │   ├── README.md
    │   │   ├── metafile.yaml
    │   │   ├── swin-base-patch4-window12-in1k-384x384-pre_upernet_8xb2-160k_ade20k-512x512.py
    │   │   ├── swin-base-patch4-window12-in22k-384x384-pre_upernet_8xb2-160k_ade20k-512x512.py
    │   │   ├── swin-base-patch4-window7-in1k-pre_upernet_8xb2-160k_ade20k-512x512.py
    │   │   ├── swin-base-patch4-window7-in22k-pre_upernet_8xb2-160k_ade20k-512x512.py
    │   │   ├── swin-large-patch4-window12-in22k-384x384-pre_upernet_8xb2-160k_ade20k-512x512.py
    │   │   ├── swin-large-patch4-window7-in22k-pre_upernet_8xb2-160k_ade20k-512x512.py
    │   │   ├── swin-small-patch4-window7-in1k-pre_upernet_8xb2-160k_ade20k-512x512.py
    │   │   ├── swin-tiny-patch4-window7-in1k-pre_upernet_8xb2-160k_ade20k-512x512.py
    │   │   └── swin-tiny-patch4-window7_upernet_1xb8-20k_levir-256x256.py
    │   ├── upernet
    │   │   ├── README.md
    │   │   ├── metafile.yaml
    │   │   ├── upernet_r101_4xb2-40k_cityscapes-512x1024.py
    │   │   ├── upernet_r101_4xb2-40k_cityscapes-769x769.py
    │   │   ├── upernet_r101_4xb2-80k_cityscapes-512x1024.py
    │   │   ├── upernet_r101_4xb2-80k_cityscapes-769x769.py
    │   │   ├── upernet_r101_4xb4-160k_ade20k-512x512.py
    │   │   ├── upernet_r101_4xb4-20k_voc12aug-512x512.py
    │   │   ├── upernet_r101_4xb4-40k_voc12aug-512x512.py
    │   │   ├── upernet_r101_4xb4-80k_ade20k-512x512.py
    │   │   ├── upernet_r18_4xb2-40k_cityscapes-512x1024.py
    │   │   ├── upernet_r18_4xb2-80k_cityscapes-512x1024.py
    │   │   ├── upernet_r18_4xb4-160k_ade20k-512x512.py
    │   │   ├── upernet_r18_4xb4-20k_voc12aug-512x512.py
    │   │   ├── upernet_r18_4xb4-40k_voc12aug-512x512.py
    │   │   ├── upernet_r18_4xb4-80k_ade20k-512x512.py
    │   │   ├── upernet_r50_4xb2-40k_cityscapes-512x1024.py
    │   │   ├── upernet_r50_4xb2-40k_cityscapes-769x769.py
    │   │   ├── upernet_r50_4xb2-80k_cityscapes-512x1024.py
    │   │   ├── upernet_r50_4xb2-80k_cityscapes-769x769.py
    │   │   ├── upernet_r50_4xb4-160k_ade20k-512x512.py
    │   │   ├── upernet_r50_4xb4-20k_voc12aug-512x512.py
    │   │   ├── upernet_r50_4xb4-40k_voc12aug-512x512.py
    │   │   └── upernet_r50_4xb4-80k_ade20k-512x512.py
    │   ├── vit
    │   │   ├── README.md
    │   │   ├── metafile.yaml
    │   │   ├── vit_deit-b16-ln_mln_upernet_8xb2-160k_ade20k-512x512.py
    │   │   ├── vit_deit-b16_mln_upernet_8xb2-160k_ade20k-512x512.py
    │   │   ├── vit_deit-b16_upernet_8xb2-160k_ade20k-512x512.py
    │   │   ├── vit_deit-b16_upernet_8xb2-80k_ade20k-512x512.py
    │   │   ├── vit_deit-s16-ln_mln_upernet_8xb2-160k_ade20k-512x512.py
    │   │   ├── vit_deit-s16_mln_upernet_8xb2-160k_ade20k-512x512.py
    │   │   ├── vit_deit-s16_upernet_8xb2-160k_ade20k-512x512.py
    │   │   ├── vit_deit-s16_upernet_8xb2-80k_ade20k-512x512.py
    │   │   ├── vit_vit-b16-ln_mln_upernet_8xb2-160k_ade20k-512x512.py
    │   │   ├── vit_vit-b16_mln_upernet_8xb2-160k_ade20k-512x512.py
    │   │   └── vit_vit-b16_mln_upernet_8xb2-80k_ade20k-512x512.py
    │   ├── vmamba
    │   │   ├── upernet_vmamba_4xb4-160k_ade20k-512x512_base.py
    │   │   ├── upernet_vmamba_4xb4-160k_ade20k-512x512_small.py
    │   │   ├── upernet_vmamba_4xb4-160k_ade20k-512x512_tiny.py
    │   │   ├── upernet_vmamba_4xb4-160k_ade20k-640x640_small.py
    │   │   └── upernet_vmamba_4xb4-160k_ade20k-896x896_small.py
    │   ├── vssm
    │   │   ├── upernet_convnext_4xb4-160k_ade20k-640x640_small.py
    │   │   ├── upernet_convnext_4xb4-160k_ade20k-896x896_small.py
    │   │   ├── upernet_internimage_g_896_160k_ade20k.py
    │   │   ├── upernet_swin_4xb4-160k_ade20k-640x640_small.py
    │   │   ├── upernet_swin_4xb4-160k_ade20k-896x896_small.py
    │   │   ├── upernet_vssm_4xb4-160k_ade20k-512x512_base.py
    │   │   ├── upernet_vssm_4xb4-160k_ade20k-512x512_small.py
    │   │   ├── upernet_vssm_4xb4-160k_ade20k-512x512_tiny.py
    │   │   ├── upernet_vssm_4xb4-160k_ade20k-640x640_small.py
    │   │   └── upernet_vssm_4xb4-160k_ade20k-896x896_small.py
    │   └── vssm1
    │       ├── upernet_vssm_4xb4-160k_ade20k-512x512_base.py
    │       ├── upernet_vssm_4xb4-160k_ade20k-512x512_small.py
    │       ├── upernet_vssm_4xb4-160k_ade20k-512x512_tiny.py
    │       ├── upernet_vssm_4xb4-160k_ade20k-512x512_tiny1.py
    │       ├── upernet_vssm_4xb4-160k_ade20k-640x640_small.py
    │       └── upernet_vssm_4xb4-160k_ade20k-896x896_small.py
    ├── model.py
    ├── readme.md
    └── tools
        ├── analysis_tools
        │   ├── analyze_logs.py
        │   ├── benchmark.py
        │   ├── browse_dataset.py
        │   ├── confusion_matrix.py
        │   ├── get_flops.py
        │   └── visualization_cam.py
        ├── dataset_converters
        │   ├── chase_db1.py
        │   ├── cityscapes.py
        │   ├── coco_stuff10k.py
        │   ├── coco_stuff164k.py
        │   ├── drive.py
        │   ├── hrf.py
        │   ├── isaid.py
        │   ├── levircd.py
        │   ├── loveda.py
        │   ├── nyu.py
        │   ├── pascal_context.py
        │   ├── potsdam.py
        │   ├── refuge.py
        │   ├── stare.py
        │   ├── synapse.py
        │   ├── vaihingen.py
        │   └── voc_aug.py
        ├── deployment
        │   └── pytorch2torchscript.py
        ├── dist_test.sh
        ├── dist_train.sh
        ├── misc
        │   ├── browse_dataset.py
        │   ├── print_config.py
        │   └── publish_model.py
        ├── model_converters
        │   ├── beit2mmseg.py
        │   ├── clip2mmseg.py
        │   ├── mit2mmseg.py
        │   ├── san2mmseg.py
        │   ├── stdc2mmseg.py
        │   ├── swin2mmseg.py
        │   ├── twins2mmseg.py
        │   ├── vit2mmseg.py
        │   └── vitjax2mmseg.py
        ├── slurm_test.sh
        ├── slurm_train.sh
        ├── test.py
        ├── torchserve
        │   ├── mmseg2torchserve.py
        │   ├── mmseg_handler.py
        │   └── test_torchserve.py
        └── train.py

/LOGO_.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HashmatShadab/MambaRobustness/e69a676bbbd072b6bc9b2c806f257a675682f6cd/LOGO_.jpg
--------------------------------------------------------------------------------
/classification/attacks/__init__.py:
--------------------------------------------------------------------------------
from .pgd import PGD
from .fgsm import FGSM
from .bim import BIM
from .mifgsm import MIFGSM
from .difgsm import DIFGSM
from .tpgd import TPGD
from .tifgsm import TIFGSM
from .vmifgsm import VMIFGSM
--------------------------------------------------------------------------------
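The attacks package exposes eight gradient-based attacks through its __init__.py. A minimal sketch of invoking one of them from inside classification/ follows; the eps/alpha/steps constructor arguments are an assumption (torchattacks-style API suggested by the class names), so check attacks/pgd.py for the real signature.

import torch
import torchvision
from attacks import PGD  # assumes the script runs from the classification/ directory

model = torchvision.models.resnet50(weights="IMAGENET1K_V1").eval()
# eps/alpha/steps are assumed torchattacks-style arguments, not a documented API
attack = PGD(model, eps=8 / 255, alpha=2 / 255, steps=10)

images = torch.rand(4, 3, 224, 224)        # stand-in for a clean batch in [0, 1]
labels = torch.randint(0, 1000, (4,))
adv_images = attack(images, labels)        # perturbed batch, same shape as input
assert (adv_images - images).abs().max() <= 8 / 255 + 1e-6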
/classification/datasets/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HashmatShadab/MambaRobustness/e69a676bbbd072b6bc9b2c806f257a675682f6cd/classification/datasets/__init__.py
--------------------------------------------------------------------------------
/classification/datasets/dataset5k.py:
--------------------------------------------------------------------------------
import torchvision
import json
import torch
import os


class ImageNet5k(torchvision.datasets.ImageFolder):

    def __init__(self, image_list="./image_list.json", *args, **kwargs):
        self.image_list = set(json.load(open(image_list, "r"))["images"])
        super(ImageNet5k, self).__init__(is_valid_file=self.is_valid_file, *args, **kwargs)

    def is_valid_file(self, x: str) -> bool:
        file_path = x
        # get image name
        image_name = os.path.basename(file_path)
        # get parent folder name (the WordNet-ID class folder)
        folder_name = os.path.basename(os.path.dirname(file_path))
        return f"{folder_name}/{image_name}" in self.image_list


if __name__ == "__main__":
    import matplotlib.pyplot as plt
    import torchvision.transforms as transforms

    # Load the ImageNet validation set, filtered to the images in image_list.json
    imagenet = ImageNet5k(root=r"datasets\ImageNet\val", transform=transforms.Compose([transforms.Resize(256), transforms.CenterCrop(224), transforms.ToTensor()]))

    dataloader = torch.utils.data.DataLoader(imagenet, batch_size=50, shuffle=True)

    for i, (img, label) in enumerate(dataloader):
        print(i, img.shape)
--------------------------------------------------------------------------------
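ImageNet5k keeps only the validation images whose "folder/filename" key appears in image_list.json, so the JSON is expected to hold a single "images" list. A sketch of building such a file from an existing ImageFolder layout (the path and the five-per-class choice are hypothetical):

import json
import os

val_root = "datasets/ImageNet/val"        # hypothetical location of the val set
keep = []
for folder in sorted(os.listdir(val_root)):
    for name in sorted(os.listdir(os.path.join(val_root, folder)))[:5]:
        keep.append(f"{folder}/{name}")   # same "folder/image" key is_valid_file checks

with open("image_list.json", "w") as f:
    json.dump({"images": keep}, f)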
/classification/imagecorruptions/frost/frost1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HashmatShadab/MambaRobustness/e69a676bbbd072b6bc9b2c806f257a675682f6cd/classification/imagecorruptions/frost/frost1.png
--------------------------------------------------------------------------------
/classification/imagecorruptions/frost/frost2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HashmatShadab/MambaRobustness/e69a676bbbd072b6bc9b2c806f257a675682f6cd/classification/imagecorruptions/frost/frost2.png
--------------------------------------------------------------------------------
/classification/imagecorruptions/frost/frost3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HashmatShadab/MambaRobustness/e69a676bbbd072b6bc9b2c806f257a675682f6cd/classification/imagecorruptions/frost/frost3.png
--------------------------------------------------------------------------------
/classification/imagecorruptions/frost/frost4.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HashmatShadab/MambaRobustness/e69a676bbbd072b6bc9b2c806f257a675682f6cd/classification/imagecorruptions/frost/frost4.jpg
--------------------------------------------------------------------------------
/classification/imagecorruptions/frost/frost5.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HashmatShadab/MambaRobustness/e69a676bbbd072b6bc9b2c806f257a675682f6cd/classification/imagecorruptions/frost/frost5.jpg
--------------------------------------------------------------------------------
/classification/imagecorruptions/frost/frost6.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HashmatShadab/MambaRobustness/e69a676bbbd072b6bc9b2c806f257a675682f6cd/classification/imagecorruptions/frost/frost6.jpg
--------------------------------------------------------------------------------
/classification/scripts/evaluate_transferability.sh:
--------------------------------------------------------------------------------
#!/bin/bash

batch_size=${1:-64}

#model_names=("resnet18" "resnet50" "vgg16_bn" "vgg19_bn" "densenet121" "densenet161" "vit_tiny_patch16_224" "vit_small_patch16_224" "vit_base_patch16_224" "deit_tiny_patch16_224" "deit_small_patch16_224" "deit_base_patch16_224" "swin_tiny_patch4_window7_224" "swin_small_patch4_window7_224" "swin_base_patch4_window7_224" "vssm_tiny_v2" "vssm_small_v2" "vssm_base_v2")

model_names=("resnet50")

for data_path in AdvExamples/*/*/*.pt
do
    echo "Evaluating transferability for adversarial examples: ${data_path}"
    for model_name in "${model_names[@]}"
    do
        echo "Evaluating transferability for ${model_name}"
        python inference.py --dataset imagenet_adv --data_dir ${data_path} --batch_size ${batch_size} --source_model_name ${model_name}
    done
done
--------------------------------------------------------------------------------
/classification/vit_models_ipvit/__init__.py:
--------------------------------------------------------------------------------
# from .deit import *
# from .deit_ensemble import *
# from .deit_modified import *
from .dino import *
# from .t2t_vit import *
# from .t2t_vit_dense import *
# from .t2t_vit_ghost import *
# from .t2t_vit_se import *
# from .tnt import *
# from .vit import *
# from .resnet import drop_resnet50
--------------------------------------------------------------------------------
/detection/__init__.py:
--------------------------------------------------------------------------------
# configs/ and tools/ are copied from https://github.com/open-mmlab/mmdetection (version 3.3.0)
# tools/train.py#12 is added with "import model"
--------------------------------------------------------------------------------
/detection/configs/_base_/datasets/ade20k_instance.py:
--------------------------------------------------------------------------------
# dataset settings
dataset_type = 'ADE20KInstanceDataset'
data_root = 'data/ADEChallengeData2016/'

# Example to use different file client
# Method 1: simply set the data root and let the file I/O module
# automatically infer from prefix (not support LMDB and Memcache yet)

# data_root = 's3://openmmlab/datasets/detection/ADEChallengeData2016/'

# Method 2: Use `backend_args`, `file_client_args` in versions before 3.0.0rc6
# backend_args = dict(
#     backend='petrel',
#     path_mapping=dict({
#         './data/': 's3://openmmlab/datasets/detection/',
#         'data/': 's3://openmmlab/datasets/detection/'
#     }))
backend_args = None

test_pipeline = [
    dict(type='LoadImageFromFile', backend_args=backend_args),
    dict(type='Resize', scale=(2560, 640), keep_ratio=True),
    # If you don't have a gt annotation, delete the pipeline
    dict(type='LoadAnnotations', with_bbox=True, with_mask=True),
    dict(
        type='PackDetInputs',
        meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
                   'scale_factor'))
]

val_dataloader = dict(
    batch_size=1,
    num_workers=2,
    persistent_workers=True,
    drop_last=False,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        ann_file='ade20k_instance_val.json',
        data_prefix=dict(img='images/validation'),
        test_mode=True,
        pipeline=test_pipeline,
        backend_args=backend_args))
test_dataloader = val_dataloader

val_evaluator = dict(
    type='CocoMetric',
    ann_file=data_root + 'ade20k_instance_val.json',
    metric=['bbox', 'segm'],
    format_only=False,
    backend_args=backend_args)
test_evaluator = val_evaluator
--------------------------------------------------------------------------------
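These _base_ files are plain mmengine config files: downstream configs pull them in through _base_ and override individual fields. For reference, a loaded config can be inspected with the standard mmengine API (the path below is just this file's location in the repo):

from mmengine.config import Config

cfg = Config.fromfile(
    "detection/configs/_base_/datasets/ade20k_instance.py")
print(cfg.val_dataloader.dataset.ann_file)   # -> 'ade20k_instance_val.json'
print(cfg.val_evaluator.metric)              # -> ['bbox', 'segm']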
/detection/configs/_base_/datasets/ade20k_panoptic.py:
--------------------------------------------------------------------------------
# dataset settings
dataset_type = 'ADE20KPanopticDataset'
data_root = 'data/ADEChallengeData2016/'

backend_args = None

test_pipeline = [
    dict(type='LoadImageFromFile', backend_args=backend_args),
    dict(type='Resize', scale=(2560, 640), keep_ratio=True),
    dict(type='LoadPanopticAnnotations', backend_args=backend_args),
    dict(
        type='PackDetInputs',
        meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
                   'scale_factor'))
]

val_dataloader = dict(
    batch_size=1,
    num_workers=0,
    persistent_workers=False,
    drop_last=False,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        ann_file='ade20k_panoptic_val.json',
        data_prefix=dict(img='images/validation/', seg='ade20k_panoptic_val/'),
        test_mode=True,
        pipeline=test_pipeline,
        backend_args=backend_args))
test_dataloader = val_dataloader

val_evaluator = dict(
    type='CocoPanopticMetric',
    ann_file=data_root + 'ade20k_panoptic_val.json',
    seg_prefix=data_root + 'ade20k_panoptic_val/',
    backend_args=backend_args)
test_evaluator = val_evaluator
--------------------------------------------------------------------------------
/detection/configs/_base_/datasets/ade20k_semantic.py:
--------------------------------------------------------------------------------
dataset_type = 'ADE20KSegDataset'
data_root = 'data/ADEChallengeData2016/'

# Example to use different file client
# Method 1: simply set the data root and let the file I/O module
# automatically infer from prefix (not support LMDB and Memcache yet)

# data_root = 's3://openmmlab/datasets/detection/ADEChallengeData2016/'

# Method 2: Use `backend_args`, `file_client_args` in versions before 3.0.0rc6
# backend_args = dict(
#     backend='petrel',
#     path_mapping=dict({
#         './data/': 's3://openmmlab/datasets/detection/',
#         'data/': 's3://openmmlab/datasets/detection/'
#     }))
backend_args = None

test_pipeline = [
    dict(type='LoadImageFromFile', backend_args=backend_args),
    dict(type='Resize', scale=(2048, 512), keep_ratio=True),
    dict(
        type='LoadAnnotations',
        with_bbox=False,
        with_mask=False,
        with_seg=True,
        reduce_zero_label=True),
    dict(
        type='PackDetInputs', meta_keys=('img_path', 'ori_shape', 'img_shape'))
]

val_dataloader = dict(
    batch_size=1,
    num_workers=2,
    persistent_workers=True,
    drop_last=False,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_prefix=dict(
            img_path='images/validation',
            seg_map_path='annotations/validation'),
        pipeline=test_pipeline))
test_dataloader = val_dataloader

val_evaluator = dict(type='SemSegMetric', iou_metrics=['mIoU'])
test_evaluator = val_evaluator
--------------------------------------------------------------------------------
/detection/configs/_base_/datasets/lvis_v1_instance.py:
--------------------------------------------------------------------------------
# dataset settings
_base_ = 'lvis_v0.5_instance.py'
dataset_type = 'LVISV1Dataset'
data_root = 'data/lvis_v1/'

train_dataloader = dict(
    dataset=dict(
        dataset=dict(
            type=dataset_type,
            data_root=data_root,
            ann_file='annotations/lvis_v1_train.json',
            data_prefix=dict(img=''))))
val_dataloader = dict(
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        ann_file='annotations/lvis_v1_val.json',
        data_prefix=dict(img='')))
test_dataloader = val_dataloader

val_evaluator = dict(ann_file=data_root + 'annotations/lvis_v1_val.json')
test_evaluator = val_evaluator
--------------------------------------------------------------------------------
/detection/configs/_base_/datasets/refcoco+.py:
--------------------------------------------------------------------------------
# dataset settings
dataset_type = 'RefCocoDataset'
data_root = 'data/coco/'

backend_args = None

test_pipeline = [
    dict(type='LoadImageFromFile', backend_args=backend_args),
    dict(type='Resize', scale=(1333, 800), keep_ratio=True),
    dict(
        type='LoadAnnotations',
        with_mask=True,
        with_bbox=False,
        with_seg=False,
        with_label=False),
    dict(
        type='PackDetInputs',
        meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
                   'scale_factor', 'gt_masks', 'text'))
]

val_dataloader = dict(
    batch_size=1,
    num_workers=2,
    persistent_workers=True,
    drop_last=False,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_prefix=dict(img_path='train2014/'),
        ann_file='refcoco+/instances.json',
        split_file='refcoco+/refs(unc).p',
        split='val',
        text_mode='select_first',
        pipeline=test_pipeline))

test_dataloader = dict(
    batch_size=1,
    num_workers=2,
    persistent_workers=True,
    drop_last=False,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_prefix=dict(img_path='train2014/'),
        ann_file='refcoco+/instances.json',
        split_file='refcoco+/refs(unc).p',
        split='testA',  # or 'testB'
        text_mode='select_first',
        pipeline=test_pipeline))

val_evaluator = dict(type='RefSegMetric', metric=['cIoU', 'mIoU'])
test_evaluator = val_evaluator
--------------------------------------------------------------------------------
/detection/configs/_base_/datasets/refcoco.py:
--------------------------------------------------------------------------------
# dataset settings
dataset_type = 'RefCocoDataset'
data_root = 'data/coco/'

backend_args = None

test_pipeline = [
    dict(type='LoadImageFromFile', backend_args=backend_args),
    dict(type='Resize', scale=(1333, 800), keep_ratio=True),
    dict(
        type='LoadAnnotations',
        with_mask=True,
        with_bbox=False,
        with_seg=False,
        with_label=False),
    dict(
        type='PackDetInputs',
        meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
                   'scale_factor', 'gt_masks', 'text'))
]

val_dataloader = dict(
    batch_size=1,
    num_workers=2,
    persistent_workers=True,
    drop_last=False,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_prefix=dict(img_path='train2014/'),
        ann_file='refcoco/instances.json',
        split_file='refcoco/refs(unc).p',
        split='val',
        text_mode='select_first',
        pipeline=test_pipeline))

test_dataloader = dict(
    batch_size=1,
    num_workers=2,
    persistent_workers=True,
    drop_last=False,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_prefix=dict(img_path='train2014/'),
        ann_file='refcoco/instances.json',
        split_file='refcoco/refs(unc).p',
        split='testA',  # or 'testB'
        text_mode='select_first',
        pipeline=test_pipeline))

val_evaluator = dict(type='RefSegMetric', metric=['cIoU', 'mIoU'])
test_evaluator = val_evaluator
--------------------------------------------------------------------------------
/detection/configs/_base_/datasets/refcocog.py:
--------------------------------------------------------------------------------
# dataset settings
dataset_type = 'RefCocoDataset'
data_root = 'data/coco/'

backend_args = None

test_pipeline = [
    dict(type='LoadImageFromFile', backend_args=backend_args),
    dict(type='Resize', scale=(1333, 800), keep_ratio=True),
    dict(
        type='LoadAnnotations',
        with_mask=True,
        with_bbox=False,
        with_seg=False,
        with_label=False),
    dict(
        type='PackDetInputs',
        meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
                   'scale_factor', 'gt_masks', 'text'))
]

val_dataloader = dict(
    batch_size=1,
    num_workers=2,
    persistent_workers=True,
    drop_last=False,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_prefix=dict(img_path='train2014/'),
        ann_file='refcocog/instances.json',
        split_file='refcocog/refs(umd).p',
        split='val',
        text_mode='select_first',
        pipeline=test_pipeline))

test_dataloader = dict(
    batch_size=1,
    num_workers=2,
    persistent_workers=True,
    drop_last=False,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_prefix=dict(img_path='train2014/'),
        ann_file='refcocog/instances.json',
        split_file='refcocog/refs(umd).p',
        split='test',
        text_mode='select_first',
        pipeline=test_pipeline))

val_evaluator = dict(type='RefSegMetric', metric=['cIoU', 'mIoU'])
test_evaluator = val_evaluator
--------------------------------------------------------------------------------
/detection/configs/_base_/default_runtime.py:
--------------------------------------------------------------------------------
default_scope = 'mmdet'

default_hooks = dict(
    timer=dict(type='IterTimerHook'),
    logger=dict(type='LoggerHook', interval=50),
    param_scheduler=dict(type='ParamSchedulerHook'),
    checkpoint=dict(type='CheckpointHook', interval=1),
    sampler_seed=dict(type='DistSamplerSeedHook'),
    visualization=dict(type='DetVisualizationHook'))

env_cfg = dict(
    cudnn_benchmark=False,
    mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0),
    dist_cfg=dict(backend='nccl'),
)

vis_backends = [dict(type='LocalVisBackend')]
visualizer = dict(
    type='DetLocalVisualizer', vis_backends=vis_backends, name='visualizer')
log_processor = dict(type='LogProcessor', window_size=50, by_epoch=True)

log_level = 'INFO'
load_from = None
resume = False
--------------------------------------------------------------------------------
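Every hook in default_hooks can be overridden field-by-field from a child config rather than redeclared; the wandb variant further below does exactly this for the checkpoint interval. A minimal illustration (the interval values are arbitrary examples):

# In a downstream config that inherits default_runtime.py:
_base_ = ['../_base_/default_runtime.py']

# Only the touched keys change; the rest of each hook config is merged in.
default_hooks = dict(
    logger=dict(interval=100),        # log every 100 iters instead of 50
    checkpoint=dict(interval=4),      # save every 4 epochs instead of 1
)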
/detection/configs/_base_/schedules/schedule_1x.py:
--------------------------------------------------------------------------------
# training schedule for 1x
train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=12, val_interval=1)
val_cfg = dict(type='ValLoop')
test_cfg = dict(type='TestLoop')

# learning rate
param_scheduler = [
    dict(
        type='LinearLR', start_factor=0.001, by_epoch=False, begin=0, end=500),
    dict(
        type='MultiStepLR',
        begin=0,
        end=12,
        by_epoch=True,
        milestones=[8, 11],
        gamma=0.1)
]

# optimizer
optim_wrapper = dict(
    type='OptimWrapper',
    optimizer=dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001))

# Default setting for scaling LR automatically
# - `enable` means enable scaling LR automatically
#   or not by default.
# - `base_batch_size` = (8 GPUs) x (2 samples per GPU).
auto_scale_lr = dict(enable=False, base_batch_size=16)
--------------------------------------------------------------------------------
/detection/configs/_base_/schedules/schedule_20e.py:
--------------------------------------------------------------------------------
# training schedule for 20e
train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=20, val_interval=1)
val_cfg = dict(type='ValLoop')
test_cfg = dict(type='TestLoop')

# learning rate
param_scheduler = [
    dict(
        type='LinearLR', start_factor=0.001, by_epoch=False, begin=0, end=500),
    dict(
        type='MultiStepLR',
        begin=0,
        end=20,
        by_epoch=True,
        milestones=[16, 19],
        gamma=0.1)
]

# optimizer
optim_wrapper = dict(
    type='OptimWrapper',
    optimizer=dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001))

# Default setting for scaling LR automatically
# - `enable` means enable scaling LR automatically
#   or not by default.
# - `base_batch_size` = (8 GPUs) x (2 samples per GPU).
auto_scale_lr = dict(enable=False, base_batch_size=16)
--------------------------------------------------------------------------------
/detection/configs/_base_/schedules/schedule_2x.py:
--------------------------------------------------------------------------------
# training schedule for 2x
train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=24, val_interval=1)
val_cfg = dict(type='ValLoop')
test_cfg = dict(type='TestLoop')

# learning rate
param_scheduler = [
    dict(
        type='LinearLR', start_factor=0.001, by_epoch=False, begin=0, end=500),
    dict(
        type='MultiStepLR',
        begin=0,
        end=24,
        by_epoch=True,
        milestones=[16, 22],
        gamma=0.1)
]

# optimizer
optim_wrapper = dict(
    type='OptimWrapper',
    optimizer=dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001))

# Default setting for scaling LR automatically
# - `enable` means enable scaling LR automatically
#   or not by default.
# - `base_batch_size` = (8 GPUs) x (2 samples per GPU).
auto_scale_lr = dict(enable=False, base_batch_size=16)
--------------------------------------------------------------------------------
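All three schedules share the same linear-scaling convention: lr=0.02 is tuned for base_batch_size = 16 (8 GPUs x 2 samples per GPU), and when auto_scale_lr is enabled the runner rescales it by the actual total batch size. The rule is just:

# Linear LR scaling as described by the config comments above.
base_lr = 0.02
base_batch_size = 16              # 8 GPUs x 2 samples per GPU

def scaled_lr(total_batch_size: int) -> float:
    return base_lr * total_batch_size / base_batch_size

print(scaled_lr(16))   # 0.02  (the default setting)
print(scaled_lr(64))   # 0.08  (e.g. the 8xb8 LSJ configs below, which set lr=0.02 * 4)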
/detection/configs/convnext/cascade-mask-rcnn_convnext-s-p4-w7_fpn_4conv1fc-giou_amp-ms-crop-3x_coco.py:
--------------------------------------------------------------------------------
_base_ = './cascade-mask-rcnn_convnext-t-p4-w7_fpn_4conv1fc-giou_amp-ms-crop-3x_coco.py'  # noqa

# please install mmpretrain
# import mmpretrain.models to trigger register_module in mmpretrain
custom_imports = dict(
    imports=['mmpretrain.models'], allow_failed_imports=False)
checkpoint_file = 'https://download.openmmlab.com/mmclassification/v0/convnext/downstream/convnext-small_3rdparty_32xb128-noema_in1k_20220301-303e75e3.pth'  # noqa

model = dict(
    backbone=dict(
        _delete_=True,
        type='mmpretrain.ConvNeXt',
        arch='small',
        out_indices=[0, 1, 2, 3],
        drop_path_rate=0.6,
        layer_scale_init_value=1.0,
        gap_before_final_norm=False,
        init_cfg=dict(
            type='Pretrained', checkpoint=checkpoint_file,
            prefix='backbone.')))

optim_wrapper = dict(paramwise_cfg={
    'decay_rate': 0.7,
    'decay_type': 'layer_wise',
    'num_layers': 12
})
--------------------------------------------------------------------------------
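The paramwise_cfg here requests layer-wise LR decay: each of the 12 backbone layers gets the base LR scaled down geometrically the closer it sits to the input. A sketch of the arithmetic only; the real per-parameter indexing is handled by mmdet's learning-rate-decay optimizer constructor, not by code like this:

decay_rate, num_layers = 0.7, 12

for layer_id in range(num_layers + 1):
    scale = decay_rate ** (num_layers - layer_id)
    print(f"layer {layer_id:2d}: lr scale = {scale:.4f}")
# The layer closest to the input trains ~72x slower than the head (0.7**12 ~ 0.014).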
/detection/configs/mask_rcnn/mask-rcnn_r101-caffe_fpn_1x_coco.py:
--------------------------------------------------------------------------------
_base_ = './mask-rcnn_r50-caffe_fpn_1x_coco.py'
model = dict(
    backbone=dict(
        depth=101,
        init_cfg=dict(
            type='Pretrained',
            checkpoint='open-mmlab://detectron2/resnet101_caffe')))
--------------------------------------------------------------------------------
/detection/configs/mask_rcnn/mask-rcnn_r101-caffe_fpn_ms-poly-3x_coco.py:
--------------------------------------------------------------------------------
_base_ = [
    '../common/ms-poly_3x_coco-instance.py',
    '../_base_/models/mask-rcnn_r50_fpn.py'
]

model = dict(
    # use caffe img_norm
    data_preprocessor=dict(
        mean=[103.530, 116.280, 123.675],
        std=[1.0, 1.0, 1.0],
        bgr_to_rgb=False),
    backbone=dict(
        depth=101,
        norm_cfg=dict(requires_grad=False),
        norm_eval=True,
        style='caffe',
        init_cfg=dict(
            type='Pretrained',
            checkpoint='open-mmlab://detectron2/resnet101_caffe')))
--------------------------------------------------------------------------------
/detection/configs/mask_rcnn/mask-rcnn_r101_fpn_1x_coco.py:
--------------------------------------------------------------------------------
_base_ = './mask-rcnn_r50_fpn_1x_coco.py'
model = dict(
    backbone=dict(
        depth=101,
        init_cfg=dict(type='Pretrained',
                      checkpoint='torchvision://resnet101')))
--------------------------------------------------------------------------------
/detection/configs/mask_rcnn/mask-rcnn_r101_fpn_2x_coco.py:
--------------------------------------------------------------------------------
_base_ = './mask-rcnn_r50_fpn_2x_coco.py'
model = dict(
    backbone=dict(
        depth=101,
        init_cfg=dict(type='Pretrained',
                      checkpoint='torchvision://resnet101')))
--------------------------------------------------------------------------------
/detection/configs/mask_rcnn/mask-rcnn_r101_fpn_8xb8-amp-lsj-200e_coco.py:
--------------------------------------------------------------------------------
_base_ = './mask-rcnn_r50_fpn_8xb8-amp-lsj-200e_coco.py'

model = dict(
    backbone=dict(
        depth=101,
        init_cfg=dict(type='Pretrained',
                      checkpoint='torchvision://resnet101')))
--------------------------------------------------------------------------------
/detection/configs/mask_rcnn/mask-rcnn_r101_fpn_ms-poly-3x_coco.py:
--------------------------------------------------------------------------------
_base_ = [
    '../common/ms-poly_3x_coco-instance.py',
    '../_base_/models/mask-rcnn_r50_fpn.py'
]

model = dict(
    backbone=dict(
        depth=101,
        init_cfg=dict(type='Pretrained',
                      checkpoint='torchvision://resnet101')))
--------------------------------------------------------------------------------
/detection/configs/mask_rcnn/mask-rcnn_r18_fpn_8xb8-amp-lsj-200e_coco.py:
--------------------------------------------------------------------------------
_base_ = './mask-rcnn_r50_fpn_8xb8-amp-lsj-200e_coco.py'

model = dict(
    backbone=dict(
        depth=18,
        init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet18')),
    neck=dict(in_channels=[64, 128, 256, 512]))
--------------------------------------------------------------------------------
/detection/configs/mask_rcnn/mask-rcnn_r50-caffe-c4_1x_coco.py:
--------------------------------------------------------------------------------
_base_ = [
    '../_base_/models/mask-rcnn_r50-caffe-c4.py',
    '../_base_/datasets/coco_instance.py',
    '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'
]
--------------------------------------------------------------------------------
/detection/configs/mask_rcnn/mask-rcnn_r50-caffe_fpn_1x_coco.py:
--------------------------------------------------------------------------------
_base_ = './mask-rcnn_r50_fpn_1x_coco.py'
model = dict(
    # use caffe img_norm
    data_preprocessor=dict(
        mean=[103.530, 116.280, 123.675],
        std=[1.0, 1.0, 1.0],
        bgr_to_rgb=False),
    backbone=dict(
        norm_cfg=dict(requires_grad=False),
        style='caffe',
        init_cfg=dict(
            type='Pretrained',
            checkpoint='open-mmlab://detectron2/resnet50_caffe')))
--------------------------------------------------------------------------------
/detection/configs/mask_rcnn/mask-rcnn_r50-caffe_fpn_ms-1x_coco.py:
--------------------------------------------------------------------------------
_base_ = './mask-rcnn_r50_fpn_1x_coco.py'

model = dict(
    # use caffe img_norm
    data_preprocessor=dict(
        mean=[103.530, 116.280, 123.675],
        std=[1.0, 1.0, 1.0],
        bgr_to_rgb=False),
    backbone=dict(
        norm_cfg=dict(requires_grad=False),
        style='caffe',
        init_cfg=dict(
            type='Pretrained',
            checkpoint='open-mmlab://detectron2/resnet50_caffe')))

train_pipeline = [
    dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}),
    dict(type='LoadAnnotations', with_bbox=True, with_mask=True),
    dict(
        type='RandomChoiceResize',
        scales=[(1333, 640), (1333, 672), (1333, 704), (1333, 736),
                (1333, 768), (1333, 800)],
        keep_ratio=True),
    dict(type='RandomFlip', prob=0.5),
    dict(type='PackDetInputs'),
]

train_dataloader = dict(dataset=dict(pipeline=train_pipeline))
--------------------------------------------------------------------------------
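RandomChoiceResize in the ms-1x pipeline is what makes it "multi-scale": every image independently picks one of the six target scales, and keep_ratio=True rescales by a single factor. A toy equivalent of the sampling and rescale step (the real transform also rescales boxes and masks):

import random

scales = [(1333, 640), (1333, 672), (1333, 704), (1333, 736),
          (1333, 768), (1333, 800)]

def target_size(h, w):
    # One scale drawn per image, as RandomChoiceResize does; with keep_ratio=True
    # the image is scaled by one factor so it fits inside (long_edge, short_edge).
    long_edge, short_edge = random.choice(scales)
    factor = min(long_edge / max(h, w), short_edge / min(h, w))
    return round(h * factor), round(w * factor)

print(target_size(480, 640))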
/detection/configs/mask_rcnn/mask-rcnn_r50-caffe_fpn_ms-poly-1x_coco.py:
--------------------------------------------------------------------------------
_base_ = './mask-rcnn_r50_fpn_1x_coco.py'

model = dict(
    # use caffe img_norm
    data_preprocessor=dict(
        mean=[103.530, 116.280, 123.675],
        std=[1.0, 1.0, 1.0],
        bgr_to_rgb=False),
    backbone=dict(
        norm_cfg=dict(requires_grad=False),
        style='caffe',
        init_cfg=dict(
            type='Pretrained',
            checkpoint='open-mmlab://detectron2/resnet50_caffe')))
train_pipeline = [
    dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}),
    dict(
        type='LoadAnnotations',
        with_bbox=True,
        with_mask=True,
        poly2mask=False),
    dict(
        type='RandomChoiceResize',
        scales=[(1333, 640), (1333, 672), (1333, 704), (1333, 736),
                (1333, 768), (1333, 800)],
        keep_ratio=True),
    dict(type='RandomFlip', prob=0.5),
    dict(type='PackDetInputs')
]

train_dataloader = dict(dataset=dict(pipeline=train_pipeline))
--------------------------------------------------------------------------------
/detection/configs/mask_rcnn/mask-rcnn_r50-caffe_fpn_ms-poly-2x_coco.py:
--------------------------------------------------------------------------------
_base_ = './mask-rcnn_r50-caffe_fpn_ms-poly-1x_coco.py'

train_cfg = dict(max_epochs=24)
# learning rate
param_scheduler = [
    dict(
        type='LinearLR', start_factor=0.001, by_epoch=False, begin=0, end=500),
    dict(
        type='MultiStepLR',
        begin=0,
        end=24,
        by_epoch=True,
        milestones=[16, 22],
        gamma=0.1)
]
--------------------------------------------------------------------------------
/detection/configs/mask_rcnn/mask-rcnn_r50-caffe_fpn_ms-poly-3x_coco.py:
--------------------------------------------------------------------------------
_base_ = './mask-rcnn_r50-caffe_fpn_ms-poly-1x_coco.py'

train_cfg = dict(max_epochs=36)
# learning rate
param_scheduler = [
    dict(
        type='LinearLR', start_factor=0.001, by_epoch=False, begin=0, end=500),
    dict(
        type='MultiStepLR',
        begin=0,
        end=36,
        by_epoch=True,
        milestones=[28, 34],
        gamma=0.1)
]
--------------------------------------------------------------------------------
/detection/configs/mask_rcnn/mask-rcnn_r50-caffe_fpn_poly-1x_coco_v1.py:
--------------------------------------------------------------------------------
_base_ = './mask-rcnn_r50_fpn_1x_coco.py'

model = dict(
    # use caffe img_norm
    data_preprocessor=dict(
        mean=[103.530, 116.280, 123.675],
        std=[1.0, 1.0, 1.0],
        bgr_to_rgb=False),
    backbone=dict(
        norm_cfg=dict(requires_grad=False),
        style='caffe',
        init_cfg=dict(
            type='Pretrained',
            checkpoint='open-mmlab://detectron2/resnet50_caffe')),
    rpn_head=dict(
        loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)),
    roi_head=dict(
        bbox_roi_extractor=dict(
            roi_layer=dict(
                type='RoIAlign',
                output_size=7,
                sampling_ratio=2,
                aligned=False)),
        bbox_head=dict(
            loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)),
        mask_roi_extractor=dict(
            roi_layer=dict(
                type='RoIAlign',
                output_size=14,
                sampling_ratio=2,
                aligned=False))))
--------------------------------------------------------------------------------
/detection/configs/mask_rcnn/mask-rcnn_r50_fpn_1x-wandb_coco.py:
--------------------------------------------------------------------------------
_base_ = [
    '../_base_/models/mask-rcnn_r50_fpn.py',
    '../_base_/datasets/coco_instance.py',
    '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'
]

vis_backends = [dict(type='LocalVisBackend'), dict(type='WandbVisBackend')]
visualizer = dict(vis_backends=vis_backends)

# MMEngine supports the following two ways; users can choose
# whichever is more convenient
# default_hooks = dict(checkpoint=dict(interval=4))
_base_.default_hooks.checkpoint.interval = 4

# train_cfg = dict(val_interval=2)
_base_.train_cfg.val_interval = 2
--------------------------------------------------------------------------------
/detection/configs/mask_rcnn/mask-rcnn_r50_fpn_1x_coco.py:
--------------------------------------------------------------------------------
_base_ = [
    '../_base_/models/mask-rcnn_r50_fpn.py',
    '../_base_/datasets/coco_instance.py',
    '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'
]
--------------------------------------------------------------------------------
/detection/configs/mask_rcnn/mask-rcnn_r50_fpn_2x_coco.py:
--------------------------------------------------------------------------------
_base_ = [
    '../_base_/models/mask-rcnn_r50_fpn.py',
    '../_base_/datasets/coco_instance.py',
    '../_base_/schedules/schedule_2x.py', '../_base_/default_runtime.py'
]
--------------------------------------------------------------------------------
/detection/configs/mask_rcnn/mask-rcnn_r50_fpn_8xb8-amp-lsj-200e_coco.py:
--------------------------------------------------------------------------------
_base_ = [
    '../_base_/models/mask-rcnn_r50_fpn.py',
    '../common/lsj-100e_coco-instance.py'
]
image_size = (1024, 1024)
batch_augments = [
    dict(type='BatchFixedSizePad', size=image_size, pad_mask=True)
]

model = dict(data_preprocessor=dict(batch_augments=batch_augments))

train_dataloader = dict(batch_size=8, num_workers=4)
# Enable automatic-mixed-precision training with AmpOptimWrapper.
optim_wrapper = dict(
    type='AmpOptimWrapper',
    optimizer=dict(
        type='SGD', lr=0.02 * 4, momentum=0.9, weight_decay=0.00004))

# NOTE: `auto_scale_lr` is for automatically scaling LR,
# USER SHOULD NOT CHANGE ITS VALUES.
# base_batch_size = (8 GPUs) x (8 samples per GPU)
auto_scale_lr = dict(base_batch_size=64)
--------------------------------------------------------------------------------
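The LSJ recipe trades schedule length for batch size: 8 GPUs x 8 samples = 64 images per step, four times the default 16, which is why the config sets lr=0.02 * 4 and base_batch_size=64. The same linear rule from the schedules applies; as a sanity check:

# Sanity arithmetic for the 8xb8 LSJ configs.
gpus, samples_per_gpu = 8, 8
total = gpus * samples_per_gpu          # 64 images per optimizer step
assert total / 16 == 4                  # 4x the 8x2 default batch...
assert 0.02 * 4 == 0.08                 # ...so the SGD lr is scaled 4x as well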
/detection/configs/mask_rcnn/mask-rcnn_r50_fpn_amp-1x_coco.py:
--------------------------------------------------------------------------------
_base_ = './mask-rcnn_r50_fpn_1x_coco.py'

# Enable automatic-mixed-precision training with AmpOptimWrapper.
optim_wrapper = dict(type='AmpOptimWrapper')
--------------------------------------------------------------------------------
/detection/configs/mask_rcnn/mask-rcnn_r50_fpn_ms-poly-3x_coco.py:
--------------------------------------------------------------------------------
_base_ = [
    '../common/ms-poly_3x_coco-instance.py',
    '../_base_/models/mask-rcnn_r50_fpn.py'
]
--------------------------------------------------------------------------------
/detection/configs/mask_rcnn/mask-rcnn_r50_fpn_poly-1x_coco.py:
--------------------------------------------------------------------------------
_base_ = [
    '../_base_/models/mask-rcnn_r50_fpn.py',
    '../_base_/datasets/coco_instance.py',
    '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'
]

train_pipeline = [
    dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}),
    dict(
        type='LoadAnnotations',
        with_bbox=True,
        with_mask=True,
        poly2mask=False),
    dict(type='Resize', scale=(1333, 800), keep_ratio=True),
    dict(type='RandomFlip', prob=0.5),
    dict(type='PackDetInputs'),
]
train_dataloader = dict(dataset=dict(pipeline=train_pipeline))
--------------------------------------------------------------------------------
/detection/configs/mask_rcnn/mask-rcnn_x101-32x4d_fpn_1x_coco.py:
--------------------------------------------------------------------------------
_base_ = './mask-rcnn_r101_fpn_1x_coco.py'
model = dict(
    backbone=dict(
        type='ResNeXt',
        depth=101,
        groups=32,
        base_width=4,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        frozen_stages=1,
        norm_cfg=dict(type='BN', requires_grad=True),
        style='pytorch',
        init_cfg=dict(
            type='Pretrained', checkpoint='open-mmlab://resnext101_32x4d')))
--------------------------------------------------------------------------------
/detection/configs/mask_rcnn/mask-rcnn_x101-32x4d_fpn_2x_coco.py:
--------------------------------------------------------------------------------
_base_ = './mask-rcnn_r101_fpn_2x_coco.py'
model = dict(
    backbone=dict(
        type='ResNeXt',
        depth=101,
        groups=32,
        base_width=4,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        frozen_stages=1,
        norm_cfg=dict(type='BN', requires_grad=True),
        style='pytorch',
        init_cfg=dict(
            type='Pretrained', checkpoint='open-mmlab://resnext101_32x4d')))
--------------------------------------------------------------------------------
/detection/configs/mask_rcnn/mask-rcnn_x101-32x4d_fpn_ms-poly-3x_coco.py:
--------------------------------------------------------------------------------
_base_ = [
    '../common/ms-poly_3x_coco-instance.py',
    '../_base_/models/mask-rcnn_r50_fpn.py'
]

model = dict(
    backbone=dict(
        type='ResNeXt',
        depth=101,
        groups=32,
        base_width=4,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        frozen_stages=1,
        norm_cfg=dict(type='BN', requires_grad=True),
        style='pytorch',
        init_cfg=dict(
            type='Pretrained', checkpoint='open-mmlab://resnext101_32x4d')))
--------------------------------------------------------------------------------
/detection/configs/mask_rcnn/mask-rcnn_x101-32x8d_fpn_1x_coco.py:
--------------------------------------------------------------------------------
_base_ = './mask-rcnn_r101_fpn_1x_coco.py'

model = dict(
    # ResNeXt-101-32x8d model trained with Caffe2 at FB,
    # so the mean and std need to be changed.
    data_preprocessor=dict(
        mean=[103.530, 116.280, 123.675],
        std=[57.375, 57.120, 58.395],
        bgr_to_rgb=False),
    backbone=dict(
        type='ResNeXt',
        depth=101,
        groups=32,
        base_width=8,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        frozen_stages=1,
        norm_cfg=dict(type='BN', requires_grad=False),
        style='pytorch',
        init_cfg=dict(
            type='Pretrained',
            checkpoint='open-mmlab://detectron2/resnext101_32x8d')))
--------------------------------------------------------------------------------
/detection/configs/mask_rcnn/mask-rcnn_x101-32x8d_fpn_ms-poly-1x_coco.py:
--------------------------------------------------------------------------------
_base_ = './mask-rcnn_r101_fpn_1x_coco.py'

model = dict(
    # ResNeXt-101-32x8d model trained with Caffe2 at FB,
    # so the mean and std need to be changed.
    data_preprocessor=dict(
        mean=[103.530, 116.280, 123.675],
        std=[57.375, 57.120, 58.395],
        bgr_to_rgb=False),
    backbone=dict(
        type='ResNeXt',
        depth=101,
        groups=32,
        base_width=8,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        frozen_stages=1,
        norm_cfg=dict(type='BN', requires_grad=False),
        style='pytorch',
        init_cfg=dict(
            type='Pretrained',
            checkpoint='open-mmlab://detectron2/resnext101_32x8d')))

train_pipeline = [
    dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}),
    dict(
        type='LoadAnnotations',
        with_bbox=True,
        with_mask=True,
        poly2mask=False),
    dict(
        type='RandomChoiceResize',
        scales=[(1333, 640), (1333, 672), (1333, 704), (1333, 736),
                (1333, 768), (1333, 800)],
        keep_ratio=True),
    dict(type='RandomFlip', prob=0.5),
    dict(type='PackDetInputs'),
]

train_dataloader = dict(dataset=dict(pipeline=train_pipeline))
--------------------------------------------------------------------------------
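The recurring "mean and std need to be changed" comment is about Caffe2 preprocessing: these ResNeXt weights expect BGR channel order with Caffe2's statistics, whereas the plain caffe ResNet configs above keep std=[1.0, 1.0, 1.0] and torchvision weights use RGB statistics. Illustratively, the data_preprocessor's normalization amounts to:

import torch

# Caffe2-style values, as set by the x101-32x8d configs (BGR channel order)
caffe2_mean = torch.tensor([103.530, 116.280, 123.675]).view(3, 1, 1)
caffe2_std = torch.tensor([57.375, 57.120, 58.395]).view(3, 1, 1)

img_bgr = torch.rand(3, 800, 1333) * 255    # stand-in for a decoded BGR image
normalized = (img_bgr - caffe2_mean) / caffe2_std
print(normalized.mean())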
9 | data_preprocessor=dict( 10 | mean=[103.530, 116.280, 123.675], 11 | std=[57.375, 57.120, 58.395], 12 | bgr_to_rgb=False), 13 | backbone=dict( 14 | type='ResNeXt', 15 | depth=101, 16 | groups=32, 17 | base_width=8, 18 | num_stages=4, 19 | out_indices=(0, 1, 2, 3), 20 | frozen_stages=1, 21 | norm_cfg=dict(type='BN', requires_grad=False), 22 | style='pytorch', 23 | init_cfg=dict( 24 | type='Pretrained', 25 | checkpoint='open-mmlab://detectron2/resnext101_32x8d'))) 26 | -------------------------------------------------------------------------------- /detection/configs/mask_rcnn/mask-rcnn_x101-64x4d_fpn_1x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = './mask-rcnn_x101-32x4d_fpn_1x_coco.py' 2 | model = dict( 3 | backbone=dict( 4 | type='ResNeXt', 5 | depth=101, 6 | groups=64, 7 | base_width=4, 8 | num_stages=4, 9 | out_indices=(0, 1, 2, 3), 10 | frozen_stages=1, 11 | norm_cfg=dict(type='BN', requires_grad=True), 12 | style='pytorch', 13 | init_cfg=dict( 14 | type='Pretrained', checkpoint='open-mmlab://resnext101_64x4d'))) 15 | -------------------------------------------------------------------------------- /detection/configs/mask_rcnn/mask-rcnn_x101-64x4d_fpn_2x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = './mask-rcnn_x101-32x4d_fpn_2x_coco.py' 2 | model = dict( 3 | backbone=dict( 4 | type='ResNeXt', 5 | depth=101, 6 | groups=64, 7 | base_width=4, 8 | num_stages=4, 9 | out_indices=(0, 1, 2, 3), 10 | frozen_stages=1, 11 | norm_cfg=dict(type='BN', requires_grad=True), 12 | style='pytorch', 13 | init_cfg=dict( 14 | type='Pretrained', checkpoint='open-mmlab://resnext101_64x4d'))) 15 | -------------------------------------------------------------------------------- /detection/configs/mask_rcnn/mask-rcnn_x101-64x4d_fpn_ms-poly_3x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../common/ms-poly_3x_coco-instance.py', 3 | '../_base_/models/mask-rcnn_r50_fpn.py' 4 | ] 5 | 6 | model = dict( 7 | backbone=dict( 8 | type='ResNeXt', 9 | depth=101, 10 | groups=64, 11 | base_width=4, 12 | num_stages=4, 13 | out_indices=(0, 1, 2, 3), 14 | frozen_stages=1, 15 | norm_cfg=dict(type='BN', requires_grad=True), 16 | style='pytorch', 17 | init_cfg=dict( 18 | type='Pretrained', checkpoint='open-mmlab://resnext101_64x4d'))) 19 | -------------------------------------------------------------------------------- /detection/configs/swin/mask-rcnn_swin-s-p4-w7_fpn_amp-ms-crop-3x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = './mask-rcnn_swin-t-p4-w7_fpn_amp-ms-crop-3x_coco.py' 2 | pretrained = 'https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_small_patch4_window7_224.pth' # noqa 3 | model = dict( 4 | backbone=dict( 5 | depths=[2, 2, 18, 2], 6 | init_cfg=dict(type='Pretrained', checkpoint=pretrained))) 7 | -------------------------------------------------------------------------------- /detection/configs/swin/mask-rcnn_swin-t-p4-w7_fpn_1x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/models/mask-rcnn_r50_fpn.py', 3 | '../_base_/datasets/coco_instance.py', 4 | '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' 5 | ] 6 | pretrained = 'https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_tiny_patch4_window7_224.pth' # noqa 7 | model = dict( 8 | type='MaskRCNN', 
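# (_delete_=True in the backbone dict below tells the mmengine config
# system to drop the base config's ResNet backbone keys instead of
# merging into them, so only the Swin settings listed here take effect.)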
9 | backbone=dict( 10 | _delete_=True, 11 | type='SwinTransformer', 12 | embed_dims=96, 13 | depths=[2, 2, 6, 2], 14 | num_heads=[3, 6, 12, 24], 15 | window_size=7, 16 | mlp_ratio=4, 17 | qkv_bias=True, 18 | qk_scale=None, 19 | drop_rate=0., 20 | attn_drop_rate=0., 21 | drop_path_rate=0.2, 22 | patch_norm=True, 23 | out_indices=(0, 1, 2, 3), 24 | with_cp=False, 25 | convert_weights=True, 26 | init_cfg=dict(type='Pretrained', checkpoint=pretrained)), 27 | neck=dict(in_channels=[96, 192, 384, 768])) 28 | 29 | max_epochs = 12 30 | train_cfg = dict(max_epochs=max_epochs) 31 | 32 | # learning rate 33 | param_scheduler = [ 34 | dict( 35 | type='LinearLR', start_factor=0.001, by_epoch=False, begin=0, 36 | end=1000), 37 | dict( 38 | type='MultiStepLR', 39 | begin=0, 40 | end=max_epochs, 41 | by_epoch=True, 42 | milestones=[8, 11], 43 | gamma=0.1) 44 | ] 45 | 46 | # optimizer 47 | optim_wrapper = dict( 48 | type='OptimWrapper', 49 | paramwise_cfg=dict( 50 | custom_keys={ 51 | 'absolute_pos_embed': dict(decay_mult=0.), 52 | 'relative_position_bias_table': dict(decay_mult=0.), 53 | 'norm': dict(decay_mult=0.) 54 | }), 55 | optimizer=dict( 56 | _delete_=True, 57 | type='AdamW', 58 | lr=0.0001, 59 | betas=(0.9, 0.999), 60 | weight_decay=0.05)) 61 | -------------------------------------------------------------------------------- /detection/configs/swin/mask-rcnn_swin-t-p4-w7_fpn_amp-ms-crop-3x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = './mask-rcnn_swin-t-p4-w7_fpn_ms-crop-3x_coco.py' 2 | # Enable automatic-mixed-precision training with AmpOptimWrapper. 3 | optim_wrapper = dict(type='AmpOptimWrapper') 4 | -------------------------------------------------------------------------------- /detection/configs/swin/retinanet_swin-t-p4-w7_fpn_1x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/models/retinanet_r50_fpn.py', 3 | '../_base_/datasets/coco_detection.py', 4 | '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' 5 | ] 6 | pretrained = 'https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_tiny_patch4_window7_224.pth' # noqa 7 | model = dict( 8 | backbone=dict( 9 | _delete_=True, 10 | type='SwinTransformer', 11 | embed_dims=96, 12 | depths=[2, 2, 6, 2], 13 | num_heads=[3, 6, 12, 24], 14 | window_size=7, 15 | mlp_ratio=4, 16 | qkv_bias=True, 17 | qk_scale=None, 18 | drop_rate=0., 19 | attn_drop_rate=0., 20 | drop_path_rate=0.2, 21 | patch_norm=True, 22 | out_indices=(1, 2, 3), 23 | # Please only add indices that would be used 24 | # in FPN, otherwise some parameters will not be used 25 | with_cp=False, 26 | convert_weights=True, 27 | init_cfg=dict(type='Pretrained', checkpoint=pretrained)), 28 | neck=dict(in_channels=[192, 384, 768], start_level=0, num_outs=5)) 29 | 30 | # optimizer 31 | optim_wrapper = dict(optimizer=dict(lr=0.01)) 32 | -------------------------------------------------------------------------------- /detection/configs/vmamba/mask_rcnn_vmamba_fpn_coco_base.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../swin/mask-rcnn_swin-t-p4-w7_fpn_1x_coco.py' 3 | ] 4 | 5 | model = dict( 6 | backbone=dict( 7 | type='MMDET_VSSM', 8 | depths=(2, 2, 27, 2), 9 | dims=128, 10 | out_indices=(0, 1, 2, 3), 11 | pretrained="../../ckpts/vssmbase/ckpt_epoch_260.pth", 12 | ), 13 | neck=dict(in_channels=[128, 256, 512, 1024]), 14 | ) 15 | 16 | # too big 17 | train_dataloader =
dict(batch_size=1) # as gpus=16 18 | 19 | 20 | 21 | -------------------------------------------------------------------------------- /detection/configs/vmamba/mask_rcnn_vmamba_fpn_coco_small.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../swin/mask-rcnn_swin-t-p4-w7_fpn_1x_coco.py' 3 | ] 4 | 5 | model = dict( 6 | backbone=dict( 7 | type='MMDET_VSSM', 8 | depths=(2, 2, 27, 2), 9 | dims=96, 10 | out_indices=(0, 1, 2, 3), 11 | pretrained="../../ckpts/vssmsmall/ckpt_epoch_292.pth", 12 | ), 13 | ) 14 | 15 | # train_dataloader = dict(batch_size=2) # as gpus=8 16 | 17 | -------------------------------------------------------------------------------- /detection/configs/vmamba/mask_rcnn_vmamba_fpn_coco_tiny.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../swin/mask-rcnn_swin-t-p4-w7_fpn_1x_coco.py' 3 | ] 4 | 5 | model = dict( 6 | backbone=dict( 7 | type='MMDET_VSSM', 8 | depths=(2, 2, 9, 2), 9 | dims=96, 10 | out_indices=(0, 1, 2, 3), 11 | pretrained="../../ckpts/vssmtiny/ckpt_epoch_292.pth", 12 | ), 13 | ) 14 | 15 | # train_dataloader = dict(batch_size=2) # as gpus=8 16 | 17 | -------------------------------------------------------------------------------- /detection/configs/vssm/mask_rcnn_vssm_fpn_coco_base.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../swin/mask-rcnn_swin-t-p4-w7_fpn_1x_coco.py' 3 | ] 4 | 5 | model = dict( 6 | backbone=dict( 7 | type='MM_VSSM', 8 | out_indices=(0, 1, 2, 3), 9 | pretrained="../../ckpts/classification/outs/vssm/vssmbasedp05/vssmbase_dp05_ckpt_epoch_260.pth", 10 | # copied from classification/configs/vssm/vssm_base_224.yaml 11 | dims=128, 12 | depths=(2, 2, 27, 2), 13 | ssm_d_state=16, 14 | ssm_dt_rank="auto", 15 | ssm_ratio=2.0, 16 | mlp_ratio=0.0, 17 | downsample_version="v1", 18 | patchembed_version="v1", 19 | # forward_type="v0", # if you want exactly the same 20 | ), 21 | neck=dict(in_channels=[128, 256, 512, 1024]), 22 | ) 23 | 24 | # too big 25 | train_dataloader = dict(batch_size=1) # as gpus=16 26 | 27 | 28 | 29 | -------------------------------------------------------------------------------- /detection/configs/vssm/mask_rcnn_vssm_fpn_coco_small.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../swin/mask-rcnn_swin-t-p4-w7_fpn_1x_coco.py' 3 | ] 4 | 5 | model = dict( 6 | backbone=dict( 7 | type='MM_VSSM', 8 | out_indices=(0, 1, 2, 3), 9 | pretrained="../../ckpts/classification/outs/vssm/vssmsmall/vssmsmall_dp03_ckpt_epoch_238.pth", 10 | # copied from classification/configs/vssm/vssm_small_224.yaml 11 | dims=96, 12 | depths=(2, 2, 27, 2), 13 | ssm_d_state=16, 14 | ssm_dt_rank="auto", 15 | ssm_ratio=2.0, 16 | mlp_ratio=0.0, 17 | downsample_version="v1", 18 | patchembed_version="v1", 19 | # forward_type="v0", # if you want exactly the same 20 | ), 21 | ) 22 | 23 | # train_dataloader = dict(batch_size=2) # as gpus=8 24 | 25 | -------------------------------------------------------------------------------- /detection/configs/vssm/mask_rcnn_vssm_fpn_coco_tiny.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../swin/mask-rcnn_swin-t-p4-w7_fpn_1x_coco.py' 3 | ] 4 | 5 | model = dict( 6 | backbone=dict( 7 | type='MM_VSSM', 8 | out_indices=(0, 1, 2, 3), 9 | pretrained="../../ckpts/classification/outs/vssm/vssmtiny/vssmtiny_dp01_ckpt_epoch_292.pth", 10 | # copied from 
classification/configs/vssm/vssm_tiny_224.yaml 11 | dims=96, 12 | depths=(2, 2, 9, 2), 13 | ssm_d_state=16, 14 | ssm_dt_rank="auto", 15 | ssm_ratio=2.0, 16 | mlp_ratio=0.0, 17 | downsample_version="v1", 18 | patchembed_version="v1", 19 | # forward_type="v0", # if you want exactly the same 20 | ), 21 | ) 22 | 23 | # train_dataloader = dict(batch_size=2) # as gpus=8 24 | 25 | -------------------------------------------------------------------------------- /detection/configs/vssm1/mask_rcnn_vssm_fpn_coco_base.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../swin/mask-rcnn_swin-t-p4-w7_fpn_1x_coco.py' 3 | ] 4 | 5 | model = dict( 6 | backbone=dict( 7 | type='MM_VSSM', 8 | out_indices=(0, 1, 2, 3), 9 | pretrained="", 10 | # copied from classification/configs/vssm/vssm_base_224.yaml 11 | dims=128, 12 | depths=(2, 2, 15, 2), 13 | ssm_d_state=1, 14 | ssm_dt_rank="auto", 15 | ssm_ratio=2.0, 16 | ssm_conv=3, 17 | ssm_conv_bias=False, 18 | forward_type="v05_noz", # v3_noz 19 | mlp_ratio=4.0, 20 | downsample_version="v3", 21 | patchembed_version="v2", 22 | drop_path_rate=0.6, 23 | norm_layer="ln2d", 24 | ), 25 | neck=dict(in_channels=[128, 256, 512, 1024]), 26 | ) 27 | 28 | # too big 29 | # train_dataloader = dict(batch_size=1) # as gpus=16 30 | 31 | 32 | 33 | -------------------------------------------------------------------------------- /detection/configs/vssm1/mask_rcnn_vssm_fpn_coco_small.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../swin/mask-rcnn_swin-t-p4-w7_fpn_1x_coco.py' 3 | ] 4 | 5 | model = dict( 6 | backbone=dict( 7 | type='MM_VSSM', 8 | out_indices=(0, 1, 2, 3), 9 | pretrained="", 10 | # copied from classification/configs/vssm/vssm_small_224.yaml 11 | dims=96, 12 | depths=(2, 2, 15, 2), 13 | ssm_d_state=1, 14 | ssm_dt_rank="auto", 15 | ssm_ratio=2.0, 16 | ssm_conv=3, 17 | ssm_conv_bias=False, 18 | forward_type="v05_noz", # v3_noz 19 | mlp_ratio=4.0, 20 | downsample_version="v3", 21 | patchembed_version="v2", 22 | drop_path_rate=0.3, 23 | norm_layer="ln2d", 24 | ), 25 | ) 26 | 27 | # train_dataloader = dict(batch_size=2) # as gpus=8 28 | 29 | -------------------------------------------------------------------------------- /detection/configs/vssm1/mask_rcnn_vssm_fpn_coco_tiny.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../swin/mask-rcnn_swin-t-p4-w7_fpn_1x_coco.py' 3 | ] 4 | 5 | model = dict( 6 | backbone=dict( 7 | type='MM_VSSM', 8 | out_indices=(0, 1, 2, 3), 9 | pretrained="", 10 | # copied from classification/configs/vssm/vssm_tiny_224.yaml 11 | dims=96, 12 | # depths=(2, 2, 5, 2), 13 | depths=(2, 2, 8, 2), 14 | ssm_d_state=1, 15 | ssm_dt_rank="auto", 16 | # ssm_ratio=2.0, 17 | ssm_ratio=1.0, 18 | ssm_conv=3, 19 | ssm_conv_bias=False, 20 | forward_type="v05_noz", # v3_noz 21 | mlp_ratio=4.0, 22 | downsample_version="v3", 23 | patchembed_version="v2", 24 | drop_path_rate=0.2, 25 | norm_layer="ln2d", 26 | ), 27 | ) 28 | 29 | # train_dataloader = dict(batch_size=2) # as gpus=8 30 | 31 | -------------------------------------------------------------------------------- /detection/configs/vssm1/mask_rcnn_vssm_fpn_coco_tiny1.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../swin/mask-rcnn_swin-t-p4-w7_fpn_1x_coco.py' 3 | ] 4 | 5 | model = dict( 6 | backbone=dict( 7 | type='MM_VSSM', 8 | out_indices=(0, 1, 2, 3), 9 | pretrained="", 10 | # copied from 
classification/configs/vssm/vssm_tiny_224.yaml 11 | dims=96, 12 | depths=(2, 2, 5, 2), 13 | ssm_d_state=1, 14 | ssm_dt_rank="auto", 15 | ssm_ratio=2.0, 16 | ssm_conv=3, 17 | ssm_conv_bias=False, 18 | forward_type="v05_noz", # v3_noz 19 | mlp_ratio=4.0, 20 | downsample_version="v3", 21 | patchembed_version="v2", 22 | drop_path_rate=0.2, 23 | norm_layer="ln2d", 24 | ), 25 | ) 26 | 27 | # train_dataloader = dict(batch_size=2) # as gpus=8 28 | 29 | -------------------------------------------------------------------------------- /detection/model.py: -------------------------------------------------------------------------------- 1 | import os 2 | from functools import partial 3 | from typing import Callable 4 | 5 | import torch 6 | from torch import nn 7 | from torch.utils import checkpoint 8 | 9 | from mmengine.model import BaseModule 10 | from mmdet.registry import MODELS as MODELS_MMDET 11 | from mmseg.registry import MODELS as MODELS_MMSEG 12 | 13 | def import_abspy(name="models", path="classification/"): 14 | import sys 15 | import importlib 16 | path = os.path.abspath(path) 17 | assert os.path.isdir(path) 18 | sys.path.insert(0, path) 19 | module = importlib.import_module(name) 20 | sys.path.pop(0) 21 | return module 22 | 23 | build = import_abspy( 24 | "models", 25 | os.path.join(os.path.dirname(os.path.abspath(__file__)), "../classification/"), 26 | ) 27 | Backbone_VSSM: nn.Module = build.vmamba.Backbone_VSSM 28 | 29 | @MODELS_MMSEG.register_module() 30 | @MODELS_MMDET.register_module() 31 | class MM_VSSM(BaseModule, Backbone_VSSM): 32 | def __init__(self, *args, **kwargs): 33 | BaseModule.__init__(self) 34 | Backbone_VSSM.__init__(self, *args, **kwargs) 35 | 36 | -------------------------------------------------------------------------------- /detection/readme.md: -------------------------------------------------------------------------------- 1 | ## origins 2 | `configs/` and `tools/` are copied from https://github.com/open-mmlab/mmdetection: `version 3.3.0` 3 | 4 | 5 | ## modifications 6 | `import model` is added at `tools/train.py#12` 7 | `import model` is added at `tools/test.py#17` 8 | 9 | -------------------------------------------------------------------------------- /detection/tools/analysis_tools/coco_occluded_separated_recall.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from argparse import ArgumentParser 3 | 4 | import mmengine 5 | from mmengine.logging import print_log 6 | 7 | from mmdet.datasets import CocoDataset 8 | from mmdet.evaluation import CocoOccludedSeparatedMetric 9 | 10 | 11 | def main(): 12 | parser = ArgumentParser( 13 | description='Compute recall of COCO occluded and separated masks ' 14 | 'presented in the paper https://arxiv.org/abs/2210.10046.') 15 | parser.add_argument('result', help='result file (pkl format) path') 16 | parser.add_argument('--out', help='file path to save evaluation results') 17 | parser.add_argument( 18 | '--score-thr', 19 | type=float, 20 | default=0.3, 21 | help='Score threshold for the recall calculation. Defaults to 0.3') 22 | parser.add_argument( 23 | '--iou-thr', 24 | type=float, 25 | default=0.75, 26 | help='IoU threshold for the recall calculation. 
Defaults to 0.75.') 27 | parser.add_argument( 28 | '--ann', 29 | default='data/coco/annotations/instances_val2017.json', 30 | help='coco annotation file path') 31 | args = parser.parse_args() 32 | 33 | results = mmengine.load(args.result) 34 | assert 'masks' in results[0]['pred_instances'], \ 35 | 'The results must be predicted by an instance segmentation model.' 36 | metric = CocoOccludedSeparatedMetric( 37 | ann_file=args.ann, iou_thr=args.iou_thr, score_thr=args.score_thr) 38 | metric.dataset_meta = CocoDataset.METAINFO 39 | for datasample in results: 40 | metric.process(data_batch=None, data_samples=[datasample]) 41 | metric_res = metric.compute_metrics(metric.results) 42 | if args.out is not None: 43 | mmengine.dump(metric_res, args.out) 44 | print_log(f'Evaluation results have been saved to {args.out}.') 45 | 46 | 47 | if __name__ == '__main__': 48 | main() 49 | -------------------------------------------------------------------------------- /detection/tools/analysis_tools/eval_metric.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import argparse 3 | 4 | import mmengine 5 | from mmengine import Config, DictAction 6 | from mmengine.evaluator import Evaluator 7 | from mmengine.registry import init_default_scope 8 | 9 | from mmdet.registry import DATASETS 10 | 11 | 12 | def parse_args(): 13 | parser = argparse.ArgumentParser(description='Evaluate metric of the ' 14 | 'results saved in pkl format') 15 | parser.add_argument('config', help='Config of the model') 16 | parser.add_argument('pkl_results', help='Results in pickle format') 17 | parser.add_argument( 18 | '--cfg-options', 19 | nargs='+', 20 | action=DictAction, 21 | help='override some settings in the used config; the key-value pair ' 22 | 'in xxx=yyy format will be merged into the config file. If the value to ' 23 | 'be overwritten is a list, it should be like key="[a,b]" or key=a,b. ' 24 | 'It also allows nested list/tuple values, e.g. 
key="[(a,b),(c,d)]" ' 25 | 'Note that the quotation marks are necessary and that no white space ' 26 | 'is allowed.') 27 | args = parser.parse_args() 28 | return args 29 | 30 | 31 | def main(): 32 | args = parse_args() 33 | 34 | cfg = Config.fromfile(args.config) 35 | init_default_scope(cfg.get('default_scope', 'mmdet')) 36 | 37 | if args.cfg_options is not None: 38 | cfg.merge_from_dict(args.cfg_options) 39 | 40 | dataset = DATASETS.build(cfg.test_dataloader.dataset) 41 | predictions = mmengine.load(args.pkl_results) 42 | 43 | evaluator = Evaluator(cfg.val_evaluator) 44 | evaluator.dataset_meta = dataset.metainfo 45 | eval_results = evaluator.offline_evaluate(predictions) 46 | print(eval_results) 47 | 48 | 49 | if __name__ == '__main__': 50 | main() 51 | -------------------------------------------------------------------------------- /detection/tools/analysis_tools/mot/dist_mot_search.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | CONFIG=$1 4 | GPUS=$2 5 | PORT=${PORT:-29500} 6 | 7 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 8 | python -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \ 9 | $(dirname "$0")/mot_param_search.py $CONFIG --launcher pytorch ${@:3} 10 | -------------------------------------------------------------------------------- /detection/tools/analysis_tools/mot/slurm_mot_search.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -x 4 | 5 | PARTITION=$1 6 | JOB_NAME=$2 7 | CONFIG=$3 8 | GPUS=$4 9 | GPUS_PER_NODE=${GPUS_PER_NODE:-8} 10 | CPUS_PER_TASK=${CPUS_PER_TASK:-2} 11 | PY_ARGS=${@:5} 12 | SRUN_ARGS=${SRUN_ARGS:-""} 13 | 14 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 15 | srun -p ${PARTITION} \ 16 | --job-name=${JOB_NAME} \ 17 | --gres=gpu:${GPUS_PER_NODE} \ 18 | --ntasks=${GPUS} \ 19 | --ntasks-per-node=${GPUS_PER_NODE} \ 20 | --cpus-per-task=${CPUS_PER_TASK} \ 21 | --kill-on-bad-exit=1 \ 22 | ${SRUN_ARGS} \ 23 | python -u $(dirname "$0")/mot_param_search.py ${CONFIG} --launcher="slurm" ${PY_ARGS} 24 | -------------------------------------------------------------------------------- /detection/tools/dataset_converters/scripts/preprocess_coco2017.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | DOWNLOAD_DIR=$1 4 | DATA_ROOT=$2 5 | 6 | unzip $DOWNLOAD_DIR/OpenDataLab___COCO_2017/raw/Images/val2017.zip -d $DATA_ROOT 7 | unzip $DOWNLOAD_DIR/OpenDataLab___COCO_2017/raw/Images/train2017.zip -d $DATA_ROOT 8 | unzip $DOWNLOAD_DIR/OpenDataLab___COCO_2017/raw/Images/test2017.zip -d $DATA_ROOT/ 9 | unzip $DOWNLOAD_DIR/OpenDataLab___COCO_2017/raw/Images/unlabeled2017.zip -d $DATA_ROOT 10 | unzip $DOWNLOAD_DIR/OpenDataLab___COCO_2017/raw/Annotations/stuff_annotations_trainval2017.zip -d $DATA_ROOT/ 11 | unzip $DOWNLOAD_DIR/OpenDataLab___COCO_2017/raw/Annotations/panoptic_annotations_trainval2017.zip -d $DATA_ROOT/ 12 | unzip $DOWNLOAD_DIR/OpenDataLab___COCO_2017/raw/Annotations/image_info_unlabeled2017.zip -d $DATA_ROOT/ 13 | unzip $DOWNLOAD_DIR/OpenDataLab___COCO_2017/raw/Annotations/image_info_test2017.zip -d $DATA_ROOT/ 14 | unzip $DOWNLOAD_DIR/OpenDataLab___COCO_2017/raw/Annotations/annotations_trainval2017.zip -d $DATA_ROOT 15 | rm -rf $DOWNLOAD_DIR/OpenDataLab___COCO_2017 16 | -------------------------------------------------------------------------------- /detection/tools/dataset_converters/scripts/preprocess_voc2007.sh: 
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | DOWNLOAD_DIR=$1 4 | DATA_ROOT=$2 5 | 6 | tar -xvf $DOWNLOAD_DIR/OpenDataLab___PASCAL_VOC2007/raw/VOCtrainval_06-Nov-2007.tar -C $DATA_ROOT 7 | tar -xvf $DOWNLOAD_DIR/OpenDataLab___PASCAL_VOC2007/raw/VOCtestnoimgs_06-Nov-2007.tar -C $DATA_ROOT 8 | rm -rf $DOWNLOAD_DIR/OpenDataLab___PASCAL_VOC2007 9 | -------------------------------------------------------------------------------- /detection/tools/dataset_converters/scripts/preprocess_voc2012.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | DOWNLOAD_DIR=$1 4 | DATA_ROOT=$2 5 | 6 | tar -xvf $DOWNLOAD_DIR/OpenDataLab___PASCAL_VOC2012/raw/VOCtrainval_11-May-2012.tar -C $DATA_ROOT 7 | tar -xvf $DOWNLOAD_DIR/OpenDataLab___PASCAL_VOC2012/raw/VOC2012test.tar -C $DATA_ROOT 8 | rm -rf $DOWNLOAD_DIR/OpenDataLab___PASCAL_VOC2012 9 | -------------------------------------------------------------------------------- /detection/tools/dist_test.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | CONFIG=$1 4 | CHECKPOINT=$2 5 | GPUS=$3 6 | NNODES=${NNODES:-1} 7 | NODE_RANK=${NODE_RANK:-0} 8 | PORT=${PORT:-29500} 9 | MASTER_ADDR=${MASTER_ADDR:-"127.0.0.1"} 10 | 11 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 12 | python -m torch.distributed.launch \ 13 | --nnodes=$NNODES \ 14 | --node_rank=$NODE_RANK \ 15 | --master_addr=$MASTER_ADDR \ 16 | --nproc_per_node=$GPUS \ 17 | --master_port=$PORT \ 18 | $(dirname "$0")/test.py \ 19 | $CONFIG \ 20 | $CHECKPOINT \ 21 | --launcher pytorch \ 22 | ${@:4} 23 | -------------------------------------------------------------------------------- /detection/tools/dist_test_tracking.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | CONFIG=$1 4 | GPUS=$2 5 | NNODES=${NNODES:-1} 6 | NODE_RANK=${NODE_RANK:-0} 7 | PORT=${PORT:-29500} 8 | MASTER_ADDR=${MASTER_ADDR:-"127.0.0.1"} 9 | 10 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 11 | python -m torch.distributed.launch \ 12 | --nnodes=$NNODES \ 13 | --node_rank=$NODE_RANK \ 14 | --master_addr=$MASTER_ADDR \ 15 | --nproc_per_node=$GPUS \ 16 | --master_port=$PORT \ 17 | $(dirname "$0")/test_tracking.py \ 18 | $CONFIG \ 19 | --launcher pytorch \ 20 | ${@:3} 21 | -------------------------------------------------------------------------------- /detection/tools/dist_train.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | CONFIG=$1 4 | GPUS=$2 5 | NNODES=${NNODES:-1} 6 | NODE_RANK=${NODE_RANK:-0} 7 | PORT=${PORT:-29500} 8 | MASTER_ADDR=${MASTER_ADDR:-"127.0.0.1"} 9 | 10 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 11 | python -m torch.distributed.launch \ 12 | --nnodes=$NNODES \ 13 | --node_rank=$NODE_RANK \ 14 | --master_addr=$MASTER_ADDR \ 15 | --nproc_per_node=$GPUS \ 16 | --master_port=$PORT \ 17 | $(dirname "$0")/train.py \ 18 | $CONFIG \ 19 | --launcher pytorch ${@:3} 20 | -------------------------------------------------------------------------------- /detection/tools/misc/gen_coco_panoptic_test_info.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os.path as osp 3 | 4 | from mmengine.fileio import dump, load 5 | 6 | 7 | def parse_args(): 8 | parser = argparse.ArgumentParser( 9 | description='Generate COCO test image information ' 10 | 'for COCO 
panoptic segmentation.') 11 | parser.add_argument('data_root', help='Path to COCO annotation directory.') 12 | args = parser.parse_args() 13 | 14 | return args 15 | 16 | 17 | def main(): 18 | args = parse_args() 19 | data_root = args.data_root 20 | val_info = load(osp.join(data_root, 'panoptic_val2017.json')) 21 | test_old_info = load(osp.join(data_root, 'image_info_test-dev2017.json')) 22 | 23 | # replace categories from image_info_test-dev2017.json 24 | # with categories from panoptic_val2017.json which 25 | # has attribute `isthing`. 26 | test_info = test_old_info 27 | test_info.update({'categories': val_info['categories']}) 28 | dump(test_info, osp.join(data_root, 29 | 'panoptic_image_info_test-dev2017.json')) 30 | 31 | 32 | if __name__ == '__main__': 33 | main() 34 | -------------------------------------------------------------------------------- /detection/tools/model_converters/detectron2_to_mmdet.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import argparse 3 | from collections import OrderedDict 4 | 5 | import torch 6 | from mmengine.fileio import load 7 | from mmengine.runner import save_checkpoint 8 | 9 | 10 | def convert(src: str, dst: str, prefix: str = 'd2_model') -> None: 11 | """Convert Detectron2 checkpoint to MMDetection style. 12 | 13 | Args: 14 | src (str): The Detectron2 checkpoint path, should end with `.pkl`. 15 | dst (str): The MMDetection checkpoint path. 16 | prefix (str): The prefix of MMDetection model, defaults to 'd2_model'. 17 | """ 18 | # load the Detectron2 checkpoint 19 | assert src.endswith('pkl'), \ 20 | 'the source Detectron2 checkpoint should end with `pkl`.' 21 | d2_model = load(src, encoding='latin1').get('model') 22 | assert d2_model is not None 23 | 24 | # convert to mmdet style 25 | dst_state_dict = OrderedDict() 26 | for name, value in d2_model.items(): 27 | if not isinstance(value, torch.Tensor): 28 | value = torch.from_numpy(value) 29 | dst_state_dict[f'{prefix}.{name}'] = value 30 | 31 | mmdet_model = dict(state_dict=dst_state_dict, meta=dict()) 32 | save_checkpoint(mmdet_model, dst) 33 | print(f'Convert Detectron2 model {src} to MMDetection model {dst}') 34 | 35 | 36 | def main(): 37 | parser = argparse.ArgumentParser( 38 | description='Convert Detectron2 checkpoint to MMDetection style') 39 | parser.add_argument('src', help='Detectron2 model path') 40 | parser.add_argument('dst', help='MMDetection model save path') 41 | parser.add_argument( 42 | '--prefix', default='d2_model', type=str, help='prefix of the model') 43 | args = parser.parse_args() 44 | convert(args.src, args.dst, args.prefix) 45 | 46 | 47 | if __name__ == '__main__': 48 | main() 49 | -------------------------------------------------------------------------------- /detection/tools/model_converters/selfsup2mmdet.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved.
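# Usage sketch (checkpoint paths are illustrative, not shipped with the repo):
#   python tools/model_converters/selfsup2mmdet.py \
#       moco_v2.pth moco_v2_mmdet.pth --selfsup moco
# MoCo checkpoints keep the query-encoder weights under keys prefixed with
# 'module.encoder_q.'; the converter strips that prefix so the remaining
# backbone keys load directly into an mmdet model.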
2 | import argparse 3 | from collections import OrderedDict 4 | 5 | import torch 6 | 7 | 8 | def moco_convert(src, dst): 9 | """Convert keys in MoCo pretrained models to mmdet style.""" 10 | # load the MoCo checkpoint 11 | moco_model = torch.load(src) 12 | blobs = moco_model['state_dict'] 13 | # convert to pytorch style 14 | state_dict = OrderedDict() 15 | for k, v in blobs.items(): 16 | if not k.startswith('module.encoder_q.'): 17 | continue 18 | old_k = k 19 | k = k.replace('module.encoder_q.', '') 20 | state_dict[k] = v 21 | print(old_k, '->', k) 22 | # save checkpoint 23 | checkpoint = dict() 24 | checkpoint['state_dict'] = state_dict 25 | torch.save(checkpoint, dst) 26 | 27 | 28 | def main(): 29 | parser = argparse.ArgumentParser(description='Convert model keys') 30 | parser.add_argument('src', help='source self-supervised checkpoint path') 31 | parser.add_argument('dst', help='save path') 32 | parser.add_argument( 33 | '--selfsup', type=str, choices=['moco', 'swav'], help='method used for the self-supervised pretraining') 34 | args = parser.parse_args() 35 | if args.selfsup == 'moco': 36 | moco_convert(args.src, args.dst) 37 | elif args.selfsup == 'swav': 38 | print('SWAV does not need to convert the keys') 39 | 40 | 41 | if __name__ == '__main__': 42 | main() 43 | -------------------------------------------------------------------------------- /detection/tools/slurm_test.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -x 4 | 5 | PARTITION=$1 6 | JOB_NAME=$2 7 | CONFIG=$3 8 | CHECKPOINT=$4 9 | GPUS=${GPUS:-8} 10 | GPUS_PER_NODE=${GPUS_PER_NODE:-8} 11 | CPUS_PER_TASK=${CPUS_PER_TASK:-5} 12 | PY_ARGS=${@:5} 13 | SRUN_ARGS=${SRUN_ARGS:-""} 14 | 15 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 16 | srun -p ${PARTITION} \ 17 | --job-name=${JOB_NAME} \ 18 | --gres=gpu:${GPUS_PER_NODE} \ 19 | --ntasks=${GPUS} \ 20 | --ntasks-per-node=${GPUS_PER_NODE} \ 21 | --cpus-per-task=${CPUS_PER_TASK} \ 22 | --kill-on-bad-exit=1 \ 23 | ${SRUN_ARGS} \ 24 | python -u tools/test.py ${CONFIG} ${CHECKPOINT} --launcher="slurm" ${PY_ARGS} 25 | -------------------------------------------------------------------------------- /detection/tools/slurm_test_tracking.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -x 4 | 5 | PARTITION=$1 6 | JOB_NAME=$2 7 | CONFIG=$3 8 | GPUS=${GPUS:-8} 9 | GPUS_PER_NODE=${GPUS_PER_NODE:-8} 10 | CPUS_PER_TASK=${CPUS_PER_TASK:-5} 11 | PY_ARGS=${@:4} 12 | SRUN_ARGS=${SRUN_ARGS:-""} 13 | 14 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 15 | srun -p ${PARTITION} \ 16 | --job-name=${JOB_NAME} \ 17 | --gres=gpu:${GPUS_PER_NODE} \ 18 | --ntasks=${GPUS} \ 19 | --ntasks-per-node=${GPUS_PER_NODE} \ 20 | --cpus-per-task=${CPUS_PER_TASK} \ 21 | --kill-on-bad-exit=1 \ 22 | ${SRUN_ARGS} \ 23 | python -u tools/test_tracking.py ${CONFIG} --launcher="slurm" ${PY_ARGS} 24 | -------------------------------------------------------------------------------- /detection/tools/slurm_train.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -x 4 | 5 | PARTITION=$1 6 | JOB_NAME=$2 7 | CONFIG=$3 8 | WORK_DIR=$4 9 | GPUS=${GPUS:-8} 10 | GPUS_PER_NODE=${GPUS_PER_NODE:-8} 11 | CPUS_PER_TASK=${CPUS_PER_TASK:-5} 12 | SRUN_ARGS=${SRUN_ARGS:-""} 13 | PY_ARGS=${@:5} 14 | 15 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 16 | srun -p ${PARTITION} \ 17 | --job-name=${JOB_NAME} \ 18 | --gres=gpu:${GPUS_PER_NODE} \ 19 | --ntasks=${GPUS} \ 20 |
--ntasks-per-node=${GPUS_PER_NODE} \ 21 | --cpus-per-task=${CPUS_PER_TASK} \ 22 | --kill-on-bad-exit=1 \ 23 | ${SRUN_ARGS} \ 24 | python -u tools/train.py ${CONFIG} --work-dir=${WORK_DIR} --launcher="slurm" ${PY_ARGS} 25 | -------------------------------------------------------------------------------- /kernels/selective_scan/csrc/selective_scan/cub_extra.cuh: -------------------------------------------------------------------------------- 1 | // WarpMask is copied from /usr/local/cuda-12.1/include/cub/util_ptx.cuh 2 | // PowerOfTwo is copied from /usr/local/cuda-12.1/include/cub/util_type.cuh 3 | 4 | #pragma once 5 | 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | /** 12 | * \brief Statically determine if N is a power-of-two 13 | */ 14 | template <int N> 15 | struct PowerOfTwo 16 | { 17 | enum { VALUE = ((N & (N - 1)) == 0) }; 18 | }; 19 | 20 | 21 | /** 22 | * @brief Returns the warp mask for a warp of @p LOGICAL_WARP_THREADS threads 23 | * 24 | * @par 25 | * If the number of threads assigned to the virtual warp is not a power of two, 26 | * it's assumed that only one virtual warp exists. 27 | * 28 | * @tparam LOGICAL_WARP_THREADS [optional] The number of threads per 29 | * "logical" warp (may be less than the number of 30 | * hardware warp threads). 31 | * @param warp_id Id of virtual warp within architectural warp 32 | */ 33 | template <int LOGICAL_WARP_THREADS> 34 | __host__ __device__ __forceinline__ 35 | unsigned int WarpMask(unsigned int warp_id) 36 | { 37 | constexpr bool is_pow_of_two = PowerOfTwo<LOGICAL_WARP_THREADS>::VALUE; 38 | constexpr bool is_arch_warp = LOGICAL_WARP_THREADS == CUB_WARP_THREADS(0); 39 | 40 | unsigned int member_mask = 0xFFFFFFFFu >> 41 | (CUB_WARP_THREADS(0) - LOGICAL_WARP_THREADS); 42 | 43 | if (is_pow_of_two && !is_arch_warp) 44 | { 45 | member_mask <<= warp_id * LOGICAL_WARP_THREADS; 46 | } 47 | 48 | return member_mask; 49 | } 50 | -------------------------------------------------------------------------------- /kernels/selective_scan/csrc/selective_scan/cus/selective_scan_core_bwd.cu: -------------------------------------------------------------------------------- 1 | /****************************************************************************** 2 | * Copyright (c) 2023, Tri Dao. 3 | ******************************************************************************/ 4 | #include "selective_scan_bwd_kernel.cuh" 5 | 6 | template void selective_scan_bwd_cuda<1, float, float>(SSMParamsBwd &params, cudaStream_t stream); 7 | template void selective_scan_bwd_cuda<1, at::Half, float>(SSMParamsBwd &params, cudaStream_t stream); 8 | template void selective_scan_bwd_cuda<1, at::BFloat16, float>(SSMParamsBwd &params, cudaStream_t stream); 9 | 10 | -------------------------------------------------------------------------------- /kernels/selective_scan/csrc/selective_scan/cus/selective_scan_core_fwd.cu: -------------------------------------------------------------------------------- 1 | /****************************************************************************** 2 | * Copyright (c) 2023, Tri Dao.
3 | ******************************************************************************/ 4 | #include "selective_scan_fwd_kernel.cuh" 5 | 6 | template void selective_scan_fwd_cuda<1, float, float>(SSMParamsBase ¶ms, cudaStream_t stream); 7 | template void selective_scan_fwd_cuda<1, at::Half, float>(SSMParamsBase ¶ms, cudaStream_t stream); 8 | template void selective_scan_fwd_cuda<1, at::BFloat16, float>(SSMParamsBase ¶ms, cudaStream_t stream); 9 | 10 | -------------------------------------------------------------------------------- /kernels/selective_scan/csrc/selective_scan/cusndstate/selective_scan_core_bwd.cu: -------------------------------------------------------------------------------- 1 | /****************************************************************************** 2 | * Copyright (c) 2023, Tri Dao. 3 | ******************************************************************************/ 4 | #include "selective_scan_bwd_kernel_ndstate.cuh" 5 | 6 | template void selective_scan_bwd_cuda<1, float, float>(SSMParamsBwd ¶ms, cudaStream_t stream); 7 | template void selective_scan_bwd_cuda<1, at::Half, float>(SSMParamsBwd ¶ms, cudaStream_t stream); 8 | template void selective_scan_bwd_cuda<1, at::BFloat16, float>(SSMParamsBwd ¶ms, cudaStream_t stream); 9 | 10 | -------------------------------------------------------------------------------- /kernels/selective_scan/csrc/selective_scan/cusndstate/selective_scan_core_fwd.cu: -------------------------------------------------------------------------------- 1 | /****************************************************************************** 2 | * Copyright (c) 2023, Tri Dao. 3 | ******************************************************************************/ 4 | #include "selective_scan_fwd_kernel_ndstate.cuh" 5 | 6 | template void selective_scan_fwd_cuda<1, float, float>(SSMParamsBase ¶ms, cudaStream_t stream); 7 | template void selective_scan_fwd_cuda<1, at::Half, float>(SSMParamsBase ¶ms, cudaStream_t stream); 8 | template void selective_scan_fwd_cuda<1, at::BFloat16, float>(SSMParamsBase ¶ms, cudaStream_t stream); 9 | 10 | -------------------------------------------------------------------------------- /kernels/selective_scan/csrc/selective_scan/cusnrow/selective_scan_core_bwd.cu: -------------------------------------------------------------------------------- 1 | /****************************************************************************** 2 | * Copyright (c) 2023, Tri Dao. 3 | ******************************************************************************/ 4 | #include "selective_scan_bwd_kernel_nrow.cuh" 5 | 6 | template void selective_scan_bwd_cuda<1, float, float>(SSMParamsBwd ¶ms, cudaStream_t stream); 7 | template void selective_scan_bwd_cuda<1, at::Half, float>(SSMParamsBwd ¶ms, cudaStream_t stream); 8 | template void selective_scan_bwd_cuda<1, at::BFloat16, float>(SSMParamsBwd ¶ms, cudaStream_t stream); 9 | 10 | -------------------------------------------------------------------------------- /kernels/selective_scan/csrc/selective_scan/cusnrow/selective_scan_core_bwd2.cu: -------------------------------------------------------------------------------- 1 | /****************************************************************************** 2 | * Copyright (c) 2023, Tri Dao. 
3 | ******************************************************************************/ 4 | #include "selective_scan_bwd_kernel_nrow.cuh" 5 | 6 | template void selective_scan_bwd_cuda<2, float, float>(SSMParamsBwd ¶ms, cudaStream_t stream); 7 | template void selective_scan_bwd_cuda<2, at::Half, float>(SSMParamsBwd ¶ms, cudaStream_t stream); 8 | template void selective_scan_bwd_cuda<2, at::BFloat16, float>(SSMParamsBwd ¶ms, cudaStream_t stream); 9 | 10 | -------------------------------------------------------------------------------- /kernels/selective_scan/csrc/selective_scan/cusnrow/selective_scan_core_bwd3.cu: -------------------------------------------------------------------------------- 1 | /****************************************************************************** 2 | * Copyright (c) 2023, Tri Dao. 3 | ******************************************************************************/ 4 | #include "selective_scan_bwd_kernel_nrow.cuh" 5 | 6 | template void selective_scan_bwd_cuda<3, float, float>(SSMParamsBwd ¶ms, cudaStream_t stream); 7 | template void selective_scan_bwd_cuda<3, at::Half, float>(SSMParamsBwd ¶ms, cudaStream_t stream); 8 | template void selective_scan_bwd_cuda<3, at::BFloat16, float>(SSMParamsBwd ¶ms, cudaStream_t stream); 9 | -------------------------------------------------------------------------------- /kernels/selective_scan/csrc/selective_scan/cusnrow/selective_scan_core_bwd4.cu: -------------------------------------------------------------------------------- 1 | /****************************************************************************** 2 | * Copyright (c) 2023, Tri Dao. 3 | ******************************************************************************/ 4 | #include "selective_scan_bwd_kernel_nrow.cuh" 5 | 6 | template void selective_scan_bwd_cuda<4, float, float>(SSMParamsBwd ¶ms, cudaStream_t stream); 7 | template void selective_scan_bwd_cuda<4, at::Half, float>(SSMParamsBwd ¶ms, cudaStream_t stream); 8 | template void selective_scan_bwd_cuda<4, at::BFloat16, float>(SSMParamsBwd ¶ms, cudaStream_t stream); 9 | -------------------------------------------------------------------------------- /kernels/selective_scan/csrc/selective_scan/cusnrow/selective_scan_core_fwd.cu: -------------------------------------------------------------------------------- 1 | /****************************************************************************** 2 | * Copyright (c) 2023, Tri Dao. 3 | ******************************************************************************/ 4 | #include "selective_scan_fwd_kernel_nrow.cuh" 5 | 6 | template void selective_scan_fwd_cuda<1, float, float>(SSMParamsBase ¶ms, cudaStream_t stream); 7 | template void selective_scan_fwd_cuda<1, at::Half, float>(SSMParamsBase ¶ms, cudaStream_t stream); 8 | template void selective_scan_fwd_cuda<1, at::BFloat16, float>(SSMParamsBase ¶ms, cudaStream_t stream); 9 | 10 | -------------------------------------------------------------------------------- /kernels/selective_scan/csrc/selective_scan/cusnrow/selective_scan_core_fwd2.cu: -------------------------------------------------------------------------------- 1 | /****************************************************************************** 2 | * Copyright (c) 2023, Tri Dao. 
3 | ******************************************************************************/ 4 | #include "selective_scan_fwd_kernel_nrow.cuh" 5 | 6 | template void selective_scan_fwd_cuda<2, float, float>(SSMParamsBase ¶ms, cudaStream_t stream); 7 | template void selective_scan_fwd_cuda<2, at::Half, float>(SSMParamsBase ¶ms, cudaStream_t stream); 8 | template void selective_scan_fwd_cuda<2, at::BFloat16, float>(SSMParamsBase ¶ms, cudaStream_t stream); 9 | 10 | -------------------------------------------------------------------------------- /kernels/selective_scan/csrc/selective_scan/cusnrow/selective_scan_core_fwd3.cu: -------------------------------------------------------------------------------- 1 | /****************************************************************************** 2 | * Copyright (c) 2023, Tri Dao. 3 | ******************************************************************************/ 4 | #include "selective_scan_fwd_kernel_nrow.cuh" 5 | 6 | template void selective_scan_fwd_cuda<3, float, float>(SSMParamsBase ¶ms, cudaStream_t stream); 7 | template void selective_scan_fwd_cuda<3, at::Half, float>(SSMParamsBase ¶ms, cudaStream_t stream); 8 | template void selective_scan_fwd_cuda<3, at::BFloat16, float>(SSMParamsBase ¶ms, cudaStream_t stream); 9 | 10 | -------------------------------------------------------------------------------- /kernels/selective_scan/csrc/selective_scan/cusnrow/selective_scan_core_fwd4.cu: -------------------------------------------------------------------------------- 1 | /****************************************************************************** 2 | * Copyright (c) 2023, Tri Dao. 3 | ******************************************************************************/ 4 | #include "selective_scan_fwd_kernel_nrow.cuh" 5 | 6 | template void selective_scan_fwd_cuda<4, float, float>(SSMParamsBase ¶ms, cudaStream_t stream); 7 | template void selective_scan_fwd_cuda<4, at::Half, float>(SSMParamsBase ¶ms, cudaStream_t stream); 8 | template void selective_scan_fwd_cuda<4, at::BFloat16, float>(SSMParamsBase ¶ms, cudaStream_t stream); 9 | 10 | -------------------------------------------------------------------------------- /kernels/selective_scan/csrc/selective_scan/cusoflex/selective_scan_core_bwd.cu: -------------------------------------------------------------------------------- 1 | /****************************************************************************** 2 | * Copyright (c) 2023, Tri Dao. 3 | ******************************************************************************/ 4 | #include "selective_scan_bwd_kernel_oflex.cuh" 5 | 6 | template void selective_scan_bwd_cuda<1, float, float, float>(SSMParamsBwd ¶ms, cudaStream_t stream); 7 | template void selective_scan_bwd_cuda<1, at::Half, float, float>(SSMParamsBwd ¶ms, cudaStream_t stream); 8 | template void selective_scan_bwd_cuda<1, at::BFloat16, float, float>(SSMParamsBwd ¶ms, cudaStream_t stream); 9 | template void selective_scan_bwd_cuda<1, at::Half, float, at::Half>(SSMParamsBwd ¶ms, cudaStream_t stream); 10 | template void selective_scan_bwd_cuda<1, at::BFloat16, float, at::BFloat16>(SSMParamsBwd ¶ms, cudaStream_t stream); 11 | 12 | -------------------------------------------------------------------------------- /kernels/selective_scan/csrc/selective_scan/cusoflex/selective_scan_core_fwd.cu: -------------------------------------------------------------------------------- 1 | /****************************************************************************** 2 | * Copyright (c) 2023, Tri Dao. 
3 | ******************************************************************************/ 4 | #include "selective_scan_fwd_kernel_oflex.cuh" 5 | 6 | template void selective_scan_fwd_cuda<1, float, float, float>(SSMParamsBase ¶ms, cudaStream_t stream); 7 | template void selective_scan_fwd_cuda<1, at::Half, float, float>(SSMParamsBase ¶ms, cudaStream_t stream); 8 | template void selective_scan_fwd_cuda<1, at::BFloat16, float, float>(SSMParamsBase ¶ms, cudaStream_t stream); 9 | template void selective_scan_fwd_cuda<1, at::Half, float, at::Half>(SSMParamsBase ¶ms, cudaStream_t stream); 10 | template void selective_scan_fwd_cuda<1, at::BFloat16, float, at::BFloat16>(SSMParamsBase ¶ms, cudaStream_t stream); 11 | 12 | -------------------------------------------------------------------------------- /kernels/selective_scan/csrc/selective_scan/static_switch.h: -------------------------------------------------------------------------------- 1 | // Inspired by https://github.com/NVIDIA/DALI/blob/main/include/dali/core/static_switch.h 2 | // and https://github.com/pytorch/pytorch/blob/master/aten/src/ATen/Dispatch.h 3 | 4 | #pragma once 5 | 6 | /// @param COND - a boolean expression to switch by 7 | /// @param CONST_NAME - a name given for the constexpr bool variable. 8 | /// @param ... - code to execute for true and false 9 | /// 10 | /// Usage: 11 | /// ``` 12 | /// BOOL_SWITCH(flag, BoolConst, [&] { 13 | /// some_function(...); 14 | /// }); 15 | /// ``` 16 | #define BOOL_SWITCH(COND, CONST_NAME, ...) \ 17 | [&] { \ 18 | if (COND) { \ 19 | constexpr bool CONST_NAME = true; \ 20 | return __VA_ARGS__(); \ 21 | } else { \ 22 | constexpr bool CONST_NAME = false; \ 23 | return __VA_ARGS__(); \ 24 | } \ 25 | }() 26 | -------------------------------------------------------------------------------- /pretrained_weights/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HashmatShadab/MambaRobustness/e69a676bbbd072b6bc9b2c806f257a675682f6cd/pretrained_weights/.gitkeep -------------------------------------------------------------------------------- /req.txt: -------------------------------------------------------------------------------- 1 | packaging 2 | timm==0.4.12 3 | pytest 4 | chardet 5 | yacs 6 | termcolor 7 | submitit 8 | tensorboardX 9 | fvcore 10 | seaborn 11 | scipy 12 | einops 13 | torch_dct==0.1.6 14 | numba 15 | scikit-image -------------------------------------------------------------------------------- /segmentation/__init__.py: -------------------------------------------------------------------------------- 1 | # configs/ and tools/ is copied from https://github.com/open-mmlab/mmsegmentation: version 1.2.2 2 | # tools/train.py#13 is added with "import model" 3 | # tools/test.py#8 is added with "import model" 4 | 5 | -------------------------------------------------------------------------------- /segmentation/configs/_base_/datasets/cityscapes_1024x1024.py: -------------------------------------------------------------------------------- 1 | _base_ = './cityscapes.py' 2 | crop_size = (1024, 1024) 3 | train_pipeline = [ 4 | dict(type='LoadImageFromFile'), 5 | dict(type='LoadAnnotations'), 6 | dict( 7 | type='RandomResize', 8 | scale=(2048, 1024), 9 | ratio_range=(0.5, 2.0), 10 | keep_ratio=True), 11 | dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), 12 | dict(type='RandomFlip', prob=0.5), 13 | dict(type='PhotoMetricDistortion'), 14 | dict(type='PackSegInputs') 15 | ] 16 | test_pipeline = [ 17 | 
dict(type='LoadImageFromFile'), 18 | dict(type='Resize', scale=(2048, 1024), keep_ratio=True), 19 | # add loading annotation after ``Resize`` because ground truth 20 | # does not need to do resize data transform 21 | dict(type='LoadAnnotations'), 22 | dict(type='PackSegInputs') 23 | ] 24 | train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) 25 | val_dataloader = dict(dataset=dict(pipeline=test_pipeline)) 26 | test_dataloader = val_dataloader 27 | 28 | val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU']) 29 | test_evaluator = val_evaluator 30 | -------------------------------------------------------------------------------- /segmentation/configs/_base_/datasets/cityscapes_768x768.py: -------------------------------------------------------------------------------- 1 | _base_ = './cityscapes.py' 2 | crop_size = (768, 768) 3 | train_pipeline = [ 4 | dict(type='LoadImageFromFile'), 5 | dict(type='LoadAnnotations'), 6 | dict( 7 | type='RandomResize', 8 | scale=(2049, 1025), 9 | ratio_range=(0.5, 2.0), 10 | keep_ratio=True), 11 | dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), 12 | dict(type='RandomFlip', prob=0.5), 13 | dict(type='PhotoMetricDistortion'), 14 | dict(type='PackSegInputs') 15 | ] 16 | test_pipeline = [ 17 | dict(type='LoadImageFromFile'), 18 | dict(type='Resize', scale=(2049, 1025), keep_ratio=True), 19 | # add loading annotation after ``Resize`` because ground truth 20 | # does not need to do resize data transform 21 | dict(type='LoadAnnotations'), 22 | dict(type='PackSegInputs') 23 | ] 24 | train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) 25 | val_dataloader = dict(dataset=dict(pipeline=test_pipeline)) 26 | test_dataloader = val_dataloader 27 | 28 | val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU']) 29 | test_evaluator = val_evaluator 30 | -------------------------------------------------------------------------------- /segmentation/configs/_base_/datasets/cityscapes_769x769.py: -------------------------------------------------------------------------------- 1 | _base_ = './cityscapes.py' 2 | crop_size = (769, 769) 3 | train_pipeline = [ 4 | dict(type='LoadImageFromFile'), 5 | dict(type='LoadAnnotations'), 6 | dict( 7 | type='RandomResize', 8 | scale=(2049, 1025), 9 | ratio_range=(0.5, 2.0), 10 | keep_ratio=True), 11 | dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), 12 | dict(type='RandomFlip', prob=0.5), 13 | dict(type='PhotoMetricDistortion'), 14 | dict(type='PackSegInputs') 15 | ] 16 | test_pipeline = [ 17 | dict(type='LoadImageFromFile'), 18 | dict(type='Resize', scale=(2049, 1025), keep_ratio=True), 19 | # add loading annotation after ``Resize`` because ground truth 20 | # does not need to do resize data transform 21 | dict(type='LoadAnnotations'), 22 | dict(type='PackSegInputs') 23 | ] 24 | train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) 25 | val_dataloader = dict(dataset=dict(pipeline=test_pipeline)) 26 | test_dataloader = val_dataloader 27 | 28 | val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU']) 29 | test_evaluator = val_evaluator 30 | -------------------------------------------------------------------------------- /segmentation/configs/_base_/datasets/cityscapes_832x832.py: -------------------------------------------------------------------------------- 1 | _base_ = './cityscapes.py' 2 | crop_size = (832, 832) 3 | train_pipeline = [ 4 | dict(type='LoadImageFromFile'), 5 | dict(type='LoadAnnotations'), 6 | dict( 7 | type='RandomResize', 8 | scale=(2048, 1024), 9 | 
ratio_range=(0.5, 2.0), 10 | keep_ratio=True), 11 | dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), 12 | dict(type='RandomFlip', prob=0.5), 13 | dict(type='PhotoMetricDistortion'), 14 | dict(type='PackSegInputs') 15 | ] 16 | test_pipeline = [ 17 | dict(type='LoadImageFromFile'), 18 | dict(type='Resize', scale=(2048, 1024), keep_ratio=True), 19 | # add loading annotation after ``Resize`` because ground truth 20 | # does not need to do resize data transform 21 | dict(type='LoadAnnotations'), 22 | dict(type='PackSegInputs') 23 | ] 24 | train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) 25 | val_dataloader = dict(dataset=dict(pipeline=test_pipeline)) 26 | test_dataloader = val_dataloader 27 | 28 | val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU']) 29 | test_evaluator = val_evaluator 30 | -------------------------------------------------------------------------------- /segmentation/configs/_base_/datasets/levir_256x256.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'LEVIRCDDataset' 3 | data_root = r'data/LEVIRCD' 4 | 5 | albu_train_transforms = [ 6 | dict(type='RandomBrightnessContrast', p=0.2), 7 | dict(type='HorizontalFlip', p=0.5), 8 | dict(type='VerticalFlip', p=0.5) 9 | ] 10 | 11 | train_pipeline = [ 12 | dict(type='LoadMultipleRSImageFromFile'), 13 | dict(type='LoadAnnotations'), 14 | dict(type='Albu', transforms=albu_train_transforms), 15 | dict(type='ConcatCDInput'), 16 | dict(type='PackSegInputs') 17 | ] 18 | test_pipeline = [ 19 | dict(type='LoadMultipleRSImageFromFile'), 20 | dict(type='LoadAnnotations'), 21 | dict(type='ConcatCDInput'), 22 | dict(type='PackSegInputs') 23 | ] 24 | 25 | tta_pipeline = [ 26 | dict(type='LoadMultipleRSImageFromFile'), 27 | dict( 28 | type='TestTimeAug', 29 | transforms=[[dict(type='LoadAnnotations')], 30 | [dict(type='ConcatCDInput')], 31 | [dict(type='PackSegInputs')]]) 32 | ] 33 | train_dataloader = dict( 34 | batch_size=4, 35 | num_workers=4, 36 | persistent_workers=True, 37 | sampler=dict(type='InfiniteSampler', shuffle=True), 38 | dataset=dict( 39 | type=dataset_type, 40 | data_root=data_root, 41 | data_prefix=dict( 42 | img_path='train/A', 43 | img_path2='train/B', 44 | seg_map_path='train/label'), 45 | pipeline=train_pipeline)) 46 | val_dataloader = dict( 47 | batch_size=1, 48 | num_workers=4, 49 | persistent_workers=True, 50 | sampler=dict(type='DefaultSampler', shuffle=False), 51 | dataset=dict( 52 | type=dataset_type, 53 | data_root=data_root, 54 | data_prefix=dict( 55 | img_path='test/A', img_path2='test/B', seg_map_path='test/label'), 56 | pipeline=test_pipeline)) 57 | test_dataloader = val_dataloader 58 | val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU']) 59 | test_evaluator = val_evaluator 60 | -------------------------------------------------------------------------------- /segmentation/configs/_base_/datasets/synapse.py: -------------------------------------------------------------------------------- 1 | dataset_type = 'SynapseDataset' 2 | data_root = 'data/synapse/' 3 | img_scale = (224, 224) 4 | train_pipeline = [ 5 | dict(type='LoadImageFromFile'), 6 | dict(type='LoadAnnotations'), 7 | dict(type='Resize', scale=img_scale, keep_ratio=True), 8 | dict(type='RandomRotFlip', rotate_prob=0.5, flip_prob=0.5, degree=20), 9 | dict(type='PackSegInputs') 10 | ] 11 | test_pipeline = [ 12 | dict(type='LoadImageFromFile'), 13 | dict(type='Resize', scale=img_scale, keep_ratio=True), 14 | dict(type='LoadAnnotations'), 15 | 
dict(type='PackSegInputs') 16 | ] 17 | train_dataloader = dict( 18 | batch_size=6, 19 | num_workers=2, 20 | persistent_workers=True, 21 | sampler=dict(type='InfiniteSampler', shuffle=True), 22 | dataset=dict( 23 | type=dataset_type, 24 | data_root=data_root, 25 | data_prefix=dict( 26 | img_path='img_dir/train', seg_map_path='ann_dir/train'), 27 | pipeline=train_pipeline)) 28 | val_dataloader = dict( 29 | batch_size=1, 30 | num_workers=4, 31 | persistent_workers=True, 32 | sampler=dict(type='DefaultSampler', shuffle=False), 33 | dataset=dict( 34 | type=dataset_type, 35 | data_root=data_root, 36 | data_prefix=dict(img_path='img_dir/val', seg_map_path='ann_dir/val'), 37 | pipeline=test_pipeline)) 38 | test_dataloader = val_dataloader 39 | 40 | val_evaluator = dict(type='IoUMetric', iou_metrics=['mDice']) 41 | test_evaluator = val_evaluator 42 | -------------------------------------------------------------------------------- /segmentation/configs/_base_/default_runtime.py: -------------------------------------------------------------------------------- 1 | default_scope = 'mmseg' 2 | env_cfg = dict( 3 | cudnn_benchmark=True, 4 | mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0), 5 | dist_cfg=dict(backend='nccl'), 6 | ) 7 | vis_backends = [dict(type='LocalVisBackend')] 8 | visualizer = dict( 9 | type='SegLocalVisualizer', vis_backends=vis_backends, name='visualizer') 10 | log_processor = dict(by_epoch=False) 11 | log_level = 'INFO' 12 | load_from = None 13 | resume = False 14 | 15 | tta_model = dict(type='SegTTAModel') 16 | -------------------------------------------------------------------------------- /segmentation/configs/_base_/models/ann_r50-d8.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | data_preprocessor = dict( 4 | type='SegDataPreProcessor', 5 | mean=[123.675, 116.28, 103.53], 6 | std=[58.395, 57.12, 57.375], 7 | bgr_to_rgb=True, 8 | pad_val=0, 9 | seg_pad_val=255) 10 | model = dict( 11 | type='EncoderDecoder', 12 | data_preprocessor=data_preprocessor, 13 | pretrained='open-mmlab://resnet50_v1c', 14 | backbone=dict( 15 | type='ResNetV1c', 16 | depth=50, 17 | num_stages=4, 18 | out_indices=(0, 1, 2, 3), 19 | dilations=(1, 1, 2, 4), 20 | strides=(1, 2, 1, 1), 21 | norm_cfg=norm_cfg, 22 | norm_eval=False, 23 | style='pytorch', 24 | contract_dilation=True), 25 | decode_head=dict( 26 | type='ANNHead', 27 | in_channels=[1024, 2048], 28 | in_index=[2, 3], 29 | channels=512, 30 | project_channels=256, 31 | query_scales=(1, ), 32 | key_pool_scales=(1, 3, 6, 8), 33 | dropout_ratio=0.1, 34 | num_classes=19, 35 | norm_cfg=norm_cfg, 36 | align_corners=False, 37 | loss_decode=dict( 38 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 39 | auxiliary_head=dict( 40 | type='FCNHead', 41 | in_channels=1024, 42 | in_index=2, 43 | channels=256, 44 | num_convs=1, 45 | concat_input=False, 46 | dropout_ratio=0.1, 47 | num_classes=19, 48 | norm_cfg=norm_cfg, 49 | align_corners=False, 50 | loss_decode=dict( 51 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 52 | # model training and testing settings 53 | train_cfg=dict(), 54 | test_cfg=dict(mode='whole')) 55 | -------------------------------------------------------------------------------- /segmentation/configs/_base_/models/apcnet_r50-d8.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | 
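# Note: SyncBN assumes a distributed launcher; for single-GPU debugging it
# is common to substitute norm_cfg = dict(type='BN', requires_grad=True).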
data_preprocessor = dict( 4 | type='SegDataPreProcessor', 5 | mean=[123.675, 116.28, 103.53], 6 | std=[58.395, 57.12, 57.375], 7 | bgr_to_rgb=True, 8 | pad_val=0, 9 | seg_pad_val=255) 10 | model = dict( 11 | type='EncoderDecoder', 12 | data_preprocessor=data_preprocessor, 13 | pretrained='open-mmlab://resnet50_v1c', 14 | backbone=dict( 15 | type='ResNetV1c', 16 | depth=50, 17 | num_stages=4, 18 | out_indices=(0, 1, 2, 3), 19 | dilations=(1, 1, 2, 4), 20 | strides=(1, 2, 1, 1), 21 | norm_cfg=norm_cfg, 22 | norm_eval=False, 23 | style='pytorch', 24 | contract_dilation=True), 25 | decode_head=dict( 26 | type='APCHead', 27 | in_channels=2048, 28 | in_index=3, 29 | channels=512, 30 | pool_scales=(1, 2, 3, 6), 31 | dropout_ratio=0.1, 32 | num_classes=19, 33 | norm_cfg=dict(type='SyncBN', requires_grad=True), 34 | align_corners=False, 35 | loss_decode=dict( 36 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 37 | auxiliary_head=dict( 38 | type='FCNHead', 39 | in_channels=1024, 40 | in_index=2, 41 | channels=256, 42 | num_convs=1, 43 | concat_input=False, 44 | dropout_ratio=0.1, 45 | num_classes=19, 46 | norm_cfg=norm_cfg, 47 | align_corners=False, 48 | loss_decode=dict( 49 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 50 | # model training and testing settings 51 | train_cfg=dict(), 52 | test_cfg=dict(mode='whole')) 53 | -------------------------------------------------------------------------------- /segmentation/configs/_base_/models/ccnet_r50-d8.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | data_preprocessor = dict( 4 | type='SegDataPreProcessor', 5 | mean=[123.675, 116.28, 103.53], 6 | std=[58.395, 57.12, 57.375], 7 | bgr_to_rgb=True, 8 | pad_val=0, 9 | seg_pad_val=255) 10 | model = dict( 11 | type='EncoderDecoder', 12 | data_preprocessor=data_preprocessor, 13 | pretrained='open-mmlab://resnet50_v1c', 14 | backbone=dict( 15 | type='ResNetV1c', 16 | depth=50, 17 | num_stages=4, 18 | out_indices=(0, 1, 2, 3), 19 | dilations=(1, 1, 2, 4), 20 | strides=(1, 2, 1, 1), 21 | norm_cfg=norm_cfg, 22 | norm_eval=False, 23 | style='pytorch', 24 | contract_dilation=True), 25 | decode_head=dict( 26 | type='CCHead', 27 | in_channels=2048, 28 | in_index=3, 29 | channels=512, 30 | recurrence=2, 31 | dropout_ratio=0.1, 32 | num_classes=19, 33 | norm_cfg=norm_cfg, 34 | align_corners=False, 35 | loss_decode=dict( 36 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 37 | auxiliary_head=dict( 38 | type='FCNHead', 39 | in_channels=1024, 40 | in_index=2, 41 | channels=256, 42 | num_convs=1, 43 | concat_input=False, 44 | dropout_ratio=0.1, 45 | num_classes=19, 46 | norm_cfg=norm_cfg, 47 | align_corners=False, 48 | loss_decode=dict( 49 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 50 | # model training and testing settings 51 | train_cfg=dict(), 52 | test_cfg=dict(mode='whole')) 53 | -------------------------------------------------------------------------------- /segmentation/configs/_base_/models/cgnet.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', eps=1e-03, requires_grad=True) 3 | data_preprocessor = dict( 4 | type='SegDataPreProcessor', 5 | mean=[72.39239876, 82.90891754, 73.15835921], 6 | std=[1, 1, 1], 7 | bgr_to_rgb=True, 8 | pad_val=0, 9 | seg_pad_val=255) 10 | model = dict( 11 | type='EncoderDecoder', 12 | 
data_preprocessor=data_preprocessor, 13 | backbone=dict( 14 | type='CGNet', 15 | norm_cfg=norm_cfg, 16 | in_channels=3, 17 | num_channels=(32, 64, 128), 18 | num_blocks=(3, 21), 19 | dilations=(2, 4), 20 | reductions=(8, 16)), 21 | decode_head=dict( 22 | type='FCNHead', 23 | in_channels=256, 24 | in_index=2, 25 | channels=256, 26 | num_convs=0, 27 | concat_input=False, 28 | dropout_ratio=0, 29 | num_classes=19, 30 | norm_cfg=norm_cfg, 31 | loss_decode=dict( 32 | type='CrossEntropyLoss', 33 | use_sigmoid=False, 34 | loss_weight=1.0, 35 | class_weight=[ 36 | 2.5959933, 6.7415504, 3.5354059, 9.8663225, 9.690899, 9.369352, 37 | 10.289121, 9.953208, 4.3097677, 9.490387, 7.674431, 9.396905, 38 | 10.347791, 6.3927646, 10.226669, 10.241062, 10.280587, 39 | 10.396974, 10.055647 40 | ])), 41 | # model training and testing settings 42 | train_cfg=dict(sampler=None), 43 | test_cfg=dict(mode='whole')) 44 | -------------------------------------------------------------------------------- /segmentation/configs/_base_/models/danet_r50-d8.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | data_preprocessor = dict( 4 | type='SegDataPreProcessor', 5 | mean=[123.675, 116.28, 103.53], 6 | std=[58.395, 57.12, 57.375], 7 | bgr_to_rgb=True, 8 | pad_val=0, 9 | seg_pad_val=255) 10 | model = dict( 11 | type='EncoderDecoder', 12 | data_preprocessor=data_preprocessor, 13 | pretrained='open-mmlab://resnet50_v1c', 14 | backbone=dict( 15 | type='ResNetV1c', 16 | depth=50, 17 | num_stages=4, 18 | out_indices=(0, 1, 2, 3), 19 | dilations=(1, 1, 2, 4), 20 | strides=(1, 2, 1, 1), 21 | norm_cfg=norm_cfg, 22 | norm_eval=False, 23 | style='pytorch', 24 | contract_dilation=True), 25 | decode_head=dict( 26 | type='DAHead', 27 | in_channels=2048, 28 | in_index=3, 29 | channels=512, 30 | pam_channels=64, 31 | dropout_ratio=0.1, 32 | num_classes=19, 33 | norm_cfg=norm_cfg, 34 | align_corners=False, 35 | loss_decode=dict( 36 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 37 | auxiliary_head=dict( 38 | type='FCNHead', 39 | in_channels=1024, 40 | in_index=2, 41 | channels=256, 42 | num_convs=1, 43 | concat_input=False, 44 | dropout_ratio=0.1, 45 | num_classes=19, 46 | norm_cfg=norm_cfg, 47 | align_corners=False, 48 | loss_decode=dict( 49 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 50 | # model training and testing settings 51 | train_cfg=dict(), 52 | test_cfg=dict(mode='whole')) 53 | -------------------------------------------------------------------------------- /segmentation/configs/_base_/models/deeplabv3_r50-d8.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | data_preprocessor = dict( 4 | type='SegDataPreProcessor', 5 | mean=[123.675, 116.28, 103.53], 6 | std=[58.395, 57.12, 57.375], 7 | bgr_to_rgb=True, 8 | pad_val=0, 9 | seg_pad_val=255) 10 | model = dict( 11 | type='EncoderDecoder', 12 | data_preprocessor=data_preprocessor, 13 | pretrained='open-mmlab://resnet50_v1c', 14 | backbone=dict( 15 | type='ResNetV1c', 16 | depth=50, 17 | num_stages=4, 18 | out_indices=(0, 1, 2, 3), 19 | dilations=(1, 1, 2, 4), 20 | strides=(1, 2, 1, 1), 21 | norm_cfg=norm_cfg, 22 | norm_eval=False, 23 | style='pytorch', 24 | contract_dilation=True), 25 | decode_head=dict( 26 | type='ASPPHead', 27 | in_channels=2048, 28 | in_index=3, 29 | channels=512, 30 | dilations=(1, 12, 24, 36), 31 | 
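# parallel atrous rates of the ASPP module (DeepLabV3); the rate-1 branch is a plain 1x1 conv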
dropout_ratio=0.1, 32 | num_classes=19, 33 | norm_cfg=norm_cfg, 34 | align_corners=False, 35 | loss_decode=dict( 36 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 37 | auxiliary_head=dict( 38 | type='FCNHead', 39 | in_channels=1024, 40 | in_index=2, 41 | channels=256, 42 | num_convs=1, 43 | concat_input=False, 44 | dropout_ratio=0.1, 45 | num_classes=19, 46 | norm_cfg=norm_cfg, 47 | align_corners=False, 48 | loss_decode=dict( 49 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 50 | # model training and testing settings 51 | train_cfg=dict(), 52 | test_cfg=dict(mode='whole')) 53 | -------------------------------------------------------------------------------- /segmentation/configs/_base_/models/deeplabv3_unet_s5-d16.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | data_preprocessor = dict( 4 | type='SegDataPreProcessor', 5 | mean=[123.675, 116.28, 103.53], 6 | std=[58.395, 57.12, 57.375], 7 | bgr_to_rgb=True, 8 | pad_val=0, 9 | seg_pad_val=255) 10 | model = dict( 11 | type='EncoderDecoder', 12 | data_preprocessor=data_preprocessor, 13 | pretrained=None, 14 | backbone=dict( 15 | type='UNet', 16 | in_channels=3, 17 | base_channels=64, 18 | num_stages=5, 19 | strides=(1, 1, 1, 1, 1), 20 | enc_num_convs=(2, 2, 2, 2, 2), 21 | dec_num_convs=(2, 2, 2, 2), 22 | downsamples=(True, True, True, True), 23 | enc_dilations=(1, 1, 1, 1, 1), 24 | dec_dilations=(1, 1, 1, 1), 25 | with_cp=False, 26 | conv_cfg=None, 27 | norm_cfg=norm_cfg, 28 | act_cfg=dict(type='ReLU'), 29 | upsample_cfg=dict(type='InterpConv'), 30 | norm_eval=False), 31 | decode_head=dict( 32 | type='ASPPHead', 33 | in_channels=64, 34 | in_index=4, 35 | channels=16, 36 | dilations=(1, 12, 24, 36), 37 | dropout_ratio=0.1, 38 | num_classes=2, 39 | norm_cfg=norm_cfg, 40 | align_corners=False, 41 | loss_decode=dict( 42 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 43 | auxiliary_head=dict( 44 | type='FCNHead', 45 | in_channels=128, 46 | in_index=3, 47 | channels=64, 48 | num_convs=1, 49 | concat_input=False, 50 | dropout_ratio=0.1, 51 | num_classes=2, 52 | norm_cfg=norm_cfg, 53 | align_corners=False, 54 | loss_decode=dict( 55 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 56 | # model training and testing settings 57 | train_cfg=dict(), 58 | test_cfg=dict(mode='slide', crop_size=256, stride=170)) 59 | -------------------------------------------------------------------------------- /segmentation/configs/_base_/models/deeplabv3plus_r50-d8.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | data_preprocessor = dict( 4 | type='SegDataPreProcessor', 5 | mean=[123.675, 116.28, 103.53], 6 | std=[58.395, 57.12, 57.375], 7 | bgr_to_rgb=True, 8 | pad_val=0, 9 | seg_pad_val=255) 10 | model = dict( 11 | type='EncoderDecoder', 12 | data_preprocessor=data_preprocessor, 13 | pretrained='open-mmlab://resnet50_v1c', 14 | backbone=dict( 15 | type='ResNetV1c', 16 | depth=50, 17 | num_stages=4, 18 | out_indices=(0, 1, 2, 3), 19 | dilations=(1, 1, 2, 4), 20 | strides=(1, 2, 1, 1), 21 | norm_cfg=norm_cfg, 22 | norm_eval=False, 23 | style='pytorch', 24 | contract_dilation=True), 25 | decode_head=dict( 26 | type='DepthwiseSeparableASPPHead', 27 | in_channels=2048, 28 | in_index=3, 29 | channels=512, 30 | dilations=(1, 12, 24, 36), 31 | c1_in_channels=256, 32 | 
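# DeepLabV3+ decoder: low-level stage-1 backbone features (256 channels) are projected to c1_channels below and fused with the upsampled ASPP output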
c1_channels=48, 33 | dropout_ratio=0.1, 34 | num_classes=19, 35 | norm_cfg=norm_cfg, 36 | align_corners=False, 37 | loss_decode=dict( 38 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 39 | auxiliary_head=dict( 40 | type='FCNHead', 41 | in_channels=1024, 42 | in_index=2, 43 | channels=256, 44 | num_convs=1, 45 | concat_input=False, 46 | dropout_ratio=0.1, 47 | num_classes=19, 48 | norm_cfg=norm_cfg, 49 | align_corners=False, 50 | loss_decode=dict( 51 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 52 | # model training and testing settings 53 | train_cfg=dict(), 54 | test_cfg=dict(mode='whole')) 55 | -------------------------------------------------------------------------------- /segmentation/configs/_base_/models/dmnet_r50-d8.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | data_preprocessor = dict( 4 | type='SegDataPreProcessor', 5 | mean=[123.675, 116.28, 103.53], 6 | std=[58.395, 57.12, 57.375], 7 | bgr_to_rgb=True, 8 | pad_val=0, 9 | seg_pad_val=255) 10 | model = dict( 11 | type='EncoderDecoder', 12 | data_preprocessor=data_preprocessor, 13 | pretrained='open-mmlab://resnet50_v1c', 14 | backbone=dict( 15 | type='ResNetV1c', 16 | depth=50, 17 | num_stages=4, 18 | out_indices=(0, 1, 2, 3), 19 | dilations=(1, 1, 2, 4), 20 | strides=(1, 2, 1, 1), 21 | norm_cfg=norm_cfg, 22 | norm_eval=False, 23 | style='pytorch', 24 | contract_dilation=True), 25 | decode_head=dict( 26 | type='DMHead', 27 | in_channels=2048, 28 | in_index=3, 29 | channels=512, 30 | filter_sizes=(1, 3, 5, 7), 31 | dropout_ratio=0.1, 32 | num_classes=19, 33 | norm_cfg=dict(type='SyncBN', requires_grad=True), 34 | align_corners=False, 35 | loss_decode=dict( 36 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 37 | auxiliary_head=dict( 38 | type='FCNHead', 39 | in_channels=1024, 40 | in_index=2, 41 | channels=256, 42 | num_convs=1, 43 | concat_input=False, 44 | dropout_ratio=0.1, 45 | num_classes=19, 46 | norm_cfg=norm_cfg, 47 | align_corners=False, 48 | loss_decode=dict( 49 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 50 | # model training and testing settings 51 | train_cfg=dict(), 52 | test_cfg=dict(mode='whole')) 53 | -------------------------------------------------------------------------------- /segmentation/configs/_base_/models/dnl_r50-d8.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | data_preprocessor = dict( 4 | type='SegDataPreProcessor', 5 | mean=[123.675, 116.28, 103.53], 6 | std=[58.395, 57.12, 57.375], 7 | bgr_to_rgb=True, 8 | pad_val=0, 9 | seg_pad_val=255) 10 | model = dict( 11 | type='EncoderDecoder', 12 | data_preprocessor=data_preprocessor, 13 | pretrained='open-mmlab://resnet50_v1c', 14 | backbone=dict( 15 | type='ResNetV1c', 16 | depth=50, 17 | num_stages=4, 18 | out_indices=(0, 1, 2, 3), 19 | dilations=(1, 1, 2, 4), 20 | strides=(1, 2, 1, 1), 21 | norm_cfg=norm_cfg, 22 | norm_eval=False, 23 | style='pytorch', 24 | contract_dilation=True), 25 | decode_head=dict( 26 | type='DNLHead', 27 | in_channels=2048, 28 | in_index=3, 29 | channels=512, 30 | dropout_ratio=0.1, 31 | reduction=2, 32 | use_scale=True, 33 | mode='embedded_gaussian', 34 | num_classes=19, 35 | norm_cfg=norm_cfg, 36 | align_corners=False, 37 | loss_decode=dict( 38 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 39 | 
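# DNLHead: disentangled non-local attention, learning the whitened pairwise term and the unary term separately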
auxiliary_head=dict( 40 | type='FCNHead', 41 | in_channels=1024, 42 | in_index=2, 43 | channels=256, 44 | num_convs=1, 45 | concat_input=False, 46 | dropout_ratio=0.1, 47 | num_classes=19, 48 | norm_cfg=norm_cfg, 49 | align_corners=False, 50 | loss_decode=dict( 51 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 52 | # model training and testing settings 53 | train_cfg=dict(), 54 | test_cfg=dict(mode='whole')) 55 | -------------------------------------------------------------------------------- /segmentation/configs/_base_/models/dpt_vit-b16.py: -------------------------------------------------------------------------------- 1 | norm_cfg = dict(type='SyncBN', requires_grad=True) 2 | data_preprocessor = dict( 3 | type='SegDataPreProcessor', 4 | mean=[123.675, 116.28, 103.53], 5 | std=[58.395, 57.12, 57.375], 6 | bgr_to_rgb=True, 7 | pad_val=0, 8 | seg_pad_val=255) 9 | model = dict( 10 | type='EncoderDecoder', 11 | data_preprocessor=data_preprocessor, 12 | pretrained='pretrain/vit-b16_p16_224-80ecf9dd.pth', # noqa 13 | backbone=dict( 14 | type='VisionTransformer', 15 | img_size=224, 16 | embed_dims=768, 17 | num_layers=12, 18 | num_heads=12, 19 | out_indices=(2, 5, 8, 11), 20 | final_norm=False, 21 | with_cls_token=True, 22 | output_cls_token=True), 23 | decode_head=dict( 24 | type='DPTHead', 25 | in_channels=(768, 768, 768, 768), 26 | channels=256, 27 | embed_dims=768, 28 | post_process_channels=[96, 192, 384, 768], 29 | num_classes=150, 30 | readout_type='project', 31 | input_transform='multiple_select', 32 | in_index=(0, 1, 2, 3), 33 | norm_cfg=norm_cfg, 34 | loss_decode=dict( 35 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 36 | auxiliary_head=None, 37 | # model training and testing settings 38 | train_cfg=dict(), 39 | test_cfg=dict(mode='whole')) # yapf: disable 40 | -------------------------------------------------------------------------------- /segmentation/configs/_base_/models/emanet_r50-d8.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | data_preprocessor = dict( 4 | type='SegDataPreProcessor', 5 | mean=[123.675, 116.28, 103.53], 6 | std=[58.395, 57.12, 57.375], 7 | bgr_to_rgb=True, 8 | pad_val=0, 9 | seg_pad_val=255) 10 | model = dict( 11 | type='EncoderDecoder', 12 | data_preprocessor=data_preprocessor, 13 | pretrained='open-mmlab://resnet50_v1c', 14 | backbone=dict( 15 | type='ResNetV1c', 16 | depth=50, 17 | num_stages=4, 18 | out_indices=(0, 1, 2, 3), 19 | dilations=(1, 1, 2, 4), 20 | strides=(1, 2, 1, 1), 21 | norm_cfg=norm_cfg, 22 | norm_eval=False, 23 | style='pytorch', 24 | contract_dilation=True), 25 | decode_head=dict( 26 | type='EMAHead', 27 | in_channels=2048, 28 | in_index=3, 29 | channels=256, 30 | ema_channels=512, 31 | num_bases=64, 32 | num_stages=3, 33 | momentum=0.1, 34 | dropout_ratio=0.1, 35 | num_classes=19, 36 | norm_cfg=norm_cfg, 37 | align_corners=False, 38 | loss_decode=dict( 39 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 40 | auxiliary_head=dict( 41 | type='FCNHead', 42 | in_channels=1024, 43 | in_index=2, 44 | channels=256, 45 | num_convs=1, 46 | concat_input=False, 47 | dropout_ratio=0.1, 48 | num_classes=19, 49 | norm_cfg=norm_cfg, 50 | align_corners=False, 51 | loss_decode=dict( 52 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 53 | # model training and testing settings 54 | train_cfg=dict(), 55 | test_cfg=dict(mode='whole')) 56 | 
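A pattern shared by the model configs above and below: the decode head carries the full loss (loss_weight=1.0) while an auxiliary FCNHead on an earlier backbone stage adds deep supervision at loss_weight=0.4. A minimal sketch of how the two weighted losses combine, assuming both heads' logits are already upsampled to label resolution; the helper name is illustrative, not part of this repo:

import torch.nn.functional as F

def combined_seg_loss(decode_logits, aux_logits, target):
    # 255 is seg_pad_val in the data preprocessors above and is
    # treated as the ignored label index by the losses
    loss_decode = F.cross_entropy(decode_logits, target, ignore_index=255)
    loss_aux = F.cross_entropy(aux_logits, target, ignore_index=255)
    # loss_weight=1.0 and loss_weight=0.4, as in the configs
    return 1.0 * loss_decode + 0.4 * loss_aux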
-------------------------------------------------------------------------------- /segmentation/configs/_base_/models/encnet_r50-d8.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | data_preprocessor = dict( 4 | type='SegDataPreProcessor', 5 | mean=[123.675, 116.28, 103.53], 6 | std=[58.395, 57.12, 57.375], 7 | bgr_to_rgb=True, 8 | pad_val=0, 9 | seg_pad_val=255) 10 | model = dict( 11 | type='EncoderDecoder', 12 | data_preprocessor=data_preprocessor, 13 | pretrained='open-mmlab://resnet50_v1c', 14 | backbone=dict( 15 | type='ResNetV1c', 16 | depth=50, 17 | num_stages=4, 18 | out_indices=(0, 1, 2, 3), 19 | dilations=(1, 1, 2, 4), 20 | strides=(1, 2, 1, 1), 21 | norm_cfg=norm_cfg, 22 | norm_eval=False, 23 | style='pytorch', 24 | contract_dilation=True), 25 | decode_head=dict( 26 | type='EncHead', 27 | in_channels=[512, 1024, 2048], 28 | in_index=(1, 2, 3), 29 | channels=512, 30 | num_codes=32, 31 | use_se_loss=True, 32 | add_lateral=False, 33 | dropout_ratio=0.1, 34 | num_classes=19, 35 | norm_cfg=norm_cfg, 36 | align_corners=False, 37 | loss_decode=dict( 38 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), 39 | loss_se_decode=dict( 40 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=0.2)), 41 | auxiliary_head=dict( 42 | type='FCNHead', 43 | in_channels=1024, 44 | in_index=2, 45 | channels=256, 46 | num_convs=1, 47 | concat_input=False, 48 | dropout_ratio=0.1, 49 | num_classes=19, 50 | norm_cfg=norm_cfg, 51 | align_corners=False, 52 | loss_decode=dict( 53 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 54 | # model training and testing settings 55 | train_cfg=dict(), 56 | test_cfg=dict(mode='whole')) 57 | -------------------------------------------------------------------------------- /segmentation/configs/_base_/models/erfnet_fcn.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | data_preprocessor = dict( 4 | type='SegDataPreProcessor', 5 | mean=[123.675, 116.28, 103.53], 6 | std=[58.395, 57.12, 57.375], 7 | bgr_to_rgb=True, 8 | pad_val=0, 9 | seg_pad_val=255) 10 | model = dict( 11 | type='EncoderDecoder', 12 | data_preprocessor=data_preprocessor, 13 | pretrained=None, 14 | backbone=dict( 15 | type='ERFNet', 16 | in_channels=3, 17 | enc_downsample_channels=(16, 64, 128), 18 | enc_stage_non_bottlenecks=(5, 8), 19 | enc_non_bottleneck_dilations=(2, 4, 8, 16), 20 | enc_non_bottleneck_channels=(64, 128), 21 | dec_upsample_channels=(64, 16), 22 | dec_stages_non_bottleneck=(2, 2), 23 | dec_non_bottleneck_channels=(64, 16), 24 | dropout_ratio=0.1, 25 | init_cfg=None), 26 | decode_head=dict( 27 | type='FCNHead', 28 | in_channels=16, 29 | channels=128, 30 | num_convs=1, 31 | concat_input=False, 32 | dropout_ratio=0.1, 33 | num_classes=19, 34 | norm_cfg=norm_cfg, 35 | align_corners=False, 36 | loss_decode=dict( 37 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 38 | # model training and testing settings 39 | train_cfg=dict(), 40 | test_cfg=dict(mode='whole')) 41 | -------------------------------------------------------------------------------- /segmentation/configs/_base_/models/fastfcn_r50-d32_jpu_psp.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | data_preprocessor = dict( 4 | type='SegDataPreProcessor', 
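# ImageNet RGB mean/std shared by most configs here; seg_pad_val=255 marks padded label pixels, which the losses ignore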
5 | mean=[123.675, 116.28, 103.53], 6 | std=[58.395, 57.12, 57.375], 7 | bgr_to_rgb=True, 8 | pad_val=0, 9 | seg_pad_val=255) 10 | model = dict( 11 | type='EncoderDecoder', 12 | data_preprocessor=data_preprocessor, 13 | pretrained='open-mmlab://resnet50_v1c', 14 | backbone=dict( 15 | type='ResNetV1c', 16 | depth=50, 17 | num_stages=4, 18 | dilations=(1, 1, 2, 4), 19 | strides=(1, 2, 2, 2), 20 | out_indices=(1, 2, 3), 21 | norm_cfg=norm_cfg, 22 | norm_eval=False, 23 | style='pytorch', 24 | contract_dilation=True), 25 | neck=dict( 26 | type='JPU', 27 | in_channels=(512, 1024, 2048), 28 | mid_channels=512, 29 | start_level=0, 30 | end_level=-1, 31 | dilations=(1, 2, 4, 8), 32 | align_corners=False, 33 | norm_cfg=norm_cfg), 34 | decode_head=dict( 35 | type='PSPHead', 36 | in_channels=2048, 37 | in_index=2, 38 | channels=512, 39 | pool_scales=(1, 2, 3, 6), 40 | dropout_ratio=0.1, 41 | num_classes=19, 42 | norm_cfg=norm_cfg, 43 | align_corners=False, 44 | loss_decode=dict( 45 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 46 | auxiliary_head=dict( 47 | type='FCNHead', 48 | in_channels=1024, 49 | in_index=1, 50 | channels=256, 51 | num_convs=1, 52 | concat_input=False, 53 | dropout_ratio=0.1, 54 | num_classes=19, 55 | norm_cfg=norm_cfg, 56 | align_corners=False, 57 | loss_decode=dict( 58 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 59 | # model training and testing settings 60 | train_cfg=dict(), 61 | test_cfg=dict(mode='whole')) 62 | -------------------------------------------------------------------------------- /segmentation/configs/_base_/models/fcn_r50-d8.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | data_preprocessor = dict( 4 | type='SegDataPreProcessor', 5 | mean=[123.675, 116.28, 103.53], 6 | std=[58.395, 57.12, 57.375], 7 | bgr_to_rgb=True, 8 | pad_val=0, 9 | seg_pad_val=255) 10 | model = dict( 11 | type='EncoderDecoder', 12 | data_preprocessor=data_preprocessor, 13 | pretrained='open-mmlab://resnet50_v1c', 14 | backbone=dict( 15 | type='ResNetV1c', 16 | depth=50, 17 | num_stages=4, 18 | out_indices=(0, 1, 2, 3), 19 | dilations=(1, 1, 2, 4), 20 | strides=(1, 2, 1, 1), 21 | norm_cfg=norm_cfg, 22 | norm_eval=False, 23 | style='pytorch', 24 | contract_dilation=True), 25 | decode_head=dict( 26 | type='FCNHead', 27 | in_channels=2048, 28 | in_index=3, 29 | channels=512, 30 | num_convs=2, 31 | concat_input=True, 32 | dropout_ratio=0.1, 33 | num_classes=19, 34 | norm_cfg=norm_cfg, 35 | align_corners=False, 36 | loss_decode=dict( 37 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 38 | auxiliary_head=dict( 39 | type='FCNHead', 40 | in_channels=1024, 41 | in_index=2, 42 | channels=256, 43 | num_convs=1, 44 | concat_input=False, 45 | dropout_ratio=0.1, 46 | num_classes=19, 47 | norm_cfg=norm_cfg, 48 | align_corners=False, 49 | loss_decode=dict( 50 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 51 | # model training and testing settings 52 | train_cfg=dict(), 53 | test_cfg=dict(mode='whole')) 54 | -------------------------------------------------------------------------------- /segmentation/configs/_base_/models/fcn_unet_s5-d16.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | data_preprocessor = dict( 4 | type='SegDataPreProcessor', 5 | mean=[123.675, 116.28, 103.53], 6 | std=[58.395, 57.12, 
57.375], 7 | bgr_to_rgb=True, 8 | pad_val=0, 9 | seg_pad_val=255) 10 | model = dict( 11 | type='EncoderDecoder', 12 | data_preprocessor=data_preprocessor, 13 | pretrained=None, 14 | backbone=dict( 15 | type='UNet', 16 | in_channels=3, 17 | base_channels=64, 18 | num_stages=5, 19 | strides=(1, 1, 1, 1, 1), 20 | enc_num_convs=(2, 2, 2, 2, 2), 21 | dec_num_convs=(2, 2, 2, 2), 22 | downsamples=(True, True, True, True), 23 | enc_dilations=(1, 1, 1, 1, 1), 24 | dec_dilations=(1, 1, 1, 1), 25 | with_cp=False, 26 | conv_cfg=None, 27 | norm_cfg=norm_cfg, 28 | act_cfg=dict(type='ReLU'), 29 | upsample_cfg=dict(type='InterpConv'), 30 | norm_eval=False), 31 | decode_head=dict( 32 | type='FCNHead', 33 | in_channels=64, 34 | in_index=4, 35 | channels=64, 36 | num_convs=1, 37 | concat_input=False, 38 | dropout_ratio=0.1, 39 | num_classes=2, 40 | norm_cfg=norm_cfg, 41 | align_corners=False, 42 | loss_decode=dict( 43 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 44 | auxiliary_head=dict( 45 | type='FCNHead', 46 | in_channels=128, 47 | in_index=3, 48 | channels=64, 49 | num_convs=1, 50 | concat_input=False, 51 | dropout_ratio=0.1, 52 | num_classes=2, 53 | norm_cfg=norm_cfg, 54 | align_corners=False, 55 | loss_decode=dict( 56 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 57 | # model training and testing settings 58 | train_cfg=dict(), 59 | test_cfg=dict(mode='slide', crop_size=256, stride=170)) 60 | -------------------------------------------------------------------------------- /segmentation/configs/_base_/models/fpn_poolformer_s12.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | checkpoint_file = 'https://download.openmmlab.com/mmclassification/v0/poolformer/poolformer-s12_3rdparty_32xb128_in1k_20220414-f8d83051.pth' # noqa 4 | # TODO: delete custom_imports after mmpretrain supports auto import 5 | # please install mmpretrain >= 1.0.0rc7 6 | # import mmpretrain.models to trigger register_module in mmpretrain 7 | custom_imports = dict( 8 | imports=['mmpretrain.models'], allow_failed_imports=False) 9 | data_preprocessor = dict( 10 | type='SegDataPreProcessor', 11 | mean=[123.675, 116.28, 103.53], 12 | std=[58.395, 57.12, 57.375], 13 | bgr_to_rgb=True, 14 | pad_val=0, 15 | seg_pad_val=255) 16 | model = dict( 17 | type='EncoderDecoder', 18 | data_preprocessor=data_preprocessor, 19 | backbone=dict( 20 | type='mmpretrain.PoolFormer', 21 | arch='s12', 22 | init_cfg=dict( 23 | type='Pretrained', checkpoint=checkpoint_file, prefix='backbone.'), 24 | in_patch_size=7, 25 | in_stride=4, 26 | in_pad=2, 27 | down_patch_size=3, 28 | down_stride=2, 29 | down_pad=1, 30 | drop_rate=0., 31 | drop_path_rate=0., 32 | out_indices=(0, 2, 4, 6), 33 | frozen_stages=0, 34 | ), 35 | neck=dict( 36 | type='FPN', 37 | in_channels=[256, 512, 1024, 2048], 38 | out_channels=256, 39 | num_outs=4), 40 | decode_head=dict( 41 | type='FPNHead', 42 | in_channels=[256, 256, 256, 256], 43 | in_index=[0, 1, 2, 3], 44 | feature_strides=[4, 8, 16, 32], 45 | channels=128, 46 | dropout_ratio=0.1, 47 | num_classes=19, 48 | norm_cfg=norm_cfg, 49 | align_corners=False, 50 | loss_decode=dict( 51 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 52 | # model training and testing settings 53 | train_cfg=dict(), 54 | test_cfg=dict(mode='whole')) 55 | -------------------------------------------------------------------------------- /segmentation/configs/_base_/models/fpn_r50.py: 
-------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | data_preprocessor = dict( 4 | type='SegDataPreProcessor', 5 | mean=[123.675, 116.28, 103.53], 6 | std=[58.395, 57.12, 57.375], 7 | bgr_to_rgb=True, 8 | pad_val=0, 9 | seg_pad_val=255) 10 | model = dict( 11 | type='EncoderDecoder', 12 | data_preprocessor=data_preprocessor, 13 | pretrained='open-mmlab://resnet50_v1c', 14 | backbone=dict( 15 | type='ResNetV1c', 16 | depth=50, 17 | num_stages=4, 18 | out_indices=(0, 1, 2, 3), 19 | dilations=(1, 1, 1, 1), 20 | strides=(1, 2, 2, 2), 21 | norm_cfg=norm_cfg, 22 | norm_eval=False, 23 | style='pytorch', 24 | contract_dilation=True), 25 | neck=dict( 26 | type='FPN', 27 | in_channels=[256, 512, 1024, 2048], 28 | out_channels=256, 29 | num_outs=4), 30 | decode_head=dict( 31 | type='FPNHead', 32 | in_channels=[256, 256, 256, 256], 33 | in_index=[0, 1, 2, 3], 34 | feature_strides=[4, 8, 16, 32], 35 | channels=128, 36 | dropout_ratio=0.1, 37 | num_classes=19, 38 | norm_cfg=norm_cfg, 39 | align_corners=False, 40 | loss_decode=dict( 41 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 42 | # model training and testing settings 43 | train_cfg=dict(), 44 | test_cfg=dict(mode='whole')) 45 | -------------------------------------------------------------------------------- /segmentation/configs/_base_/models/gcnet_r50-d8.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | data_preprocessor = dict( 4 | type='SegDataPreProcessor', 5 | mean=[123.675, 116.28, 103.53], 6 | std=[58.395, 57.12, 57.375], 7 | bgr_to_rgb=True, 8 | pad_val=0, 9 | seg_pad_val=255) 10 | model = dict( 11 | type='EncoderDecoder', 12 | data_preprocessor=data_preprocessor, 13 | pretrained='open-mmlab://resnet50_v1c', 14 | backbone=dict( 15 | type='ResNetV1c', 16 | depth=50, 17 | num_stages=4, 18 | out_indices=(0, 1, 2, 3), 19 | dilations=(1, 1, 2, 4), 20 | strides=(1, 2, 1, 1), 21 | norm_cfg=norm_cfg, 22 | norm_eval=False, 23 | style='pytorch', 24 | contract_dilation=True), 25 | decode_head=dict( 26 | type='GCHead', 27 | in_channels=2048, 28 | in_index=3, 29 | channels=512, 30 | ratio=1 / 4., 31 | pooling_type='att', 32 | fusion_types=('channel_add', ), 33 | dropout_ratio=0.1, 34 | num_classes=19, 35 | norm_cfg=norm_cfg, 36 | align_corners=False, 37 | loss_decode=dict( 38 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 39 | auxiliary_head=dict( 40 | type='FCNHead', 41 | in_channels=1024, 42 | in_index=2, 43 | channels=256, 44 | num_convs=1, 45 | concat_input=False, 46 | dropout_ratio=0.1, 47 | num_classes=19, 48 | norm_cfg=norm_cfg, 49 | align_corners=False, 50 | loss_decode=dict( 51 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 52 | # model training and testing settings 53 | train_cfg=dict(), 54 | test_cfg=dict(mode='whole')) 55 | -------------------------------------------------------------------------------- /segmentation/configs/_base_/models/isanet_r50-d8.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | data_preprocessor = dict( 4 | type='SegDataPreProcessor', 5 | mean=[123.675, 116.28, 103.53], 6 | std=[58.395, 57.12, 57.375], 7 | bgr_to_rgb=True, 8 | pad_val=0, 9 | seg_pad_val=255) 10 | model = dict( 11 | type='EncoderDecoder', 12 | 
data_preprocessor=data_preprocessor, 13 | pretrained='open-mmlab://resnet50_v1c', 14 | backbone=dict( 15 | type='ResNetV1c', 16 | depth=50, 17 | num_stages=4, 18 | out_indices=(0, 1, 2, 3), 19 | dilations=(1, 1, 2, 4), 20 | strides=(1, 2, 1, 1), 21 | norm_cfg=norm_cfg, 22 | norm_eval=False, 23 | style='pytorch', 24 | contract_dilation=True), 25 | decode_head=dict( 26 | type='ISAHead', 27 | in_channels=2048, 28 | in_index=3, 29 | channels=512, 30 | isa_channels=256, 31 | down_factor=(8, 8), 32 | dropout_ratio=0.1, 33 | num_classes=19, 34 | norm_cfg=norm_cfg, 35 | align_corners=False, 36 | loss_decode=dict( 37 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 38 | auxiliary_head=dict( 39 | type='FCNHead', 40 | in_channels=1024, 41 | in_index=2, 42 | channels=256, 43 | num_convs=1, 44 | concat_input=False, 45 | dropout_ratio=0.1, 46 | num_classes=19, 47 | norm_cfg=norm_cfg, 48 | align_corners=False, 49 | loss_decode=dict( 50 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 51 | # model training and testing settings 52 | train_cfg=dict(), 53 | test_cfg=dict(mode='whole')) 54 | -------------------------------------------------------------------------------- /segmentation/configs/_base_/models/lraspp_m-v3-d8.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', eps=0.001, requires_grad=True) 3 | data_preprocessor = dict( 4 | type='SegDataPreProcessor', 5 | mean=[123.675, 116.28, 103.53], 6 | std=[58.395, 57.12, 57.375], 7 | bgr_to_rgb=True, 8 | pad_val=0, 9 | seg_pad_val=255) 10 | model = dict( 11 | type='EncoderDecoder', 12 | data_preprocessor=data_preprocessor, 13 | backbone=dict( 14 | type='MobileNetV3', 15 | arch='large', 16 | out_indices=(1, 3, 16), 17 | norm_cfg=norm_cfg), 18 | decode_head=dict( 19 | type='LRASPPHead', 20 | in_channels=(16, 24, 960), 21 | in_index=(0, 1, 2), 22 | channels=128, 23 | input_transform='multiple_select', 24 | dropout_ratio=0.1, 25 | num_classes=19, 26 | norm_cfg=norm_cfg, 27 | act_cfg=dict(type='ReLU'), 28 | align_corners=False, 29 | loss_decode=dict( 30 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 31 | # model training and testing settings 32 | train_cfg=dict(), 33 | test_cfg=dict(mode='whole')) 34 | -------------------------------------------------------------------------------- /segmentation/configs/_base_/models/nonlocal_r50-d8.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | data_preprocessor = dict( 4 | type='SegDataPreProcessor', 5 | mean=[123.675, 116.28, 103.53], 6 | std=[58.395, 57.12, 57.375], 7 | bgr_to_rgb=True, 8 | pad_val=0, 9 | seg_pad_val=255) 10 | model = dict( 11 | type='EncoderDecoder', 12 | data_preprocessor=data_preprocessor, 13 | pretrained='open-mmlab://resnet50_v1c', 14 | backbone=dict( 15 | type='ResNetV1c', 16 | depth=50, 17 | num_stages=4, 18 | out_indices=(0, 1, 2, 3), 19 | dilations=(1, 1, 2, 4), 20 | strides=(1, 2, 1, 1), 21 | norm_cfg=norm_cfg, 22 | norm_eval=False, 23 | style='pytorch', 24 | contract_dilation=True), 25 | decode_head=dict( 26 | type='NLHead', 27 | in_channels=2048, 28 | in_index=3, 29 | channels=512, 30 | dropout_ratio=0.1, 31 | reduction=2, 32 | use_scale=True, 33 | mode='embedded_gaussian', 34 | num_classes=19, 35 | norm_cfg=norm_cfg, 36 | align_corners=False, 37 | loss_decode=dict( 38 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 39 | 
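# NLHead: non-local self-attention over all spatial positions; 'embedded_gaussian' scores pairs as softmax(theta(x_i)^T phi(x_j))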
auxiliary_head=dict( 40 | type='FCNHead', 41 | in_channels=1024, 42 | in_index=2, 43 | channels=256, 44 | num_convs=1, 45 | concat_input=False, 46 | dropout_ratio=0.1, 47 | num_classes=19, 48 | norm_cfg=norm_cfg, 49 | align_corners=False, 50 | loss_decode=dict( 51 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 52 | # model training and testing settings 53 | train_cfg=dict(), 54 | test_cfg=dict(mode='whole')) 55 | -------------------------------------------------------------------------------- /segmentation/configs/_base_/models/ocrnet_r50-d8.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | data_preprocessor = dict( 4 | type='SegDataPreProcessor', 5 | mean=[123.675, 116.28, 103.53], 6 | std=[58.395, 57.12, 57.375], 7 | bgr_to_rgb=True, 8 | pad_val=0, 9 | seg_pad_val=255) 10 | model = dict( 11 | type='CascadeEncoderDecoder', 12 | data_preprocessor=data_preprocessor, 13 | num_stages=2, 14 | pretrained='open-mmlab://resnet50_v1c', 15 | backbone=dict( 16 | type='ResNetV1c', 17 | depth=50, 18 | num_stages=4, 19 | out_indices=(0, 1, 2, 3), 20 | dilations=(1, 1, 2, 4), 21 | strides=(1, 2, 1, 1), 22 | norm_cfg=norm_cfg, 23 | norm_eval=False, 24 | style='pytorch', 25 | contract_dilation=True), 26 | decode_head=[ 27 | dict( 28 | type='FCNHead', 29 | in_channels=1024, 30 | in_index=2, 31 | channels=256, 32 | num_convs=1, 33 | concat_input=False, 34 | dropout_ratio=0.1, 35 | num_classes=19, 36 | norm_cfg=norm_cfg, 37 | align_corners=False, 38 | loss_decode=dict( 39 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 40 | dict( 41 | type='OCRHead', 42 | in_channels=2048, 43 | in_index=3, 44 | channels=512, 45 | ocr_channels=256, 46 | dropout_ratio=0.1, 47 | num_classes=19, 48 | norm_cfg=norm_cfg, 49 | align_corners=False, 50 | loss_decode=dict( 51 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)) 52 | ], 53 | # model training and testing settings 54 | train_cfg=dict(), 55 | test_cfg=dict(mode='whole')) 56 | -------------------------------------------------------------------------------- /segmentation/configs/_base_/models/psanet_r50-d8.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | data_preprocessor = dict( 4 | type='SegDataPreProcessor', 5 | mean=[123.675, 116.28, 103.53], 6 | std=[58.395, 57.12, 57.375], 7 | bgr_to_rgb=True, 8 | pad_val=0, 9 | seg_pad_val=255) 10 | model = dict( 11 | type='EncoderDecoder', 12 | data_preprocessor=data_preprocessor, 13 | pretrained='open-mmlab://resnet50_v1c', 14 | backbone=dict( 15 | type='ResNetV1c', 16 | depth=50, 17 | num_stages=4, 18 | out_indices=(0, 1, 2, 3), 19 | dilations=(1, 1, 2, 4), 20 | strides=(1, 2, 1, 1), 21 | norm_cfg=norm_cfg, 22 | norm_eval=False, 23 | style='pytorch', 24 | contract_dilation=True), 25 | decode_head=dict( 26 | type='PSAHead', 27 | in_channels=2048, 28 | in_index=3, 29 | channels=512, 30 | mask_size=(97, 97), 31 | psa_type='bi-direction', 32 | compact=False, 33 | shrink_factor=2, 34 | normalization_factor=1.0, 35 | psa_softmax=True, 36 | dropout_ratio=0.1, 37 | num_classes=19, 38 | norm_cfg=norm_cfg, 39 | align_corners=False, 40 | loss_decode=dict( 41 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 42 | auxiliary_head=dict( 43 | type='FCNHead', 44 | in_channels=1024, 45 | in_index=2, 46 | channels=256, 47 | num_convs=1, 48 | concat_input=False, 
49 | dropout_ratio=0.1, 50 | num_classes=19, 51 | norm_cfg=norm_cfg, 52 | align_corners=False, 53 | loss_decode=dict( 54 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 55 | # model training and testing settings 56 | train_cfg=dict(), 57 | test_cfg=dict(mode='whole')) 58 | -------------------------------------------------------------------------------- /segmentation/configs/_base_/models/pspnet_r50-d8.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | data_preprocessor = dict( 4 | type='SegDataPreProcessor', 5 | mean=[123.675, 116.28, 103.53], 6 | std=[58.395, 57.12, 57.375], 7 | bgr_to_rgb=True, 8 | pad_val=0, 9 | seg_pad_val=255) 10 | model = dict( 11 | type='EncoderDecoder', 12 | data_preprocessor=data_preprocessor, 13 | pretrained='open-mmlab://resnet50_v1c', 14 | backbone=dict( 15 | type='ResNetV1c', 16 | depth=50, 17 | num_stages=4, 18 | out_indices=(0, 1, 2, 3), 19 | dilations=(1, 1, 2, 4), 20 | strides=(1, 2, 1, 1), 21 | norm_cfg=norm_cfg, 22 | norm_eval=False, 23 | style='pytorch', 24 | contract_dilation=True), 25 | decode_head=dict( 26 | type='PSPHead', 27 | in_channels=2048, 28 | in_index=3, 29 | channels=512, 30 | pool_scales=(1, 2, 3, 6), 31 | dropout_ratio=0.1, 32 | num_classes=19, 33 | norm_cfg=norm_cfg, 34 | align_corners=False, 35 | loss_decode=dict( 36 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 37 | auxiliary_head=dict( 38 | type='FCNHead', 39 | in_channels=1024, 40 | in_index=2, 41 | channels=256, 42 | num_convs=1, 43 | concat_input=False, 44 | dropout_ratio=0.1, 45 | num_classes=19, 46 | norm_cfg=norm_cfg, 47 | align_corners=False, 48 | loss_decode=dict( 49 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 50 | # model training and testing settings 51 | train_cfg=dict(), 52 | test_cfg=dict(mode='whole')) 53 | -------------------------------------------------------------------------------- /segmentation/configs/_base_/models/pspnet_unet_s5-d16.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | data_preprocessor = dict( 4 | type='SegDataPreProcessor', 5 | mean=[123.675, 116.28, 103.53], 6 | std=[58.395, 57.12, 57.375], 7 | bgr_to_rgb=True, 8 | pad_val=0, 9 | seg_pad_val=255) 10 | model = dict( 11 | type='EncoderDecoder', 12 | data_preprocessor=data_preprocessor, 13 | pretrained=None, 14 | backbone=dict( 15 | type='UNet', 16 | in_channels=3, 17 | base_channels=64, 18 | num_stages=5, 19 | strides=(1, 1, 1, 1, 1), 20 | enc_num_convs=(2, 2, 2, 2, 2), 21 | dec_num_convs=(2, 2, 2, 2), 22 | downsamples=(True, True, True, True), 23 | enc_dilations=(1, 1, 1, 1, 1), 24 | dec_dilations=(1, 1, 1, 1), 25 | with_cp=False, 26 | conv_cfg=None, 27 | norm_cfg=norm_cfg, 28 | act_cfg=dict(type='ReLU'), 29 | upsample_cfg=dict(type='InterpConv'), 30 | norm_eval=False), 31 | decode_head=dict( 32 | type='PSPHead', 33 | in_channels=64, 34 | in_index=4, 35 | channels=16, 36 | pool_scales=(1, 2, 3, 6), 37 | dropout_ratio=0.1, 38 | num_classes=2, 39 | norm_cfg=norm_cfg, 40 | align_corners=False, 41 | loss_decode=dict( 42 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 43 | auxiliary_head=dict( 44 | type='FCNHead', 45 | in_channels=128, 46 | in_index=3, 47 | channels=64, 48 | num_convs=1, 49 | concat_input=False, 50 | dropout_ratio=0.1, 51 | num_classes=2, 52 | norm_cfg=norm_cfg, 53 | align_corners=False, 
54 | loss_decode=dict( 55 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 56 | # model training and testing settings 57 | train_cfg=dict(), 58 | test_cfg=dict(mode='slide', crop_size=256, stride=170)) 59 | -------------------------------------------------------------------------------- /segmentation/configs/_base_/models/segformer_mit-b0.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | data_preprocessor = dict( 4 | type='SegDataPreProcessor', 5 | mean=[123.675, 116.28, 103.53], 6 | std=[58.395, 57.12, 57.375], 7 | bgr_to_rgb=True, 8 | pad_val=0, 9 | seg_pad_val=255) 10 | model = dict( 11 | type='EncoderDecoder', 12 | data_preprocessor=data_preprocessor, 13 | pretrained=None, 14 | backbone=dict( 15 | type='MixVisionTransformer', 16 | in_channels=3, 17 | embed_dims=32, 18 | num_stages=4, 19 | num_layers=[2, 2, 2, 2], 20 | num_heads=[1, 2, 5, 8], 21 | patch_sizes=[7, 3, 3, 3], 22 | sr_ratios=[8, 4, 2, 1], 23 | out_indices=(0, 1, 2, 3), 24 | mlp_ratio=4, 25 | qkv_bias=True, 26 | drop_rate=0.0, 27 | attn_drop_rate=0.0, 28 | drop_path_rate=0.1), 29 | decode_head=dict( 30 | type='SegformerHead', 31 | in_channels=[32, 64, 160, 256], 32 | in_index=[0, 1, 2, 3], 33 | channels=256, 34 | dropout_ratio=0.1, 35 | num_classes=19, 36 | norm_cfg=norm_cfg, 37 | align_corners=False, 38 | loss_decode=dict( 39 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 40 | # model training and testing settings 41 | train_cfg=dict(), 42 | test_cfg=dict(mode='whole')) 43 | -------------------------------------------------------------------------------- /segmentation/configs/_base_/models/segmenter_vit-b16_mask.py: -------------------------------------------------------------------------------- 1 | checkpoint = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/segmenter/vit_base_p16_384_20220308-96dfe169.pth' # noqa 2 | # model settings 3 | backbone_norm_cfg = dict(type='LN', eps=1e-6, requires_grad=True) 4 | data_preprocessor = dict( 5 | type='SegDataPreProcessor', 6 | mean=[127.5, 127.5, 127.5], 7 | std=[127.5, 127.5, 127.5], 8 | bgr_to_rgb=True, 9 | pad_val=0, 10 | seg_pad_val=255) 11 | model = dict( 12 | type='EncoderDecoder', 13 | data_preprocessor=data_preprocessor, 14 | pretrained=checkpoint, 15 | backbone=dict( 16 | type='VisionTransformer', 17 | img_size=(512, 512), 18 | patch_size=16, 19 | in_channels=3, 20 | embed_dims=768, 21 | num_layers=12, 22 | num_heads=12, 23 | drop_path_rate=0.1, 24 | attn_drop_rate=0.0, 25 | drop_rate=0.0, 26 | final_norm=True, 27 | norm_cfg=backbone_norm_cfg, 28 | with_cls_token=True, 29 | interpolate_mode='bicubic', 30 | ), 31 | decode_head=dict( 32 | type='SegmenterMaskTransformerHead', 33 | in_channels=768, 34 | channels=768, 35 | num_classes=150, 36 | num_layers=2, 37 | num_heads=12, 38 | embed_dims=768, 39 | dropout_ratio=0.0, 40 | loss_decode=dict( 41 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), 42 | ), 43 | test_cfg=dict(mode='slide', crop_size=(512, 512), stride=(480, 480)), 44 | ) 45 | -------------------------------------------------------------------------------- /segmentation/configs/_base_/models/twins_pcpvt-s_fpn.py: -------------------------------------------------------------------------------- 1 | checkpoint = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/twins/pcpvt_small_20220308-e638c41c.pth' # noqa 2 | 3 | # model settings 4 | backbone_norm_cfg = dict(type='LN') 5 | norm_cfg 
= dict(type='SyncBN', requires_grad=True) 6 | data_preprocessor = dict( 7 | type='SegDataPreProcessor', 8 | mean=[123.675, 116.28, 103.53], 9 | std=[58.395, 57.12, 57.375], 10 | bgr_to_rgb=True, 11 | pad_val=0, 12 | seg_pad_val=255) 13 | model = dict( 14 | type='EncoderDecoder', 15 | data_preprocessor=data_preprocessor, 16 | backbone=dict( 17 | type='PCPVT', 18 | init_cfg=dict(type='Pretrained', checkpoint=checkpoint), 19 | in_channels=3, 20 | embed_dims=[64, 128, 320, 512], 21 | num_heads=[1, 2, 5, 8], 22 | patch_sizes=[4, 2, 2, 2], 23 | strides=[4, 2, 2, 2], 24 | mlp_ratios=[8, 8, 4, 4], 25 | out_indices=(0, 1, 2, 3), 26 | qkv_bias=True, 27 | norm_cfg=backbone_norm_cfg, 28 | depths=[3, 4, 6, 3], 29 | sr_ratios=[8, 4, 2, 1], 30 | norm_after_stage=False, 31 | drop_rate=0.0, 32 | attn_drop_rate=0., 33 | drop_path_rate=0.2), 34 | neck=dict( 35 | type='FPN', 36 | in_channels=[64, 128, 320, 512], 37 | out_channels=256, 38 | num_outs=4), 39 | decode_head=dict( 40 | type='FPNHead', 41 | in_channels=[256, 256, 256, 256], 42 | in_index=[0, 1, 2, 3], 43 | feature_strides=[4, 8, 16, 32], 44 | channels=128, 45 | dropout_ratio=0.1, 46 | num_classes=150, 47 | norm_cfg=norm_cfg, 48 | align_corners=False, 49 | loss_decode=dict( 50 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 51 | # model training and testing settings 52 | train_cfg=dict(), 53 | test_cfg=dict(mode='whole')) 54 | -------------------------------------------------------------------------------- /segmentation/configs/_base_/models/upernet_beit.py: -------------------------------------------------------------------------------- 1 | norm_cfg = dict(type='SyncBN', requires_grad=True) 2 | data_preprocessor = dict( 3 | type='SegDataPreProcessor', 4 | mean=[123.675, 116.28, 103.53], 5 | std=[58.395, 57.12, 57.375], 6 | bgr_to_rgb=True, 7 | pad_val=0, 8 | seg_pad_val=255) 9 | model = dict( 10 | type='EncoderDecoder', 11 | data_preprocessor=data_preprocessor, 12 | pretrained=None, 13 | backbone=dict( 14 | type='BEiT', 15 | img_size=(640, 640), 16 | patch_size=16, 17 | in_channels=3, 18 | embed_dims=768, 19 | num_layers=12, 20 | num_heads=12, 21 | mlp_ratio=4, 22 | out_indices=(3, 5, 7, 11), 23 | qv_bias=True, 24 | attn_drop_rate=0.0, 25 | drop_path_rate=0.1, 26 | norm_cfg=dict(type='LN', eps=1e-6), 27 | act_cfg=dict(type='GELU'), 28 | norm_eval=False, 29 | init_values=0.1), 30 | neck=dict(type='Feature2Pyramid', embed_dim=768, rescales=[4, 2, 1, 0.5]), 31 | decode_head=dict( 32 | type='UPerHead', 33 | in_channels=[768, 768, 768, 768], 34 | in_index=[0, 1, 2, 3], 35 | pool_scales=(1, 2, 3, 6), 36 | channels=768, 37 | dropout_ratio=0.1, 38 | num_classes=150, 39 | norm_cfg=norm_cfg, 40 | align_corners=False, 41 | loss_decode=dict( 42 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 43 | auxiliary_head=dict( 44 | type='FCNHead', 45 | in_channels=768, 46 | in_index=2, 47 | channels=256, 48 | num_convs=1, 49 | concat_input=False, 50 | dropout_ratio=0.1, 51 | num_classes=150, 52 | norm_cfg=norm_cfg, 53 | align_corners=False, 54 | loss_decode=dict( 55 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 56 | # model training and testing settings 57 | train_cfg=dict(), 58 | test_cfg=dict(mode='whole')) 59 | -------------------------------------------------------------------------------- /segmentation/configs/_base_/models/upernet_convnext.py: -------------------------------------------------------------------------------- 1 | norm_cfg = dict(type='SyncBN', requires_grad=True) 2 | custom_imports = 
dict(imports='mmpretrain.models', allow_failed_imports=False) 3 | checkpoint_file = 'https://download.openmmlab.com/mmclassification/v0/convnext/downstream/convnext-base_3rdparty_32xb128-noema_in1k_20220301-2a0ee547.pth' # noqa 4 | data_preprocessor = dict( 5 | type='SegDataPreProcessor', 6 | mean=[123.675, 116.28, 103.53], 7 | std=[58.395, 57.12, 57.375], 8 | bgr_to_rgb=True, 9 | pad_val=0, 10 | seg_pad_val=255) 11 | model = dict( 12 | type='EncoderDecoder', 13 | data_preprocessor=data_preprocessor, 14 | pretrained=None, 15 | backbone=dict( 16 | type='mmpretrain.ConvNeXt', 17 | arch='base', 18 | out_indices=[0, 1, 2, 3], 19 | drop_path_rate=0.4, 20 | layer_scale_init_value=1.0, 21 | gap_before_final_norm=False, 22 | init_cfg=dict( 23 | type='Pretrained', checkpoint=checkpoint_file, 24 | prefix='backbone.')), 25 | decode_head=dict( 26 | type='UPerHead', 27 | in_channels=[128, 256, 512, 1024], 28 | in_index=[0, 1, 2, 3], 29 | pool_scales=(1, 2, 3, 6), 30 | channels=512, 31 | dropout_ratio=0.1, 32 | num_classes=19, 33 | norm_cfg=norm_cfg, 34 | align_corners=False, 35 | loss_decode=dict( 36 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 37 | auxiliary_head=dict( 38 | type='FCNHead', 39 | in_channels=384, 40 | in_index=2, 41 | channels=256, 42 | num_convs=1, 43 | concat_input=False, 44 | dropout_ratio=0.1, 45 | num_classes=19, 46 | norm_cfg=norm_cfg, 47 | align_corners=False, 48 | loss_decode=dict( 49 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 50 | # model training and testing settings 51 | train_cfg=dict(), 52 | test_cfg=dict(mode='whole')) 53 | -------------------------------------------------------------------------------- /segmentation/configs/_base_/models/upernet_mae.py: -------------------------------------------------------------------------------- 1 | norm_cfg = dict(type='SyncBN', requires_grad=True) 2 | data_preprocessor = dict( 3 | type='SegDataPreProcessor', 4 | mean=[123.675, 116.28, 103.53], 5 | std=[58.395, 57.12, 57.375], 6 | bgr_to_rgb=True, 7 | pad_val=0, 8 | seg_pad_val=255) 9 | model = dict( 10 | type='EncoderDecoder', 11 | data_preprocessor=data_preprocessor, 12 | pretrained=None, 13 | backbone=dict( 14 | type='MAE', 15 | img_size=(640, 640), 16 | patch_size=16, 17 | in_channels=3, 18 | embed_dims=768, 19 | num_layers=12, 20 | num_heads=12, 21 | mlp_ratio=4, 22 | out_indices=(3, 5, 7, 11), 23 | attn_drop_rate=0.0, 24 | drop_path_rate=0.1, 25 | norm_cfg=dict(type='LN', eps=1e-6), 26 | act_cfg=dict(type='GELU'), 27 | norm_eval=False, 28 | init_values=0.1), 29 | neck=dict(type='Feature2Pyramid', embed_dim=768, rescales=[4, 2, 1, 0.5]), 30 | decode_head=dict( 31 | type='UPerHead', 32 | in_channels=[384, 384, 384, 384], 33 | in_index=[0, 1, 2, 3], 34 | pool_scales=(1, 2, 3, 6), 35 | channels=512, 36 | dropout_ratio=0.1, 37 | num_classes=19, 38 | norm_cfg=norm_cfg, 39 | align_corners=False, 40 | loss_decode=dict( 41 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 42 | auxiliary_head=dict( 43 | type='FCNHead', 44 | in_channels=384, 45 | in_index=2, 46 | channels=256, 47 | num_convs=1, 48 | concat_input=False, 49 | dropout_ratio=0.1, 50 | num_classes=19, 51 | norm_cfg=norm_cfg, 52 | align_corners=False, 53 | loss_decode=dict( 54 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 55 | # model training and testing settings 56 | train_cfg=dict(), 57 | test_cfg=dict(mode='whole')) 58 | -------------------------------------------------------------------------------- 
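The BEiT/MAE backbones above emit all four feature maps at a single stride, so the Feature2Pyramid neck with rescales=[4, 2, 1, 0.5] resizes them to strides 4/8/16/32 before UPerHead consumes them. A rough sketch of the idea using plain bilinear resizing; the real module uses learned deconvolutions and max-pooling rather than interpolation:

import torch.nn.functional as F

def feature2pyramid_sketch(feats, rescales=(4, 2, 1, 0.5)):
    # feats: four (N, C, H, W) maps, all at the backbone's native stride
    out = []
    for f, s in zip(feats, rescales):
        out.append(f if s == 1 else F.interpolate(
            f, scale_factor=s, mode='bilinear', align_corners=False))
    return out  # an FPN-style pyramid at strides ~4, 8, 16, 32

--------------------------------------------------------------------------------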
/segmentation/configs/_base_/models/upernet_r50.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | data_preprocessor = dict( 4 | type='SegDataPreProcessor', 5 | mean=[123.675, 116.28, 103.53], 6 | std=[58.395, 57.12, 57.375], 7 | bgr_to_rgb=True, 8 | pad_val=0, 9 | seg_pad_val=255) 10 | model = dict( 11 | type='EncoderDecoder', 12 | data_preprocessor=data_preprocessor, 13 | pretrained='open-mmlab://resnet50_v1c', 14 | backbone=dict( 15 | type='ResNetV1c', 16 | depth=50, 17 | num_stages=4, 18 | out_indices=(0, 1, 2, 3), 19 | dilations=(1, 1, 1, 1), 20 | strides=(1, 2, 2, 2), 21 | norm_cfg=norm_cfg, 22 | norm_eval=False, 23 | style='pytorch', 24 | contract_dilation=True), 25 | decode_head=dict( 26 | type='UPerHead', 27 | in_channels=[256, 512, 1024, 2048], 28 | in_index=[0, 1, 2, 3], 29 | pool_scales=(1, 2, 3, 6), 30 | channels=512, 31 | dropout_ratio=0.1, 32 | num_classes=19, 33 | norm_cfg=norm_cfg, 34 | align_corners=False, 35 | loss_decode=dict( 36 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 37 | auxiliary_head=dict( 38 | type='FCNHead', 39 | in_channels=1024, 40 | in_index=2, 41 | channels=256, 42 | num_convs=1, 43 | concat_input=False, 44 | dropout_ratio=0.1, 45 | num_classes=19, 46 | norm_cfg=norm_cfg, 47 | align_corners=False, 48 | loss_decode=dict( 49 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 50 | # model training and testing settings 51 | train_cfg=dict(), 52 | test_cfg=dict(mode='whole')) 53 | -------------------------------------------------------------------------------- /segmentation/configs/_base_/schedules/schedule_160k.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005) 3 | optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer, clip_grad=None) 4 | # learning policy 5 | param_scheduler = [ 6 | dict( 7 | type='PolyLR', 8 | eta_min=1e-4, 9 | power=0.9, 10 | begin=0, 11 | end=160000, 12 | by_epoch=False) 13 | ] 14 | # training schedule for 160k 15 | train_cfg = dict( 16 | type='IterBasedTrainLoop', max_iters=160000, val_interval=16000) 17 | val_cfg = dict(type='ValLoop') 18 | test_cfg = dict(type='TestLoop') 19 | default_hooks = dict( 20 | timer=dict(type='IterTimerHook'), 21 | logger=dict(type='LoggerHook', interval=50, log_metric_by_epoch=False), 22 | param_scheduler=dict(type='ParamSchedulerHook'), 23 | checkpoint=dict(type='CheckpointHook', by_epoch=False, interval=16000), 24 | sampler_seed=dict(type='DistSamplerSeedHook'), 25 | visualization=dict(type='SegVisualizationHook')) 26 | -------------------------------------------------------------------------------- /segmentation/configs/_base_/schedules/schedule_20k.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005) 3 | optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer, clip_grad=None) 4 | # learning policy 5 | param_scheduler = [ 6 | dict( 7 | type='PolyLR', 8 | eta_min=1e-4, 9 | power=0.9, 10 | begin=0, 11 | end=20000, 12 | by_epoch=False) 13 | ] 14 | # training schedule for 20k 15 | train_cfg = dict(type='IterBasedTrainLoop', max_iters=20000, val_interval=2000) 16 | val_cfg = dict(type='ValLoop') 17 | test_cfg = dict(type='TestLoop') 18 | default_hooks = dict( 19 | timer=dict(type='IterTimerHook'), 20 | 
logger=dict(type='LoggerHook', interval=50, log_metric_by_epoch=False), 21 | param_scheduler=dict(type='ParamSchedulerHook'), 22 | checkpoint=dict(type='CheckpointHook', by_epoch=False, interval=2000), 23 | sampler_seed=dict(type='DistSamplerSeedHook'), 24 | visualization=dict(type='SegVisualizationHook')) 25 | -------------------------------------------------------------------------------- /segmentation/configs/_base_/schedules/schedule_240k.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005) 3 | optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer, clip_grad=None) 4 | # learning policy 5 | param_scheduler = [ 6 | dict( 7 | type='PolyLR', 8 | eta_min=1e-4, 9 | power=0.9, 10 | begin=0, 11 | end=240000, 12 | by_epoch=False) 13 | ] 14 | # training schedule for 240k 15 | train_cfg = dict( 16 | type='IterBasedTrainLoop', max_iters=240000, val_interval=24000) 17 | val_cfg = dict(type='ValLoop') 18 | test_cfg = dict(type='TestLoop') 19 | default_hooks = dict( 20 | timer=dict(type='IterTimerHook'), 21 | logger=dict(type='LoggerHook', interval=50, log_metric_by_epoch=False), 22 | param_scheduler=dict(type='ParamSchedulerHook'), 23 | checkpoint=dict(type='CheckpointHook', by_epoch=False, interval=24000), 24 | sampler_seed=dict(type='DistSamplerSeedHook'), 25 | visualization=dict(type='SegVisualizationHook')) 26 | -------------------------------------------------------------------------------- /segmentation/configs/_base_/schedules/schedule_25k.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optimizer = dict(type='AdamW', lr=0.001, weight_decay=0.1) 3 | optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer, clip_grad=None) 4 | # learning policy 5 | param_scheduler = [ 6 | dict( 7 | type='LinearLR', start_factor=3e-2, begin=0, end=12000, 8 | by_epoch=False), 9 | dict( 10 | type='PolyLRRatio', 11 | eta_min_ratio=3e-2, 12 | power=0.9, 13 | begin=12000, 14 | end=24000, 15 | by_epoch=False), 16 | dict(type='ConstantLR', by_epoch=False, factor=1, begin=24000, end=25000) 17 | ] 18 | # training schedule for 25k 19 | train_cfg = dict(type='IterBasedTrainLoop', max_iters=25000, val_interval=1000) 20 | val_cfg = dict(type='ValLoop') 21 | test_cfg = dict(type='TestLoop') 22 | default_hooks = dict( 23 | timer=dict(type='IterTimerHook'), 24 | logger=dict(type='LoggerHook', interval=50, log_metric_by_epoch=False), 25 | param_scheduler=dict(type='ParamSchedulerHook'), 26 | checkpoint=dict(type='CheckpointHook', by_epoch=False, interval=2000), 27 | sampler_seed=dict(type='DistSamplerSeedHook'), 28 | visualization=dict(type='SegVisualizationHook')) 29 | -------------------------------------------------------------------------------- /segmentation/configs/_base_/schedules/schedule_320k.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005) 3 | optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer, clip_grad=None) 4 | # learning policy 5 | param_scheduler = [ 6 | dict( 7 | type='PolyLR', 8 | eta_min=1e-4, 9 | power=0.9, 10 | begin=0, 11 | end=320000, 12 | by_epoch=False) 13 | ] 14 | # training schedule for 320k 15 | train_cfg = dict( 16 | type='IterBasedTrainLoop', max_iters=320000, val_interval=32000) 17 | val_cfg = dict(type='ValLoop') 18 | test_cfg = dict(type='TestLoop') 19 | default_hooks 
= dict( 20 | timer=dict(type='IterTimerHook'), 21 | logger=dict(type='LoggerHook', interval=50, log_metric_by_epoch=False), 22 | param_scheduler=dict(type='ParamSchedulerHook'), 23 | checkpoint=dict(type='CheckpointHook', by_epoch=False, interval=32000), 24 | sampler_seed=dict(type='DistSamplerSeedHook'), 25 | visualization=dict(type='SegVisualizationHook')) 26 | -------------------------------------------------------------------------------- /segmentation/configs/_base_/schedules/schedule_40k.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005) 3 | optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer, clip_grad=None) 4 | # learning policy 5 | param_scheduler = [ 6 | dict( 7 | type='PolyLR', 8 | eta_min=1e-4, 9 | power=0.9, 10 | begin=0, 11 | end=40000, 12 | by_epoch=False) 13 | ] 14 | # training schedule for 40k 15 | train_cfg = dict(type='IterBasedTrainLoop', max_iters=40000, val_interval=4000) 16 | val_cfg = dict(type='ValLoop') 17 | test_cfg = dict(type='TestLoop') 18 | default_hooks = dict( 19 | timer=dict(type='IterTimerHook'), 20 | logger=dict(type='LoggerHook', interval=50, log_metric_by_epoch=False), 21 | param_scheduler=dict(type='ParamSchedulerHook'), 22 | checkpoint=dict(type='CheckpointHook', by_epoch=False, interval=4000), 23 | sampler_seed=dict(type='DistSamplerSeedHook'), 24 | visualization=dict(type='SegVisualizationHook')) 25 | -------------------------------------------------------------------------------- /segmentation/configs/_base_/schedules/schedule_80k.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005) 3 | optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer, clip_grad=None) 4 | # learning policy 5 | param_scheduler = [ 6 | dict( 7 | type='PolyLR', 8 | eta_min=1e-4, 9 | power=0.9, 10 | begin=0, 11 | end=80000, 12 | by_epoch=False) 13 | ] 14 | # training schedule for 80k 15 | train_cfg = dict(type='IterBasedTrainLoop', max_iters=80000, val_interval=8000) 16 | val_cfg = dict(type='ValLoop') 17 | test_cfg = dict(type='TestLoop') 18 | default_hooks = dict( 19 | timer=dict(type='IterTimerHook'), 20 | logger=dict(type='LoggerHook', interval=50, log_metric_by_epoch=False), 21 | param_scheduler=dict(type='ParamSchedulerHook'), 22 | checkpoint=dict(type='CheckpointHook', by_epoch=False, interval=8000), 23 | sampler_seed=dict(type='DistSamplerSeedHook'), 24 | visualization=dict(type='SegVisualizationHook')) 25 | -------------------------------------------------------------------------------- /segmentation/configs/convnext/convnext-base_upernet_8xb2-amp-160k_ade20k-512x512.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/models/upernet_convnext.py', '../_base_/datasets/ade20k.py', 3 | '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' 4 | ] 5 | crop_size = (512, 512) 6 | data_preprocessor = dict(size=crop_size) 7 | model = dict( 8 | data_preprocessor=data_preprocessor, 9 | decode_head=dict(in_channels=[128, 256, 512, 1024], num_classes=150), 10 | auxiliary_head=dict(in_channels=512, num_classes=150), 11 | test_cfg=dict(mode='slide', crop_size=crop_size, stride=(341, 341)), 12 | ) 13 | 14 | optim_wrapper = dict( 15 | _delete_=True, 16 | type='AmpOptimWrapper', 17 | optimizer=dict( 18 | type='AdamW', lr=0.0001, betas=(0.9, 
0.999), weight_decay=0.05), 19 | paramwise_cfg={ 20 | 'decay_rate': 0.9, 21 | 'decay_type': 'stage_wise', 22 | 'num_layers': 12 23 | }, 24 | constructor='LearningRateDecayOptimizerConstructor', 25 | loss_scale='dynamic') 26 | 27 | param_scheduler = [ 28 | dict( 29 | type='LinearLR', start_factor=1e-6, by_epoch=False, begin=0, end=1500), 30 | dict( 31 | type='PolyLR', 32 | power=1.0, 33 | begin=1500, 34 | end=160000, 35 | eta_min=0.0, 36 | by_epoch=False, 37 | ) 38 | ] 39 | 40 | # By default, models are trained on 8 GPUs with 2 images per GPU 41 | train_dataloader = dict(batch_size=2) 42 | val_dataloader = dict(batch_size=1) 43 | test_dataloader = val_dataloader 44 | -------------------------------------------------------------------------------- /segmentation/configs/swin/swin-base-patch4-window12-in1k-384x384-pre_upernet_8xb2-160k_ade20k-512x512.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | 'swin-tiny-patch4-window7-in1k-pre_upernet_8xb2-160k_ade20k-512x512.py' 3 | ] 4 | checkpoint_file = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/swin/swin_base_patch4_window12_384_20220317-55b0104a.pth' # noqa 5 | model = dict( 6 | backbone=dict( 7 | init_cfg=dict(type='Pretrained', checkpoint=checkpoint_file), 8 | pretrain_img_size=384, 9 | embed_dims=128, 10 | depths=[2, 2, 18, 2], 11 | num_heads=[4, 8, 16, 32], 12 | window_size=12), 13 | decode_head=dict(in_channels=[128, 256, 512, 1024], num_classes=150), 14 | auxiliary_head=dict(in_channels=512, num_classes=150)) 15 | -------------------------------------------------------------------------------- /segmentation/configs/swin/swin-base-patch4-window12-in22k-384x384-pre_upernet_8xb2-160k_ade20k-512x512.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | './swin-base-patch4-window12-in1k-384x384-pre_upernet_8xb2-160k_ade20k-512x512.py' # noqa 3 | ] 4 | checkpoint_file = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/swin/swin_base_patch4_window12_384_22k_20220317-e5c09f74.pth' # noqa 5 | model = dict( 6 | backbone=dict( 7 | init_cfg=dict(type='Pretrained', checkpoint=checkpoint_file))) 8 | -------------------------------------------------------------------------------- /segmentation/configs/swin/swin-base-patch4-window7-in1k-pre_upernet_8xb2-160k_ade20k-512x512.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | './swin-tiny-patch4-window7-in1k-pre_upernet_8xb2-160k_ade20k-512x512.py' 3 | ] 4 | checkpoint_file = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/swin/swin_base_patch4_window7_224_20220317-e9b98025.pth' # noqa 5 | model = dict( 6 | backbone=dict( 7 | init_cfg=dict(type='Pretrained', checkpoint=checkpoint_file), 8 | embed_dims=128, 9 | depths=[2, 2, 18, 2], 10 | num_heads=[4, 8, 16, 32]), 11 | decode_head=dict(in_channels=[128, 256, 512, 1024], num_classes=150), 12 | auxiliary_head=dict(in_channels=512, num_classes=150)) 13 | -------------------------------------------------------------------------------- /segmentation/configs/swin/swin-base-patch4-window7-in22k-pre_upernet_8xb2-160k_ade20k-512x512.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | './swin-base-patch4-window7-in1k-pre_upernet_8xb2-160k_ade20k-512x512.py' 3 | ] 4 | checkpoint_file = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/swin/swin_base_patch4_window7_224_22k_20220317-4f79f7c0.pth' # noqa 5 | model = 
dict( 6 | backbone=dict( 7 | init_cfg=dict(type='Pretrained', checkpoint=checkpoint_file))) 8 | -------------------------------------------------------------------------------- /segmentation/configs/swin/swin-large-patch4-window12-in22k-384x384-pre_upernet_8xb2-160k_ade20k-512x512.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | 'swin-large-patch4-window7-in22k-pre_upernet_' 3 | '8xb2-160k_ade20k-512x512.py' 4 | ] 5 | checkpoint_file = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/swin/swin_large_patch4_window12_384_22k_20220412-6580f57d.pth' # noqa 6 | model = dict( 7 | backbone=dict( 8 | init_cfg=dict(type='Pretrained', checkpoint=checkpoint_file), 9 | pretrain_img_size=384, 10 | window_size=12)) 11 | -------------------------------------------------------------------------------- /segmentation/configs/swin/swin-large-patch4-window7-in22k-pre_upernet_8xb2-160k_ade20k-512x512.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | 'swin-tiny-patch4-window7-in1k-pre_upernet_8xb2-160k_' 3 | 'ade20k-512x512.py' 4 | ] 5 | checkpoint_file = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/swin/swin_large_patch4_window7_224_22k_20220412-aeecf2aa.pth' # noqa 6 | model = dict( 7 | backbone=dict( 8 | init_cfg=dict(type='Pretrained', checkpoint=checkpoint_file), 9 | pretrain_img_size=224, 10 | embed_dims=192, 11 | depths=[2, 2, 18, 2], 12 | num_heads=[6, 12, 24, 48], 13 | window_size=7), 14 | decode_head=dict(in_channels=[192, 384, 768, 1536], num_classes=150), 15 | auxiliary_head=dict(in_channels=768, num_classes=150)) 16 | -------------------------------------------------------------------------------- /segmentation/configs/swin/swin-small-patch4-window7-in1k-pre_upernet_8xb2-160k_ade20k-512x512.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | './swin-tiny-patch4-window7-in1k-pre_upernet_8xb2-160k_ade20k-512x512.py' 3 | ] 4 | checkpoint_file = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/swin/swin_small_patch4_window7_224_20220317-7ba6d6dd.pth' # noqa 5 | model = dict( 6 | backbone=dict( 7 | init_cfg=dict(type='Pretrained', checkpoint=checkpoint_file), 8 | depths=[2, 2, 18, 2]), 9 | decode_head=dict(in_channels=[96, 192, 384, 768], num_classes=150), 10 | auxiliary_head=dict(in_channels=384, num_classes=150)) 11 | -------------------------------------------------------------------------------- /segmentation/configs/swin/swin-tiny-patch4-window7-in1k-pre_upernet_8xb2-160k_ade20k-512x512.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/models/upernet_swin.py', '../_base_/datasets/ade20k.py', 3 | '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' 4 | ] 5 | crop_size = (512, 512) 6 | data_preprocessor = dict(size=crop_size) 7 | checkpoint_file = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/swin/swin_tiny_patch4_window7_224_20220317-1cdeb081.pth' # noqa 8 | model = dict( 9 | data_preprocessor=data_preprocessor, 10 | backbone=dict( 11 | init_cfg=dict(type='Pretrained', checkpoint=checkpoint_file), 12 | embed_dims=96, 13 | depths=[2, 2, 6, 2], 14 | num_heads=[3, 6, 12, 24], 15 | window_size=7, 16 | use_abs_pos_embed=False, 17 | drop_path_rate=0.3, 18 | patch_norm=True), 19 | decode_head=dict(in_channels=[96, 192, 384, 768], num_classes=150), 20 | auxiliary_head=dict(in_channels=384, num_classes=150)) 21 | 
22 | # AdamW optimizer, no weight decay for position embedding & layer norm 23 | # in backbone 24 | optim_wrapper = dict( 25 | _delete_=True, 26 | type='OptimWrapper', 27 | optimizer=dict( 28 | type='AdamW', lr=0.00006, betas=(0.9, 0.999), weight_decay=0.01), 29 | paramwise_cfg=dict( 30 | custom_keys={ 31 | 'absolute_pos_embed': dict(decay_mult=0.), 32 | 'relative_position_bias_table': dict(decay_mult=0.), 33 | 'norm': dict(decay_mult=0.) 34 | })) 35 | 36 | param_scheduler = [ 37 | dict( 38 | type='LinearLR', start_factor=1e-6, by_epoch=False, begin=0, end=1500), 39 | dict( 40 | type='PolyLR', 41 | eta_min=0.0, 42 | power=1.0, 43 | begin=1500, 44 | end=160000, 45 | by_epoch=False, 46 | ) 47 | ] 48 | 49 | # By default, models are trained on 8 GPUs with 2 images per GPU 50 | train_dataloader = dict(batch_size=2) 51 | val_dataloader = dict(batch_size=1) 52 | test_dataloader = val_dataloader 53 | -------------------------------------------------------------------------------- /segmentation/configs/swin/swin-tiny-patch4-window7_upernet_1xb8-20k_levir-256x256.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/models/upernet_swin.py', '../_base_/datasets/levir_256x256.py', 3 | '../_base_/default_runtime.py', '../_base_/schedules/schedule_20k.py' 4 | ] 5 | crop_size = (256, 256) 6 | norm_cfg = dict(type='BN', requires_grad=True) 7 | data_preprocessor = dict( 8 | size=crop_size, 9 | type='SegDataPreProcessor', 10 | mean=[123.675, 116.28, 103.53, 123.675, 116.28, 103.53], 11 | std=[58.395, 57.12, 57.375, 58.395, 57.12, 57.375]) 12 | 13 | model = dict( 14 | data_preprocessor=data_preprocessor, 15 | backbone=dict( 16 | in_channels=6, 17 | embed_dims=96, 18 | depths=[2, 2, 6, 2], 19 | num_heads=[3, 6, 12, 24], 20 | window_size=7, 21 | use_abs_pos_embed=False, 22 | drop_path_rate=0.3, 23 | patch_norm=True), 24 | decode_head=dict(in_channels=[96, 192, 384, 768], num_classes=2), 25 | auxiliary_head=dict(in_channels=384, num_classes=2)) 26 | 27 | # AdamW optimizer, no weight decay for position embedding & layer norm 28 | # in backbone 29 | optim_wrapper = dict( 30 | _delete_=True, 31 | type='OptimWrapper', 32 | optimizer=dict( 33 | type='AdamW', lr=0.00006, betas=(0.9, 0.999), weight_decay=0.01), 34 | paramwise_cfg=dict( 35 | custom_keys={ 36 | 'absolute_pos_embed': dict(decay_mult=0.), 37 | 'relative_position_bias_table': dict(decay_mult=0.), 38 | 'norm': dict(decay_mult=0.) 
39 | })) 40 | 41 | param_scheduler = [ 42 | dict( 43 | type='LinearLR', start_factor=1e-6, by_epoch=False, begin=0, end=1500), 44 | dict( 45 | type='PolyLR', 46 | eta_min=0.0, 47 | power=1.0, 48 | begin=1500, 49 | end=20000, 50 | by_epoch=False, 51 | ) 52 | ] 53 | 54 | train_dataloader = dict(batch_size=4) 55 | val_dataloader = dict(batch_size=1) 56 | test_dataloader = val_dataloader 57 | -------------------------------------------------------------------------------- /segmentation/configs/upernet/upernet_r101_4xb2-40k_cityscapes-512x1024.py: -------------------------------------------------------------------------------- 1 | _base_ = './upernet_r50_4xb2-40k_cityscapes-512x1024.py' 2 | model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) 3 | -------------------------------------------------------------------------------- /segmentation/configs/upernet/upernet_r101_4xb2-40k_cityscapes-769x769.py: -------------------------------------------------------------------------------- 1 | _base_ = './upernet_r50_4xb2-40k_cityscapes-769x769.py' 2 | model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) 3 | -------------------------------------------------------------------------------- /segmentation/configs/upernet/upernet_r101_4xb2-80k_cityscapes-512x1024.py: -------------------------------------------------------------------------------- 1 | _base_ = './upernet_r50_4xb2-80k_cityscapes-512x1024.py' 2 | model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) 3 | -------------------------------------------------------------------------------- /segmentation/configs/upernet/upernet_r101_4xb2-80k_cityscapes-769x769.py: -------------------------------------------------------------------------------- 1 | _base_ = './upernet_r50_4xb2-80k_cityscapes-769x769.py' 2 | model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) 3 | -------------------------------------------------------------------------------- /segmentation/configs/upernet/upernet_r101_4xb4-160k_ade20k-512x512.py: -------------------------------------------------------------------------------- 1 | _base_ = './upernet_r50_4xb4-160k_ade20k-512x512.py' 2 | model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) 3 | -------------------------------------------------------------------------------- /segmentation/configs/upernet/upernet_r101_4xb4-20k_voc12aug-512x512.py: -------------------------------------------------------------------------------- 1 | _base_ = './upernet_r50_4xb4-20k_voc12aug-512x512.py' 2 | model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) 3 | -------------------------------------------------------------------------------- /segmentation/configs/upernet/upernet_r101_4xb4-40k_voc12aug-512x512.py: -------------------------------------------------------------------------------- 1 | _base_ = './upernet_r50_4xb4-40k_voc12aug-512x512.py' 2 | model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) 3 | -------------------------------------------------------------------------------- /segmentation/configs/upernet/upernet_r101_4xb4-80k_ade20k-512x512.py: -------------------------------------------------------------------------------- 1 | _base_ = './upernet_r50_4xb4-80k_ade20k-512x512.py' 2 | model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) 3 | -------------------------------------------------------------------------------- 
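The `upernet_r101_*` files above are one-liners because mmengine resolves the `_base_` chain recursively: each config inherits the corresponding r50 recipe and overrides only the pretrained weights and the backbone depth. A minimal sketch of how such a config resolves (assuming mmengine is installed and the path is taken relative to `segmentation/`):

```python
from mmengine.config import Config

# _base_ files are merged recursively; the child's overrides are then
# applied on top of the merged result.
cfg = Config.fromfile(
    'configs/upernet/upernet_r101_4xb4-160k_ade20k-512x512.py')
print(cfg.model.backbone.depth)  # 101, overridden by this one-line config
print(cfg.train_cfg.max_iters)   # 160000, inherited from schedule_160k.py
```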
/segmentation/configs/upernet/upernet_r18_4xb2-40k_cityscapes-512x1024.py: -------------------------------------------------------------------------------- 1 | _base_ = './upernet_r50_4xb2-40k_cityscapes-512x1024.py' 2 | model = dict( 3 | pretrained='open-mmlab://resnet18_v1c', 4 | backbone=dict(depth=18), 5 | decode_head=dict(in_channels=[64, 128, 256, 512]), 6 | auxiliary_head=dict(in_channels=256)) 7 | -------------------------------------------------------------------------------- /segmentation/configs/upernet/upernet_r18_4xb2-80k_cityscapes-512x1024.py: -------------------------------------------------------------------------------- 1 | _base_ = './upernet_r50_4xb2-80k_cityscapes-512x1024.py' 2 | model = dict( 3 | pretrained='open-mmlab://resnet18_v1c', 4 | backbone=dict(depth=18), 5 | decode_head=dict(in_channels=[64, 128, 256, 512]), 6 | auxiliary_head=dict(in_channels=256)) 7 | -------------------------------------------------------------------------------- /segmentation/configs/upernet/upernet_r18_4xb4-160k_ade20k-512x512.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/models/upernet_r50.py', '../_base_/datasets/ade20k.py', 3 | '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' 4 | ] 5 | model = dict( 6 | pretrained='open-mmlab://resnet18_v1c', 7 | backbone=dict(depth=18), 8 | decode_head=dict(in_channels=[64, 128, 256, 512], num_classes=150), 9 | auxiliary_head=dict(in_channels=256, num_classes=150)) 10 | -------------------------------------------------------------------------------- /segmentation/configs/upernet/upernet_r18_4xb4-20k_voc12aug-512x512.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/models/upernet_r50.py', 3 | '../_base_/datasets/pascal_voc12_aug.py', '../_base_/default_runtime.py', 4 | '../_base_/schedules/schedule_20k.py' 5 | ] 6 | model = dict( 7 | pretrained='open-mmlab://resnet18_v1c', 8 | backbone=dict(depth=18), 9 | decode_head=dict(in_channels=[64, 128, 256, 512], num_classes=21), 10 | auxiliary_head=dict(in_channels=256, num_classes=21)) 11 | -------------------------------------------------------------------------------- /segmentation/configs/upernet/upernet_r18_4xb4-40k_voc12aug-512x512.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/models/upernet_r50.py', 3 | '../_base_/datasets/pascal_voc12_aug.py', '../_base_/default_runtime.py', 4 | '../_base_/schedules/schedule_40k.py' 5 | ] 6 | model = dict( 7 | pretrained='open-mmlab://resnet18_v1c', 8 | backbone=dict(depth=18), 9 | decode_head=dict(in_channels=[64, 128, 256, 512], num_classes=21), 10 | auxiliary_head=dict(in_channels=256, num_classes=21)) 11 | -------------------------------------------------------------------------------- /segmentation/configs/upernet/upernet_r18_4xb4-80k_ade20k-512x512.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/models/upernet_r50.py', '../_base_/datasets/ade20k.py', 3 | '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' 4 | ] 5 | model = dict( 6 | pretrained='open-mmlab://resnet18_v1c', 7 | backbone=dict(depth=18), 8 | decode_head=dict(in_channels=[64, 128, 256, 512], num_classes=150), 9 | auxiliary_head=dict(in_channels=256, num_classes=150)) 10 | -------------------------------------------------------------------------------- 
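The r18 variants above must also shrink the `decode_head`/`auxiliary_head` `in_channels`, since ResNet-18 stages emit [64, 128, 256, 512] channels instead of ResNet-50's [256, 512, 1024, 2048]. A quick shape check of that assumption (a hedged sketch, assuming mmseg is installed; random init is fine for checking shapes):

```python
import torch
from mmseg.registry import MODELS

# Build the bare backbone the way the runner would, and confirm the
# per-stage channel widths that decode_head.in_channels must match.
backbone = MODELS.build(
    dict(type='ResNetV1c', depth=18, out_indices=(0, 1, 2, 3)))
backbone.eval()
with torch.no_grad():
    feats = backbone(torch.randn(1, 3, 512, 512))
print([f.shape[1] for f in feats])  # -> [64, 128, 256, 512]
```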
/segmentation/configs/upernet/upernet_r50_4xb2-40k_cityscapes-512x1024.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/models/upernet_r50.py', '../_base_/datasets/cityscapes.py', 3 | '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' 4 | ] 5 | crop_size = (512, 1024) 6 | data_preprocessor = dict(size=crop_size) 7 | model = dict(data_preprocessor=data_preprocessor) 8 | -------------------------------------------------------------------------------- /segmentation/configs/upernet/upernet_r50_4xb2-40k_cityscapes-769x769.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/models/upernet_r50.py', 3 | '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', 4 | '../_base_/schedules/schedule_40k.py' 5 | ] 6 | crop_size = (769, 769) 7 | data_preprocessor = dict(size=crop_size) 8 | model = dict( 9 | data_preprocessor=data_preprocessor, 10 | decode_head=dict(align_corners=True), 11 | auxiliary_head=dict(align_corners=True), 12 | test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513))) 13 | -------------------------------------------------------------------------------- /segmentation/configs/upernet/upernet_r50_4xb2-80k_cityscapes-512x1024.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/models/upernet_r50.py', '../_base_/datasets/cityscapes.py', 3 | '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' 4 | ] 5 | crop_size = (512, 1024) 6 | data_preprocessor = dict(size=crop_size) 7 | model = dict(data_preprocessor=data_preprocessor) 8 | -------------------------------------------------------------------------------- /segmentation/configs/upernet/upernet_r50_4xb2-80k_cityscapes-769x769.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/models/upernet_r50.py', 3 | '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', 4 | '../_base_/schedules/schedule_80k.py' 5 | ] 6 | crop_size = (769, 769) 7 | data_preprocessor = dict(size=crop_size) 8 | model = dict( 9 | data_preprocessor=data_preprocessor, 10 | decode_head=dict(align_corners=True), 11 | auxiliary_head=dict(align_corners=True), 12 | test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513))) 13 | -------------------------------------------------------------------------------- /segmentation/configs/upernet/upernet_r50_4xb4-160k_ade20k-512x512.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/models/upernet_r50.py', '../_base_/datasets/ade20k.py', 3 | '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' 4 | ] 5 | crop_size = (512, 512) 6 | data_preprocessor = dict(size=crop_size) 7 | model = dict( 8 | data_preprocessor=data_preprocessor, 9 | decode_head=dict(num_classes=150), 10 | auxiliary_head=dict(num_classes=150)) 11 | -------------------------------------------------------------------------------- /segmentation/configs/upernet/upernet_r50_4xb4-20k_voc12aug-512x512.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/models/upernet_r50.py', 3 | '../_base_/datasets/pascal_voc12_aug.py', '../_base_/default_runtime.py', 4 | '../_base_/schedules/schedule_20k.py' 5 | ] 6 | crop_size = (512, 512) 7 | data_preprocessor = dict(size=crop_size) 8 | model = dict( 9 
| data_preprocessor=data_preprocessor, 10 | decode_head=dict(num_classes=21), 11 | auxiliary_head=dict(num_classes=21)) 12 | -------------------------------------------------------------------------------- /segmentation/configs/upernet/upernet_r50_4xb4-40k_voc12aug-512x512.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/models/upernet_r50.py', 3 | '../_base_/datasets/pascal_voc12_aug.py', '../_base_/default_runtime.py', 4 | '../_base_/schedules/schedule_40k.py' 5 | ] 6 | crop_size = (512, 512) 7 | data_preprocessor = dict(size=crop_size) 8 | model = dict( 9 | data_preprocessor=data_preprocessor, 10 | decode_head=dict(num_classes=21), 11 | auxiliary_head=dict(num_classes=21)) 12 | -------------------------------------------------------------------------------- /segmentation/configs/upernet/upernet_r50_4xb4-80k_ade20k-512x512.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/models/upernet_r50.py', '../_base_/datasets/ade20k.py', 3 | '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' 4 | ] 5 | crop_size = (512, 512) 6 | data_preprocessor = dict(size=crop_size) 7 | model = dict( 8 | data_preprocessor=data_preprocessor, 9 | decode_head=dict(num_classes=150), 10 | auxiliary_head=dict(num_classes=150)) 11 | -------------------------------------------------------------------------------- /segmentation/configs/vit/vit_deit-b16-ln_mln_upernet_8xb2-160k_ade20k-512x512.py: -------------------------------------------------------------------------------- 1 | _base_ = './vit_vit-b16_mln_upernet_8xb2-160k_ade20k-512x512.py' 2 | 3 | model = dict( 4 | pretrained='pretrain/deit_base_patch16_224-b5f2ef4d.pth', 5 | backbone=dict(drop_path_rate=0.1, final_norm=True)) 6 | -------------------------------------------------------------------------------- /segmentation/configs/vit/vit_deit-b16_mln_upernet_8xb2-160k_ade20k-512x512.py: -------------------------------------------------------------------------------- 1 | _base_ = './vit_vit-b16_mln_upernet_8xb2-160k_ade20k-512x512.py' 2 | 3 | model = dict( 4 | pretrained='pretrain/deit_base_patch16_224-b5f2ef4d.pth', 5 | backbone=dict(drop_path_rate=0.1), 6 | ) 7 | -------------------------------------------------------------------------------- /segmentation/configs/vit/vit_deit-b16_upernet_8xb2-160k_ade20k-512x512.py: -------------------------------------------------------------------------------- 1 | _base_ = './vit_vit-b16_mln_upernet_8xb2-160k_ade20k-512x512.py' 2 | 3 | model = dict( 4 | pretrained='pretrain/deit_base_patch16_224-b5f2ef4d.pth', 5 | backbone=dict(drop_path_rate=0.1), 6 | neck=None) 7 | -------------------------------------------------------------------------------- /segmentation/configs/vit/vit_deit-b16_upernet_8xb2-80k_ade20k-512x512.py: -------------------------------------------------------------------------------- 1 | _base_ = './vit_vit-b16_mln_upernet_8xb2-80k_ade20k-512x512.py' 2 | 3 | model = dict( 4 | pretrained='pretrain/deit_base_patch16_224-b5f2ef4d.pth', 5 | backbone=dict(drop_path_rate=0.1), 6 | neck=None) 7 | -------------------------------------------------------------------------------- /segmentation/configs/vit/vit_deit-s16-ln_mln_upernet_8xb2-160k_ade20k-512x512.py: -------------------------------------------------------------------------------- 1 | _base_ = './vit_vit-b16_mln_upernet_8xb2-160k_ade20k-512x512.py' 2 | 3 | model = dict( 4 | 
pretrained='pretrain/deit_small_patch16_224-cd65a155.pth', 5 | backbone=dict( 6 | num_heads=6, embed_dims=384, drop_path_rate=0.1, final_norm=True), 7 | decode_head=dict(num_classes=150, in_channels=[384, 384, 384, 384]), 8 | neck=dict(in_channels=[384, 384, 384, 384], out_channels=384), 9 | auxiliary_head=dict(num_classes=150, in_channels=384)) 10 | -------------------------------------------------------------------------------- /segmentation/configs/vit/vit_deit-s16_mln_upernet_8xb2-160k_ade20k-512x512.py: -------------------------------------------------------------------------------- 1 | _base_ = './vit_vit-b16_mln_upernet_8xb2-160k_ade20k-512x512.py' 2 | 3 | model = dict( 4 | pretrained='pretrain/deit_small_patch16_224-cd65a155.pth', 5 | backbone=dict(num_heads=6, embed_dims=384, drop_path_rate=0.1), 6 | decode_head=dict(num_classes=150, in_channels=[384, 384, 384, 384]), 7 | neck=dict(in_channels=[384, 384, 384, 384], out_channels=384), 8 | auxiliary_head=dict(num_classes=150, in_channels=384)) 9 | -------------------------------------------------------------------------------- /segmentation/configs/vit/vit_deit-s16_upernet_8xb2-160k_ade20k-512x512.py: -------------------------------------------------------------------------------- 1 | _base_ = './vit_vit-b16_mln_upernet_8xb2-80k_ade20k-512x512.py' 2 | 3 | model = dict( 4 | pretrained='pretrain/deit_small_patch16_224-cd65a155.pth', 5 | backbone=dict(num_heads=6, embed_dims=384, drop_path_rate=0.1), 6 | decode_head=dict(num_classes=150, in_channels=[384, 384, 384, 384]), 7 | neck=None, 8 | auxiliary_head=dict(num_classes=150, in_channels=384)) 9 | -------------------------------------------------------------------------------- /segmentation/configs/vit/vit_deit-s16_upernet_8xb2-80k_ade20k-512x512.py: -------------------------------------------------------------------------------- 1 | _base_ = './vit_vit-b16_mln_upernet_8xb2-80k_ade20k-512x512.py' 2 | 3 | model = dict( 4 | pretrained='pretrain/deit_small_patch16_224-cd65a155.pth', 5 | backbone=dict(num_heads=6, embed_dims=384, drop_path_rate=0.1), 6 | decode_head=dict(num_classes=150, in_channels=[384, 384, 384, 384]), 7 | neck=None, 8 | auxiliary_head=dict(num_classes=150, in_channels=384)) 9 | -------------------------------------------------------------------------------- /segmentation/configs/vit/vit_vit-b16-ln_mln_upernet_8xb2-160k_ade20k-512x512.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/models/upernet_vit-b16_ln_mln.py', 3 | '../_base_/datasets/ade20k.py', '../_base_/default_runtime.py', 4 | '../_base_/schedules/schedule_160k.py' 5 | ] 6 | crop_size = (512, 512) 7 | data_preprocessor = dict(size=crop_size) 8 | model = dict( 9 | data_preprocessor=data_preprocessor, 10 | pretrained='pretrain/vit_base_patch16_224.pth', 11 | backbone=dict(drop_path_rate=0.1, final_norm=True), 12 | decode_head=dict(num_classes=150), 13 | auxiliary_head=dict(num_classes=150)) 14 | 15 | # AdamW optimizer, no weight decay for position embedding & layer norm 16 | # in backbone 17 | optim_wrapper = dict( 18 | _delete_=True, 19 | type='OptimWrapper', 20 | optimizer=dict( 21 | type='AdamW', lr=0.00006, betas=(0.9, 0.999), weight_decay=0.01), 22 | paramwise_cfg=dict( 23 | custom_keys={ 24 | 'pos_embed': dict(decay_mult=0.), 25 | 'cls_token': dict(decay_mult=0.), 26 | 'norm': dict(decay_mult=0.) 
27 | })) 28 | 29 | param_scheduler = [ 30 | dict( 31 | type='LinearLR', start_factor=1e-6, by_epoch=False, begin=0, end=1500), 32 | dict( 33 | type='PolyLR', 34 | eta_min=0.0, 35 | power=1.0, 36 | begin=1500, 37 | end=160000, 38 | by_epoch=False, 39 | ) 40 | ] 41 | 42 | # By default, models are trained on 8 GPUs with 2 images per GPU 43 | train_dataloader = dict(batch_size=2) 44 | val_dataloader = dict(batch_size=1) 45 | test_dataloader = val_dataloader 46 | -------------------------------------------------------------------------------- /segmentation/configs/vit/vit_vit-b16_mln_upernet_8xb2-160k_ade20k-512x512.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/models/upernet_vit-b16_ln_mln.py', 3 | '../_base_/datasets/ade20k.py', '../_base_/default_runtime.py', 4 | '../_base_/schedules/schedule_160k.py' 5 | ] 6 | crop_size = (512, 512) 7 | data_preprocessor = dict(size=crop_size) 8 | model = dict( 9 | data_preprocessor=data_preprocessor, 10 | pretrained='pretrain/vit_base_patch16_224.pth', 11 | decode_head=dict(num_classes=150), 12 | auxiliary_head=dict(num_classes=150)) 13 | 14 | # AdamW optimizer, no weight decay for position embedding & layer norm 15 | # in backbone 16 | optim_wrapper = dict( 17 | _delete_=True, 18 | type='OptimWrapper', 19 | optimizer=dict( 20 | type='AdamW', lr=0.00006, betas=(0.9, 0.999), weight_decay=0.01), 21 | paramwise_cfg=dict( 22 | custom_keys={ 23 | 'pos_embed': dict(decay_mult=0.), 24 | 'cls_token': dict(decay_mult=0.), 25 | 'norm': dict(decay_mult=0.) 26 | })) 27 | 28 | param_scheduler = [ 29 | dict( 30 | type='LinearLR', start_factor=1e-6, by_epoch=False, begin=0, end=1500), 31 | dict( 32 | type='PolyLR', 33 | eta_min=0.0, 34 | power=1.0, 35 | begin=1500, 36 | end=160000, 37 | by_epoch=False, 38 | ) 39 | ] 40 | 41 | # By default, models are trained on 8 GPUs with 2 images per GPU 42 | train_dataloader = dict(batch_size=2) 43 | val_dataloader = dict(batch_size=1) 44 | test_dataloader = val_dataloader 45 | -------------------------------------------------------------------------------- /segmentation/configs/vit/vit_vit-b16_mln_upernet_8xb2-80k_ade20k-512x512.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/models/upernet_vit-b16_ln_mln.py', 3 | '../_base_/datasets/ade20k.py', '../_base_/default_runtime.py', 4 | '../_base_/schedules/schedule_80k.py' 5 | ] 6 | crop_size = (512, 512) 7 | data_preprocessor = dict(size=crop_size) 8 | model = dict( 9 | data_preprocessor=data_preprocessor, 10 | pretrained='pretrain/vit_base_patch16_224.pth', 11 | decode_head=dict(num_classes=150), 12 | auxiliary_head=dict(num_classes=150)) 13 | 14 | # AdamW optimizer, no weight decay for position embedding & layer norm 15 | # in backbone 16 | optim_wrapper = dict( 17 | _delete_=True, 18 | type='OptimWrapper', 19 | optimizer=dict( 20 | type='AdamW', lr=0.00006, betas=(0.9, 0.999), weight_decay=0.01), 21 | paramwise_cfg=dict( 22 | custom_keys={ 23 | 'pos_embed': dict(decay_mult=0.), 24 | 'cls_token': dict(decay_mult=0.), 25 | 'norm': dict(decay_mult=0.) 
26 | })) 27 | 28 | param_scheduler = [ 29 | dict( 30 | type='LinearLR', start_factor=1e-6, by_epoch=False, begin=0, end=1500), 31 | dict( 32 | type='PolyLR', 33 | eta_min=0.0, 34 | power=1.0, 35 | begin=1500, 36 | end=80000, 37 | by_epoch=False, 38 | ) 39 | ] 40 | 41 | # By default, models are trained on 8 GPUs with 2 images per GPU 42 | train_dataloader = dict(batch_size=2) 43 | val_dataloader = dict(batch_size=1) 44 | test_dataloader = val_dataloader 45 | -------------------------------------------------------------------------------- /segmentation/configs/vmamba/upernet_vmamba_4xb4-160k_ade20k-512x512_base.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../swin/swin-base-patch4-window7-in1k-pre_upernet_8xb2-160k_ade20k-512x512.py' 3 | ] 4 | model = dict( 5 | backbone=dict( 6 | type='MMSEG_VSSM', 7 | depths=(2, 2, 27, 2), 8 | dims=128, 9 | out_indices=(0, 1, 2, 3), 10 | pretrained="../../ckpts/vssmbase/ckpt_epoch_260.pth", 11 | ),) 12 | # train_dataloader = dict(batch_size=4) # as gpus=4 13 | 14 | -------------------------------------------------------------------------------- /segmentation/configs/vmamba/upernet_vmamba_4xb4-160k_ade20k-512x512_small.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../swin/swin-small-patch4-window7-in1k-pre_upernet_8xb2-160k_ade20k-512x512.py' 3 | ] 4 | model = dict( 5 | backbone=dict( 6 | type='MMSEG_VSSM', 7 | depths=(2, 2, 27, 2), 8 | dims=96, 9 | out_indices=(0, 1, 2, 3), 10 | pretrained="../../ckpts/vssmsmall/ema_ckpt_epoch_238.pth", 11 | ),) 12 | # train_dataloader = dict(batch_size=4) # as gpus=4 13 | 14 | -------------------------------------------------------------------------------- /segmentation/configs/vmamba/upernet_vmamba_4xb4-160k_ade20k-512x512_tiny.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../swin/swin-tiny-patch4-window7-in1k-pre_upernet_8xb2-160k_ade20k-512x512.py' 3 | ] 4 | model = dict( 5 | backbone=dict( 6 | type='MMSEG_VSSM', 7 | depths=(2, 2, 9, 2), 8 | dims=96, 9 | out_indices=(0, 1, 2, 3), 10 | pretrained="../../ckpts/vssmtiny/ckpt_epoch_292.pth", 11 | ),) 12 | # train_dataloader = dict(batch_size=4) # as gpus=4 13 | 14 | -------------------------------------------------------------------------------- /segmentation/configs/vmamba/upernet_vmamba_4xb4-160k_ade20k-640x640_small.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | './upernet_swin_4xb4-160k_ade20k-640x640_small.py' 3 | ] 4 | model = dict( 5 | backbone=dict( 6 | type='MMSEG_VSSM', 7 | depths=(2, 2, 27, 2), 8 | dims=96, 9 | out_indices=(0, 1, 2, 3), 10 | pretrained="../../ckpts/vssmsmall/ckpt_epoch_238.pth", 11 | ),) 12 | # train_dataloader = dict(batch_size=4) # as gpus=4 13 | 14 | -------------------------------------------------------------------------------- /segmentation/configs/vmamba/upernet_vmamba_4xb4-160k_ade20k-896x896_small.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | './upernet_swin_4xb4-160k_ade20k-896x896_small.py' 3 | ] 4 | model = dict( 5 | backbone=dict( 6 | type='MMSEG_VSSM', 7 | depths=(2, 2, 27, 2), 8 | dims=96, 9 | out_indices=(0, 1, 2, 3), 10 | pretrained="../../ckpts/vssmsmall/ckpt_epoch_238.pth", 11 | ),) 12 | train_dataloader = dict(batch_size=4) # as gpus=4 13 | 14 | -------------------------------------------------------------------------------- 
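The `vmamba` configs above keep the Swin training recipes as `_base_` and swap only the backbone; the `vssm*` variants that follow build the `MM_VSSM` type registered in `segmentation/model.py` (shown later in this listing). A hedged sketch of what that registration enables, with constructor arguments copied from the tiny config, assuming it is run from `segmentation/` with the `classification` package importable and the selective-scan kernels built:

```python
import torch
from mmseg.registry import MODELS
import model  # noqa: F401  -- registers the MM_VSSM backbone type

# Build just the backbone from its config dict, as the runner would.
backbone = MODELS.build(
    dict(type='MM_VSSM', dims=96, depths=(2, 2, 9, 2),
         out_indices=(0, 1, 2, 3)))
with torch.no_grad():
    feats = backbone(torch.randn(1, 3, 512, 512))
# four NCHW feature maps are expected, with widths 96/192/384/768
print([tuple(f.shape) for f in feats])
```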
/segmentation/configs/vssm/upernet_convnext_4xb4-160k_ade20k-640x640_small.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../convnext/convnext-base_upernet_8xb2-amp-160k_ade20k-640x640.py' 3 | ] 4 | crop_size = (640, 640) 5 | data_preprocessor = dict(size=crop_size) 6 | checkpoint_file = 'https://download.openmmlab.com/mmclassification/v0/convnext/downstream/convnext-small_3rdparty_32xb128-noema_in1k_20220301-303e75e3.pth' # noqa 7 | model = dict( 8 | data_preprocessor=data_preprocessor, 9 | backbone=dict( 10 | type='mmpretrain.ConvNeXt', 11 | arch='small', 12 | out_indices=[0, 1, 2, 3], 13 | drop_path_rate=0.3, 14 | layer_scale_init_value=1.0, 15 | gap_before_final_norm=False, 16 | init_cfg=dict( 17 | type='Pretrained', checkpoint=checkpoint_file, 18 | prefix='backbone.')), 19 | decode_head=dict( 20 | in_channels=[96, 192, 384, 768], 21 | num_classes=150, 22 | ), 23 | auxiliary_head=dict(in_channels=384, num_classes=150), 24 | test_cfg=dict(mode='slide', crop_size=crop_size, stride=(426, 426)), 25 | ) 26 | 27 | -------------------------------------------------------------------------------- /segmentation/configs/vssm/upernet_vssm_4xb4-160k_ade20k-512x512_base.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../swin/swin-base-patch4-window7-in1k-pre_upernet_8xb2-160k_ade20k-512x512.py' 3 | ] 4 | model = dict( 5 | backbone=dict( 6 | type='MM_VSSM', 7 | out_indices=(0, 1, 2, 3), 8 | pretrained="../../ckpts/classification/outs/vssm/vssmbasedp05/vssmbase_dp05_ckpt_epoch_260.pth", 9 | # copied from classification/configs/vssm/vssm_base_224.yaml 10 | dims=128, 11 | depths=(2, 2, 27, 2), 12 | ssm_d_state=16, 13 | ssm_dt_rank="auto", 14 | ssm_ratio=2.0, 15 | mlp_ratio=0.0, 16 | downsample_version="v1", 17 | patchembed_version="v1", 18 | # forward_type="v0", # if you want exactly the same 19 | ),) 20 | # train_dataloader = dict(batch_size=4) # as gpus=4 21 | 22 | -------------------------------------------------------------------------------- /segmentation/configs/vssm/upernet_vssm_4xb4-160k_ade20k-512x512_small.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../swin/swin-small-patch4-window7-in1k-pre_upernet_8xb2-160k_ade20k-512x512.py' 3 | ] 4 | model = dict( 5 | backbone=dict( 6 | type='MM_VSSM', 7 | out_indices=(0, 1, 2, 3), 8 | pretrained="../../ckpts/classification/outs/vssm/vssmsmall/vssmsmall_dp03_ckpt_epoch_238.pth", 9 | # copied from classification/configs/vssm/vssm_small_224.yaml 10 | dims=96, 11 | depths=(2, 2, 27, 2), 12 | ssm_d_state=16, 13 | ssm_dt_rank="auto", 14 | ssm_ratio=2.0, 15 | mlp_ratio=0.0, 16 | downsample_version="v1", 17 | patchembed_version="v1", 18 | # forward_type="v0", # if you want exactly the same 19 | ),) 20 | # train_dataloader = dict(batch_size=4) # as gpus=4 21 | 22 | -------------------------------------------------------------------------------- /segmentation/configs/vssm/upernet_vssm_4xb4-160k_ade20k-512x512_tiny.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../swin/swin-tiny-patch4-window7-in1k-pre_upernet_8xb2-160k_ade20k-512x512.py' 3 | ] 4 | model = dict( 5 | backbone=dict( 6 | type='MM_VSSM', 7 | out_indices=(0, 1, 2, 3), 8 | pretrained="../../ckpts/classification/outs/vssm/vssmtiny/vssmtiny_dp01_ckpt_epoch_292.pth", 9 | # copied from classification/configs/vssm/vssm_tiny_224.yaml 10 | dims=96, 11 | depths=(2, 2, 9, 2), 12 | 
ssm_d_state=16, 13 | ssm_dt_rank="auto", 14 | ssm_ratio=2.0, 15 | mlp_ratio=0.0, 16 | downsample_version="v1", 17 | patchembed_version="v1", 18 | # forward_type="v0", # if you want exactly the same 19 | ),) 20 | # train_dataloader = dict(batch_size=4) # as gpus=4 21 | 22 | -------------------------------------------------------------------------------- /segmentation/configs/vssm/upernet_vssm_4xb4-160k_ade20k-640x640_small.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | './upernet_swin_4xb4-160k_ade20k-640x640_small.py' 3 | ] 4 | model = dict( 5 | backbone=dict( 6 | type='MM_VSSM', 7 | out_indices=(0, 1, 2, 3), 8 | pretrained="../../ckpts/classification/outs/vssm/vssmsmall/vssmsmall_dp03_ckpt_epoch_238.pth", 9 | # copied from classification/configs/vssm/vssm_small_224.yaml 10 | dims=96, 11 | depths=(2, 2, 27, 2), 12 | ssm_d_state=16, 13 | ssm_dt_rank="auto", 14 | ssm_ratio=2.0, 15 | mlp_ratio=0.0, 16 | downsample_version="v1", 17 | patchembed_version="v1", 18 | # forward_type="v0", # if you want exactly the same 19 | ),) 20 | # train_dataloader = dict(batch_size=4) # as gpus=4 21 | 22 | -------------------------------------------------------------------------------- /segmentation/configs/vssm/upernet_vssm_4xb4-160k_ade20k-896x896_small.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | './upernet_swin_4xb4-160k_ade20k-896x896_small.py' 3 | ] 4 | model = dict( 5 | backbone=dict( 6 | type='MM_VSSM', 7 | out_indices=(0, 1, 2, 3), 8 | pretrained="../../ckpts/classification/outs/vssm/vssmsmall/vssmsmall_dp03_ckpt_epoch_238.pth", 9 | # copied from classification/configs/vssm/vssm_small_224.yaml 10 | dims=96, 11 | depths=(2, 2, 27, 2), 12 | ssm_d_state=16, 13 | ssm_dt_rank="auto", 14 | ssm_ratio=2.0, 15 | mlp_ratio=0.0, 16 | downsample_version="v1", 17 | patchembed_version="v1", 18 | # forward_type="v0", # if you want exactly the same 19 | ),) 20 | train_dataloader = dict(batch_size=4) # as gpus=4 21 | 22 | -------------------------------------------------------------------------------- /segmentation/configs/vssm1/upernet_vssm_4xb4-160k_ade20k-512x512_base.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../swin/swin-base-patch4-window7-in1k-pre_upernet_8xb2-160k_ade20k-512x512.py' 3 | ] 4 | model = dict( 5 | backbone=dict( 6 | type='MM_VSSM', 7 | out_indices=(0, 1, 2, 3), 8 | pretrained="", 9 | # copied from classification/configs/vssm/vssm_base_224.yaml 10 | dims=128, 11 | depths=(2, 2, 15, 2), 12 | ssm_d_state=1, 13 | ssm_dt_rank="auto", 14 | ssm_ratio=2.0, 15 | ssm_conv=3, 16 | ssm_conv_bias=False, 17 | forward_type="v05_noz", # v3_noz, 18 | mlp_ratio=4.0, 19 | downsample_version="v3", 20 | patchembed_version="v2", 21 | drop_path_rate=0.6, 22 | norm_layer="ln2d", 23 | ),) 24 | # train_dataloader = dict(batch_size=4) # as gpus=4 25 | 26 | -------------------------------------------------------------------------------- /segmentation/configs/vssm1/upernet_vssm_4xb4-160k_ade20k-512x512_small.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../swin/swin-small-patch4-window7-in1k-pre_upernet_8xb2-160k_ade20k-512x512.py' 3 | ] 4 | model = dict( 5 | backbone=dict( 6 | type='MM_VSSM', 7 | out_indices=(0, 1, 2, 3), 8 | pretrained="", 9 | # copied from classification/configs/vssm/vssm_small_224.yaml 10 | dims=96, 11 | depths=(2, 2, 15, 2), 12 | ssm_d_state=1, 13 | ssm_dt_rank="auto", 14 
| ssm_ratio=2.0, 15 | ssm_conv=3, 16 | ssm_conv_bias=False, 17 | forward_type="v05_noz", # v3_noz, 18 | mlp_ratio=4.0, 19 | downsample_version="v3", 20 | patchembed_version="v2", 21 | drop_path_rate=0.3, 22 | norm_layer="ln2d", 23 | ),) 24 | # train_dataloader = dict(batch_size=4) # as gpus=4 25 | 26 | -------------------------------------------------------------------------------- /segmentation/configs/vssm1/upernet_vssm_4xb4-160k_ade20k-512x512_tiny.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../swin/swin-tiny-patch4-window7-in1k-pre_upernet_8xb2-160k_ade20k-512x512.py' 3 | ] 4 | model = dict( 5 | backbone=dict( 6 | type='MM_VSSM', 7 | out_indices=(0, 1, 2, 3), 8 | pretrained="", 9 | # copied from classification/configs/vssm/vssm_tiny_224.yaml 10 | dims=96, 11 | # depths=(2, 2, 5, 2), 12 | depths=(2, 2, 8, 2), 13 | ssm_d_state=1, 14 | ssm_dt_rank="auto", 15 | # ssm_ratio=2.0, 16 | ssm_ratio=1.0, 17 | ssm_conv=3, 18 | ssm_conv_bias=False, 19 | forward_type="v05_noz", # v3_noz, 20 | mlp_ratio=4.0, 21 | downsample_version="v3", 22 | patchembed_version="v2", 23 | drop_path_rate=0.2, 24 | norm_layer="ln2d", 25 | ),) 26 | # train_dataloader = dict(batch_size=4) # as gpus=4 27 | 28 | -------------------------------------------------------------------------------- /segmentation/configs/vssm1/upernet_vssm_4xb4-160k_ade20k-512x512_tiny1.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../swin/swin-tiny-patch4-window7-in1k-pre_upernet_8xb2-160k_ade20k-512x512.py' 3 | ] 4 | model = dict( 5 | backbone=dict( 6 | type='MM_VSSM', 7 | out_indices=(0, 1, 2, 3), 8 | pretrained="", 9 | # copied from classification/configs/vssm/vssm_tiny_224.yaml 10 | dims=96, 11 | depths=(2, 2, 5, 2), 12 | ssm_d_state=1, 13 | ssm_dt_rank="auto", 14 | ssm_ratio=2.0, 15 | ssm_conv=3, 16 | ssm_conv_bias=False, 17 | forward_type="v05_noz", # v3_noz, 18 | mlp_ratio=4.0, 19 | downsample_version="v3", 20 | patchembed_version="v2", 21 | drop_path_rate=0.2, 22 | norm_layer="ln2d", 23 | ),) 24 | # train_dataloader = dict(batch_size=4) # as gpus=4 25 | 26 | -------------------------------------------------------------------------------- /segmentation/configs/vssm1/upernet_vssm_4xb4-160k_ade20k-640x640_small.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../vssm/upernet_swin_4xb4-160k_ade20k-640x640_small.py' 3 | ] 4 | model = dict( 5 | backbone=dict( 6 | type='MM_VSSM', 7 | out_indices=(0, 1, 2, 3), 8 | pretrained="", 9 | # copied from classification/configs/vssm/vssm_small_224.yaml 10 | dims=96, 11 | depths=(2, 2, 15, 2), 12 | ssm_d_state=1, 13 | ssm_dt_rank="auto", 14 | ssm_ratio=2.0, 15 | ssm_conv=3, 16 | ssm_conv_bias=False, 17 | forward_type="v05_noz", # v3_noz, 18 | mlp_ratio=4.0, 19 | downsample_version="v3", 20 | patchembed_version="v2", 21 | drop_path_rate=0.3, 22 | norm_layer="ln2d", 23 | ),) 24 | # train_dataloader = dict(batch_size=4) # as gpus=4 25 | 26 | -------------------------------------------------------------------------------- /segmentation/configs/vssm1/upernet_vssm_4xb4-160k_ade20k-896x896_small.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../vssm/upernet_swin_4xb4-160k_ade20k-896x896_small.py' 3 | ] 4 | model = dict( 5 | backbone=dict( 6 | type='MM_VSSM', 7 | out_indices=(0, 1, 2, 3), 8 | pretrained="", 9 | # copied from classification/configs/vssm/vssm_small_224.yaml 10 | 
dims=96, 11 | depths=(2, 2, 15, 2), 12 | ssm_d_state=1, 13 | ssm_dt_rank="auto", 14 | ssm_ratio=2.0, 15 | ssm_conv=3, 16 | ssm_conv_bias=False, 17 | forward_type="v05_noz", # v3_noz, 18 | mlp_ratio=4.0, 19 | downsample_version="v3", 20 | patchembed_version="v2", 21 | drop_path_rate=0.3, 22 | norm_layer="ln2d", 23 | ),) 24 | train_dataloader = dict(batch_size=4) # as gpus=4 25 | 26 | -------------------------------------------------------------------------------- /segmentation/model.py: -------------------------------------------------------------------------------- 1 | import os 2 | from functools import partial 3 | from typing import Callable 4 | 5 | import torch 6 | from torch import nn 7 | from torch.utils import checkpoint 8 | 9 | from mmengine.model import BaseModule 10 | from mmdet.registry import MODELS as MODELS_MMDET 11 | from mmseg.registry import MODELS as MODELS_MMSEG 12 | 13 | def import_abspy(name="models", path="classification/"):  # import a module from an arbitrary directory by temporarily extending sys.path 14 | import sys 15 | import importlib 16 | path = os.path.abspath(path) 17 | assert os.path.isdir(path) 18 | sys.path.insert(0, path) 19 | module = importlib.import_module(name) 20 | sys.path.pop(0) 21 | return module 22 | 23 | build = import_abspy( 24 | "models", 25 | os.path.join(os.path.dirname(os.path.abspath(__file__)), "../classification/"), 26 | ) 27 | Backbone_VSSM: nn.Module = build.vmamba.Backbone_VSSM 28 | 29 | @MODELS_MMSEG.register_module() 30 | @MODELS_MMDET.register_module() 31 | class MM_VSSM(BaseModule, Backbone_VSSM): 32 | def __init__(self, *args, **kwargs): 33 | BaseModule.__init__(self) 34 | Backbone_VSSM.__init__(self, *args, **kwargs) 35 | 36 | -------------------------------------------------------------------------------- /segmentation/readme.md: -------------------------------------------------------------------------------- 1 | ## origins 2 | `configs/` and `tools/` are copied from https://github.com/open-mmlab/mmsegmentation (`version 1.2.2`) 3 | 4 | ## modifications 5 | `import model` is added at line 13 of `tools/train.py` 6 | `import model` is added at line 8 of `tools/test.py` 7 | 8 | -------------------------------------------------------------------------------- /segmentation/tools/dist_test.sh: -------------------------------------------------------------------------------- 1 | CONFIG=$1 2 | CHECKPOINT=$2 3 | GPUS=$3 4 | NNODES=${NNODES:-1} 5 | NODE_RANK=${NODE_RANK:-0} 6 | PORT=${PORT:-29500} 7 | MASTER_ADDR=${MASTER_ADDR:-"127.0.0.1"} 8 | 9 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 10 | python -m torch.distributed.launch \ 11 | --nnodes=$NNODES \ 12 | --node_rank=$NODE_RANK \ 13 | --master_addr=$MASTER_ADDR \ 14 | --nproc_per_node=$GPUS \ 15 | --master_port=$PORT \ 16 | $(dirname "$0")/test.py \ 17 | $CONFIG \ 18 | $CHECKPOINT \ 19 | --launcher pytorch \ 20 | ${@:4} 21 | -------------------------------------------------------------------------------- /segmentation/tools/dist_train.sh: -------------------------------------------------------------------------------- 1 | CONFIG=$1 2 | GPUS=$2 3 | NNODES=${NNODES:-1} 4 | NODE_RANK=${NODE_RANK:-0} 5 | PORT=${PORT:-29500} 6 | MASTER_ADDR=${MASTER_ADDR:-"127.0.0.1"} 7 | 8 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 9 | python -m torch.distributed.launch \ 10 | --nnodes=$NNODES \ 11 | --node_rank=$NODE_RANK \ 12 | --master_addr=$MASTER_ADDR \ 13 | --nproc_per_node=$GPUS \ 14 | --master_port=$PORT \ 15 | $(dirname "$0")/train.py \ 16 | $CONFIG \ 17 | --launcher pytorch ${@:3} 18 | --------------------------------------------------------------------------------
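`dist_train.sh` above is a thin wrapper around `torch.distributed.launch`; the `import model` lines noted in the readme must run before the Runner builds the model, otherwise the `MM_VSSM` type is unknown to the registry. A hedged single-GPU equivalent of what the launcher ultimately runs (config path and work_dir are illustrative):

```python
import model  # noqa: F401  -- registers MM_VSSM, as tools/train.py does
from mmengine.config import Config
from mmengine.runner import Runner

cfg = Config.fromfile(
    'configs/vssm/upernet_vssm_4xb4-160k_ade20k-512x512_tiny.py')
cfg.launcher = 'none'  # dist_train.sh passes --launcher pytorch instead
cfg.work_dir = './work_dirs/upernet_vssm_tiny'  # illustrative output dir
Runner.from_cfg(cfg).train()
```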
/segmentation/tools/misc/publish_model.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import argparse 3 | import subprocess 4 | from hashlib import sha256 5 | 6 | import torch 7 | 8 | BLOCK_SIZE = 128 * 1024 9 | 10 | 11 | def parse_args(): 12 | parser = argparse.ArgumentParser( 13 | description='Process a checkpoint to be published') 14 | parser.add_argument('in_file', help='input checkpoint filename') 15 | parser.add_argument('out_file', help='output checkpoint filename') 16 | args = parser.parse_args() 17 | return args 18 | 19 | 20 | def sha256sum(filename: str) -> str: 21 | """Compute SHA256 message digest from a file.""" 22 | hash_func = sha256() 23 | byte_array = bytearray(BLOCK_SIZE) 24 | memory_view = memoryview(byte_array) 25 | with open(filename, 'rb', buffering=0) as file: 26 | for block in iter(lambda: file.readinto(memory_view), 0): 27 | hash_func.update(memory_view[:block]) 28 | return hash_func.hexdigest() 29 | 30 | 31 | def process_checkpoint(in_file, out_file): 32 | checkpoint = torch.load(in_file, map_location='cpu') 33 | # remove optimizer for smaller file size 34 | if 'optimizer' in checkpoint: 35 | del checkpoint['optimizer'] 36 | # if it is necessary to remove some sensitive data in checkpoint['meta'], 37 | # add the code here. 38 | torch.save(checkpoint, out_file) 39 | sha = sha256sum(out_file)  # hash the stripped checkpoint so the name suffix matches the published file 40 | final_file = out_file.removesuffix('.pth') + f'-{sha[:8]}.pth'  # rstrip('.pth') strips characters, mangling names such as 'depth.pth' 41 | subprocess.Popen(['mv', out_file, final_file]) 42 | 43 | 44 | def main(): 45 | args = parse_args() 46 | process_checkpoint(args.in_file, args.out_file) 47 | 48 | 49 | if __name__ == '__main__': 50 | main() 51 | -------------------------------------------------------------------------------- /segmentation/tools/model_converters/beit2mmseg.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import argparse 3 | import os.path as osp 4 | from collections import OrderedDict 5 | 6 | import mmengine 7 | import torch 8 | from mmengine.runner import CheckpointLoader 9 | 10 | 11 | def convert_beit(ckpt): 12 | new_ckpt = OrderedDict() 13 | 14 | for k, v in ckpt.items(): 15 | if k.startswith('patch_embed'): 16 | new_key = k.replace('patch_embed.proj', 'patch_embed.projection') 17 | new_ckpt[new_key] = v 18 | elif k.startswith('blocks'):  # elif, so converted patch_embed keys are not re-added under their original names 19 | new_key = k.replace('blocks', 'layers') 20 | if 'norm' in new_key: 21 | new_key = new_key.replace('norm', 'ln') 22 | elif 'mlp.fc1' in new_key: 23 | new_key = new_key.replace('mlp.fc1', 'ffn.layers.0.0') 24 | elif 'mlp.fc2' in new_key: 25 | new_key = new_key.replace('mlp.fc2', 'ffn.layers.1') 26 | new_ckpt[new_key] = v 27 | else: 28 | new_key = k 29 | new_ckpt[new_key] = v 30 | 31 | return new_ckpt 32 | 33 | 34 | def main(): 35 | parser = argparse.ArgumentParser( 36 | description='Convert keys in official pretrained beit models to ' 37 | 'MMSegmentation style.') 38 | parser.add_argument('src', help='src model path or url') 39 | # The dst path must be a full path of the new checkpoint.
40 | parser.add_argument('dst', help='save path') 41 | args = parser.parse_args() 42 | 43 | checkpoint = CheckpointLoader.load_checkpoint(args.src, map_location='cpu') 44 | if 'state_dict' in checkpoint: 45 | state_dict = checkpoint['state_dict'] 46 | elif 'model' in checkpoint: 47 | state_dict = checkpoint['model'] 48 | else: 49 | state_dict = checkpoint 50 | weight = convert_beit(state_dict) 51 | mmengine.mkdir_or_exist(osp.dirname(args.dst)) 52 | torch.save(weight, args.dst) 53 | 54 | 55 | if __name__ == '__main__': 56 | main() 57 | -------------------------------------------------------------------------------- /segmentation/tools/slurm_test.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -x 4 | 5 | PARTITION=$1 6 | JOB_NAME=$2 7 | CONFIG=$3 8 | CHECKPOINT=$4 9 | GPUS=${GPUS:-4} 10 | GPUS_PER_NODE=${GPUS_PER_NODE:-4} 11 | CPUS_PER_TASK=${CPUS_PER_TASK:-5} 12 | PY_ARGS=${@:5} 13 | SRUN_ARGS=${SRUN_ARGS:-""} 14 | 15 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 16 | srun -p ${PARTITION} \ 17 | --job-name=${JOB_NAME} \ 18 | --gres=gpu:${GPUS_PER_NODE} \ 19 | --ntasks=${GPUS} \ 20 | --ntasks-per-node=${GPUS_PER_NODE} \ 21 | --cpus-per-task=${CPUS_PER_TASK} \ 22 | --kill-on-bad-exit=1 \ 23 | ${SRUN_ARGS} \ 24 | python -u tools/test.py ${CONFIG} ${CHECKPOINT} --launcher="slurm" ${PY_ARGS} 25 | -------------------------------------------------------------------------------- /segmentation/tools/slurm_train.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -x 4 | 5 | PARTITION=$1 6 | JOB_NAME=$2 7 | CONFIG=$3 8 | GPUS=${GPUS:-4} 9 | GPUS_PER_NODE=${GPUS_PER_NODE:-4} 10 | CPUS_PER_TASK=${CPUS_PER_TASK:-5} 11 | SRUN_ARGS=${SRUN_ARGS:-""} 12 | PY_ARGS=${@:4} 13 | 14 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 15 | srun -p ${PARTITION} \ 16 | --job-name=${JOB_NAME} \ 17 | --gres=gpu:${GPUS_PER_NODE} \ 18 | --ntasks=${GPUS} \ 19 | --ntasks-per-node=${GPUS_PER_NODE} \ 20 | --cpus-per-task=${CPUS_PER_TASK} \ 21 | --kill-on-bad-exit=1 \ 22 | ${SRUN_ARGS} \ 23 | python -u tools/train.py ${CONFIG} --launcher="slurm" ${PY_ARGS} 24 | -------------------------------------------------------------------------------- /segmentation/tools/torchserve/test_torchserve.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
2 | from argparse import ArgumentParser 3 | from io import BytesIO 4 | 5 | import matplotlib.pyplot as plt 6 | import mmcv 7 | import requests 8 | 9 | from mmseg.apis import inference_model, init_model 10 | 11 | 12 | def parse_args(): 13 | parser = ArgumentParser( 14 | description='Compare result of torchserve and pytorch, ' 15 | 'and visualize them.') 16 | parser.add_argument('img', help='Image file') 17 | parser.add_argument('config', help='Config file') 18 | parser.add_argument('checkpoint', help='Checkpoint file') 19 | parser.add_argument('model_name', help='The model name in the server') 20 | parser.add_argument( 21 | '--inference-addr', 22 | default='127.0.0.1:8080', 23 | help='Address and port of the inference server') 24 | parser.add_argument( 25 | '--result-image', 26 | type=str, 27 | default=None, 28 | help='save server output in result-image') 29 | parser.add_argument( 30 | '--device', default='cuda:0', help='Device used for inference') 31 | 32 | args = parser.parse_args() 33 | return args 34 | 35 | 36 | def main(args): 37 | url = 'http://' + args.inference_addr + '/predictions/' + args.model_name 38 | with open(args.img, 'rb') as image: 39 | tmp_res = requests.post(url, image) 40 | content = tmp_res.content 41 | if args.result_image: 42 | with open(args.result_image, 'wb') as out_image: 43 | out_image.write(content) 44 | plt.imshow(mmcv.imread(args.result_image, 'grayscale')) 45 | plt.show() 46 | else: 47 | plt.imshow(plt.imread(BytesIO(content))) 48 | plt.show() 49 | model = init_model(args.config, args.checkpoint, args.device) 50 | image = mmcv.imread(args.img) 51 | result = inference_model(model, image) 52 | plt.imshow(result.pred_sem_seg.data[0].cpu().numpy())  # inference_model returns a SegDataSample in mmseg 1.x, not a list 53 | plt.show() 54 | 55 | 56 | if __name__ == '__main__': 57 | args = parse_args() 58 | main(args) 59 | --------------------------------------------------------------------------------
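The `-{sha[:8]}` suffix written by `tools/misc/publish_model.py` above lets a downloaded checkpoint be verified against its own file name. A small sketch of that check (the file name is hypothetical):

```python
from hashlib import sha256

def verify_published(path: str) -> bool:
    """Check that the 8-hex-digit suffix in 'name-abcdef12.pth' matches the
    file's SHA-256 digest, per the publish_model.py naming convention."""
    expected = path.rsplit('-', 1)[-1].removesuffix('.pth')
    with open(path, 'rb') as f:
        return sha256(f.read()).hexdigest().startswith(expected)

print(verify_published('upernet_vssm_tiny-12345678.pth'))  # hypothetical file
```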