├── .flake8
├── .gitignore
├── .isort.cfg
├── .pre-commit-config.yaml
├── LICENSE.md
├── README.md
├── detection
    ├── README.md
    ├── configs
    │   ├── _base_
    │   │   ├── datasets
    │   │   │   ├── cityscapes_detection.py
    │   │   │   ├── cityscapes_instance.py
    │   │   │   ├── coco_detection.py
    │   │   │   ├── coco_instance.py
    │   │   │   ├── coco_instance_augreg.py
    │   │   │   ├── coco_panoptic.py
    │   │   │   ├── deepfashion.py
    │   │   │   ├── lvis_v0.5_instance.py
    │   │   │   ├── lvis_v1_instance.py
    │   │   │   ├── obj365_detection.py
    │   │   │   ├── voc0712.py
    │   │   │   └── wider_face.py
    │   │   ├── default_runtime.py
    │   │   ├── models
    │   │   │   ├── cascade_mask_rcnn_r50_fpn.py
    │   │   │   ├── cascade_rcnn_r50_fpn.py
    │   │   │   ├── fast_rcnn_r50_fpn.py
    │   │   │   ├── faster_rcnn_r50_caffe_c4.py
    │   │   │   ├── faster_rcnn_r50_caffe_dc5.py
    │   │   │   ├── faster_rcnn_r50_fpn.py
    │   │   │   ├── mask_rcnn_convnext_fpn.py
    │   │   │   ├── mask_rcnn_r50_caffe_c4.py
    │   │   │   ├── mask_rcnn_r50_fpn.py
    │   │   │   ├── retinanet_r50_fpn.py
    │   │   │   ├── rpn_r50_caffe_c4.py
    │   │   │   ├── rpn_r50_fpn.py
    │   │   │   └── ssd300.py
    │   │   └── schedules
    │   │   │   ├── schedule_1x.py
    │   │   │   ├── schedule_20e.py
    │   │   │   ├── schedule_2x.py
    │   │   │   ├── schedule_3x.py
    │   │   │   └── schedule_6x.py
    │   ├── atss
    │   │   ├── README.md
    │   │   └── atss_deit_adapter_small_fpn_3x_coco.py
    │   ├── cascade_rcnn
    │   │   ├── README.md
    │   │   ├── cascade_mask_rcnn_deit_adapter_base_fpn_3x_coco.py
    │   │   ├── cascade_mask_rcnn_deit_adapter_small_fpn_3x_coco.py
    │   │   └── cascade_mask_rcnn_deit_base_fpn_3x_coco.py
    │   ├── gfl
    │   │   ├── README.md
    │   │   └── gfl_deit_adapter_small_fpn_3x_coco.py
    │   ├── htc++
    │   │   ├── README.md
    │   │   ├── htc++_augreg_adapter_large_fpn_3x_coco.py
    │   │   ├── htc++_augreg_adapter_large_fpn_3x_coco_ms.py
    │   │   ├── htc++_beit_adapter_large_fpn_3x_coco.py
    │   │   ├── htc++_beit_adapter_large_fpn_3x_coco_ms.py
    │   │   ├── htc++_beit_adapter_large_fpn_3x_coco_old.py
    │   │   ├── htc++_beitv2_adapter_large_fpn_3x_coco.py
    │   │   ├── htc++_beitv2_adapter_large_fpn_3x_coco_ms.py
    │   │   ├── htc++_beitv2_adapter_large_fpn_o365_coco.py
    │   │   ├── htc++_beitv2_adapter_large_fpn_o365_coco_ms.py
    │   │   └── htc++_uniperceiver_adapter_large_fpn_3x_coco.py
    │   ├── mask2former
    │   │   ├── README.md
    │   │   └── mask2former_beitv2_adapter_large_16x1_3x_coco-panoptic.py
    │   ├── mask_rcnn
    │   │   ├── README.md
    │   │   ├── dinov2
    │   │   │   ├── README.md
    │   │   │   ├── mask_rcnn_dinov2_adapter_base_fpn_3x_coco.py
    │   │   │   ├── mask_rcnn_dinov2_adapter_large_fpn_3x_coco.py
    │   │   │   └── mask_rcnn_dinov2_adapter_small_fpn_3x_coco.py
    │   │   ├── mask_rcnn_augreg_adapter_large_fpn_3x_coco.py
    │   │   ├── mask_rcnn_augreg_large_fpn_3x_coco.py
    │   │   ├── mask_rcnn_deit_adapter_base_fpn_3x_coco.py
    │   │   ├── mask_rcnn_deit_adapter_small_3x_coco.py
    │   │   ├── mask_rcnn_deit_adapter_small_fpn_3x_coco.py
    │   │   ├── mask_rcnn_deit_adapter_tiny_fpn_1x_coco.py
    │   │   ├── mask_rcnn_deit_adapter_tiny_fpn_3x_coco.py
    │   │   ├── mask_rcnn_deit_base_fpn_3x_coco.py
    │   │   ├── mask_rcnn_deit_small_fpn_3x_coco.py
    │   │   ├── mask_rcnn_deit_tiny_fpn_3x_coco.py
    │   │   └── mask_rcnn_uniperceiver_adapter_base_fpn_3x_coco.py
    │   ├── sparse_rcnn
    │   │   ├── README.md
    │   │   └── sparse_rcnn_deit_adapter_small_fpn_3x_coco.py
    │   └── upgraded_mask_rcnn
    │   │   ├── README.md
    │   │   ├── mask_rcnn_mae_adapter_base_lsj_fpn_25ep_coco.py
    │   │   └── mask_rcnn_mae_adapter_base_lsj_fpn_50ep_coco.py
    ├── convert_14to16.py
    ├── dist_test.sh
    ├── dist_train.sh
    ├── get_flops.py
    ├── image_demo.py
    ├── mmcv_custom
    │   ├── __init__.py
    │   ├── checkpoint.py
    │   ├── customized_text.py
    │   ├── layer_decay_optimizer_constructor.py
    │   ├── my_checkpoint.py
    │   └── uniperceiver_converter.py
    ├── mmdet_custom
    │   ├── __init__.py
    │   └── models
    │   │   ├── __init__.py
    │   │   ├── backbones
    │   │       ├── __init__.py
    │   │       ├── adapter_modules.py
    │   │       ├── base
    │   │       │   ├── beit.py
    │   │       │   ├── uniperceiver.py
    │   │       │   └── vit.py
    │   │       ├── beit_adapter.py
    │   │       ├── uniperceiver_adapter.py
    │   │       ├── vit_adapter.py
    │   │       └── vit_baseline.py
    │   │   ├── detectors
    │   │       ├── __init__.py
    │   │       └── htc_aug.py
    │   │   └── necks
    │   │       ├── __init__.py
    │   │       ├── channel_mapper.py
    │   │       └── extra_attention.py
    ├── ops
    │   ├── README.md
    │   ├── functions
    │   │   ├── __init__.py
    │   │   └── ms_deform_attn_func.py
    │   ├── make.sh
    │   ├── modules
    │   │   ├── __init__.py
    │   │   └── ms_deform_attn.py
    │   ├── setup.py
    │   ├── src
    │   │   ├── cpu
    │   │   │   ├── ms_deform_attn_cpu.cpp
    │   │   │   └── ms_deform_attn_cpu.h
    │   │   ├── cuda
    │   │   │   ├── ms_deform_attn_cuda.cu
    │   │   │   ├── ms_deform_attn_cuda.h
    │   │   │   └── ms_deform_im2col_cuda.cuh
    │   │   ├── ms_deform_attn.h
    │   │   └── vision.cpp
    │   └── test.py
    ├── slurm_test.sh
    ├── slurm_train.sh
    ├── test.py
    ├── train.py
    └── video_demo.py
├── segmentation
    ├── README.md
    ├── configs
    │   ├── _base_
    │   │   ├── datasets
    │   │   │   ├── ade20k.py
    │   │   │   ├── chase_db1.py
    │   │   │   ├── cityscapes.py
    │   │   │   ├── cityscapes_1024x1024.py
    │   │   │   ├── cityscapes_768x768.py
    │   │   │   ├── cityscapes_769x769.py
    │   │   │   ├── cityscapes_832x832.py
    │   │   │   ├── cityscapes_896x896.py
    │   │   │   ├── coco-stuff10k.py
    │   │   │   ├── coco-stuff164k.py
    │   │   │   ├── drive.py
    │   │   │   ├── hrf.py
    │   │   │   ├── loveda.py
    │   │   │   ├── mapillary_896x896.py
    │   │   │   ├── nyu_depth_v2.py
    │   │   │   ├── pascal_context.py
    │   │   │   ├── pascal_context_59.py
    │   │   │   ├── pascal_voc12.py
    │   │   │   ├── pascal_voc12_aug.py
    │   │   │   ├── potsdam.py
    │   │   │   └── stare.py
    │   │   ├── default_runtime.py
    │   │   ├── models
    │   │   │   ├── ann_r50-d8.py
    │   │   │   ├── apcnet_r50-d8.py
    │   │   │   ├── bisenetv1_r18-d32.py
    │   │   │   ├── bisenetv2.py
    │   │   │   ├── ccnet_r50-d8.py
    │   │   │   ├── cgnet.py
    │   │   │   ├── danet_r50-d8.py
    │   │   │   ├── deeplabv3_r50-d8.py
    │   │   │   ├── deeplabv3_unet_s5-d16.py
    │   │   │   ├── deeplabv3plus_r50-d8.py
    │   │   │   ├── dmnet_r50-d8.py
    │   │   │   ├── dnl_r50-d8.py
    │   │   │   ├── dpt_vit-b16.py
    │   │   │   ├── emanet_r50-d8.py
    │   │   │   ├── encnet_r50-d8.py
    │   │   │   ├── erfnet_fcn.py
    │   │   │   ├── fast_scnn.py
    │   │   │   ├── fastfcn_r50-d32_jpu_psp.py
    │   │   │   ├── fcn_hr18.py
    │   │   │   ├── fcn_r50-d8.py
    │   │   │   ├── fcn_unet_s5-d16.py
    │   │   │   ├── fpn_r50.py
    │   │   │   ├── gcnet_r50-d8.py
    │   │   │   ├── icnet_r50-d8.py
    │   │   │   ├── isanet_r50-d8.py
    │   │   │   ├── lraspp_m-v3-d8.py
    │   │   │   ├── mask2former_beit.py
    │   │   │   ├── mask2former_beit_chase_db1.py
    │   │   │   ├── mask2former_beit_cityscapes.py
    │   │   │   ├── mask2former_beit_cocostuff.py
    │   │   │   ├── mask2former_beit_pascal.py
    │   │   │   ├── mask2former_beit_potsdam.py
    │   │   │   ├── maskformer_beit.py
    │   │   │   ├── nonlocal_r50-d8.py
    │   │   │   ├── ocrnet_hr18.py
    │   │   │   ├── ocrnet_r50-d8.py
    │   │   │   ├── pointrend_r50.py
    │   │   │   ├── psanet_r50-d8.py
    │   │   │   ├── pspnet_r50-d8.py
    │   │   │   ├── pspnet_unet_s5-d16.py
    │   │   │   ├── segformer_mit-b0.py
    │   │   │   ├── setr_mla.py
    │   │   │   ├── setr_naive.py
    │   │   │   ├── setr_pup.py
    │   │   │   ├── stdc.py
    │   │   │   ├── twins_pcpvt-s_fpn.py
    │   │   │   ├── twins_pcpvt-s_upernet.py
    │   │   │   ├── upernet_beit.py
    │   │   │   ├── upernet_r50.py
    │   │   │   ├── upernet_swin.py
    │   │   │   └── upernet_vit-b16_ln_mln.py
    │   │   └── schedules
    │   │   │   ├── schedule_160k.py
    │   │   │   ├── schedule_20k.py
    │   │   │   ├── schedule_320k.py
    │   │   │   ├── schedule_40k.py
    │   │   │   └── schedule_80k.py
    │   ├── ade20k
    │   │   ├── README.md
    │   │   ├── mask2former_beit_adapter_large_640_160k_ade20k_ms.py
    │   │   ├── mask2former_beit_adapter_large_640_160k_ade20k_ss.py
    │   │   ├── mask2former_beit_adapter_large_896_80k_ade20k_ms.py
    │   │   ├── mask2former_beit_adapter_large_896_80k_ade20k_ss.py
    │   │   ├── mask2former_beitv2_adapter_large_896_160k_ade20k_ss.py
    │   │   ├── mask2former_beitv2_adapter_large_896_80k_ade20k_ms.py
    │   │   ├── mask2former_beitv2_adapter_large_896_80k_ade20k_ss.py
    │   │   ├── upernet_augreg_adapter_base_512_160k_ade20k.py
    │   │   ├── upernet_augreg_adapter_large_512_160k_ade20k.py
    │   │   ├── upernet_augreg_adapter_tiny_512_160k_ade20k.py
    │   │   ├── upernet_beit_adapter_large_640_160k_ade20k_ms.py
    │   │   ├── upernet_beit_adapter_large_640_160k_ade20k_ss.py
    │   │   ├── upernet_beit_large_512_160k_ade20k_ms.py
    │   │   ├── upernet_beit_large_512_160k_ade20k_ss.py
    │   │   ├── upernet_deit_adapter_base_512_160k_ade20k.py
    │   │   ├── upernet_deit_adapter_light_base_512_160k_ade20k.py
    │   │   ├── upernet_deit_adapter_small_512_160k_ade20k.py
    │   │   ├── upernet_deit_adapter_tiny_512_160k_ade20k.py
    │   │   └── upernet_uniperceiver_adapter_large_512_160k_ade20k.py
    │   ├── chase_db1
    │   │   ├── README.md
    │   │   └── mask2former_beit_adapter_large_128_40k_chase_db1_ss.py
    │   ├── cityscapes
    │   │   ├── README.md
    │   │   ├── mask2former_beit_adapter_large_896_80k_cityscapes_ms.py
    │   │   ├── mask2former_beit_adapter_large_896_80k_cityscapes_ss.py
    │   │   └── mask2former_beit_adapter_large_896_80k_mapillary_ss.py
    │   ├── coco_stuff10k
    │   │   ├── README.md
    │   │   ├── mask2former_beit_adapter_base_512_40k_cocostuff10k_ms.py
    │   │   ├── mask2former_beit_adapter_base_512_40k_cocostuff10k_ss.py
    │   │   ├── mask2former_beit_adapter_large_512_40k_cocostuff10k_ms.py
    │   │   ├── mask2former_beit_adapter_large_512_40k_cocostuff10k_ss.py
    │   │   ├── upernet_beit_adapter_large_512_80k_cocostuff10k_ms.py
    │   │   └── upernet_beit_adapter_large_512_80k_cocostuff10k_ss.py
    │   ├── coco_stuff164k
    │   │   ├── README.md
    │   │   ├── mask2former_beit_adapter_large_896_80k_cocostuff164k_ms.py
    │   │   ├── mask2former_beit_adapter_large_896_80k_cocostuff164k_ss.py
    │   │   ├── mask2former_beitv2_adapter_large_896_80k_cocostuff164k_ss.py
    │   │   ├── upernet_beit_adapter_large_640_80k_cocostuff164k_ms.py
    │   │   └── upernet_beit_adapter_large_640_80k_cocostuff164k_ss.py
    │   ├── pascal_context
    │   │   ├── README.md
    │   │   ├── mask2former_beit_adapter_base_480_40k_pascal_context_59_ms.py
    │   │   ├── mask2former_beit_adapter_base_480_40k_pascal_context_59_ss.py
    │   │   ├── mask2former_beit_adapter_large_480_40k_pascal_context_59_ms.py
    │   │   ├── mask2former_beit_adapter_large_480_40k_pascal_context_59_ss.py
    │   │   ├── upernet_beit_adapter_large_480_80k_pascal_context_59_ms.py
    │   │   └── upernet_beit_adapter_large_480_80k_pascal_context_59_ss.py
    │   └── potsdam
    │   │   ├── README.md
    │   │   └── mask2former_beit_adapter_large_512_80k_potsdam_ss.py
    ├── dist_test.sh
    ├── dist_train.sh
    ├── get_flops.py
    ├── image_demo.py
    ├── mmcv_custom
    │   ├── __init__.py
    │   ├── checkpoint.py
    │   ├── customized_text.py
    │   ├── layer_decay_optimizer_constructor.py
    │   └── my_checkpoint.py
    ├── mmseg_custom
    │   ├── __init__.py
    │   ├── core
    │   │   ├── __init__.py
    │   │   ├── anchor
    │   │   │   ├── __init__.py
    │   │   │   ├── builder.py
    │   │   │   └── point_generator.py
    │   │   ├── box
    │   │   │   ├── __init__.py
    │   │   │   ├── builder.py
    │   │   │   └── samplers
    │   │   │   │   ├── __init__.py
    │   │   │   │   ├── base_sampler.py
    │   │   │   │   ├── mask_pseudo_sampler.py
    │   │   │   │   ├── mask_sampling_result.py
    │   │   │   │   └── sampling_result.py
    │   │   ├── evaluation
    │   │   │   ├── __init__.py
    │   │   │   └── panoptic_utils.py
    │   │   ├── mask
    │   │   │   ├── __init__.py
    │   │   │   └── utils.py
    │   │   └── utils
    │   │   │   ├── __init__.py
    │   │   │   ├── dist_utils.py
    │   │   │   └── misc.py
    │   ├── datasets
    │   │   ├── __init__.py
    │   │   ├── mapillary.py
    │   │   ├── pipelines
    │   │   │   ├── __init__.py
    │   │   │   ├── formatting.py
    │   │   │   └── transform.py
    │   │   └── potsdam.py
    │   └── models
    │   │   ├── __init__.py
    │   │   ├── backbones
    │   │       ├── __init__.py
    │   │       ├── adapter_modules.py
    │   │       ├── base
    │   │       │   ├── beit.py
    │   │       │   ├── uniperceiver.py
    │   │       │   └── vit.py
    │   │       ├── beit_adapter.py
    │   │       ├── beit_baseline.py
    │   │       ├── uniperceiver_adapter.py
    │   │       ├── vit_adapter.py
    │   │       └── vit_baseline.py
    │   │   ├── builder.py
    │   │   ├── decode_heads
    │   │       ├── __init__.py
    │   │       ├── mask2former_head.py
    │   │       └── maskformer_head.py
    │   │   ├── losses
    │   │       ├── __init__.py
    │   │       ├── cross_entropy_loss.py
    │   │       ├── dice_loss.py
    │   │       ├── focal_loss.py
    │   │       ├── match_costs.py
    │   │       └── match_loss.py
    │   │   ├── plugins
    │   │       ├── __init__.py
    │   │       ├── msdeformattn_pixel_decoder.py
    │   │       └── pixel_decoder.py
    │   │   ├── segmentors
    │   │       ├── __init__.py
    │   │       ├── encoder_decoder_mask2former.py
    │   │       └── encoder_decoder_mask2former_aug.py
    │   │   └── utils
    │   │       ├── __init__.py
    │   │       ├── assigner.py
    │   │       ├── point_sample.py
    │   │       ├── positional_encoding.py
    │   │       └── transformer.py
    ├── slurm_test.sh
    ├── slurm_train.sh
    ├── test.py
    ├── train.py
    └── video_demo.py
└── wsdm2023
    ├── README.md
    ├── configs
        ├── _base_
        │   ├── datasets
        │   │   ├── cityscapes_detection.py
        │   │   ├── cityscapes_instance.py
        │   │   ├── coco_detection.py
        │   │   ├── coco_instance.py
        │   │   ├── coco_panoptic.py
        │   │   ├── deepfashion.py
        │   │   ├── grounding_gqa.py
        │   │   ├── lvis_v0.5_instance.py
        │   │   ├── lvis_v1_instance.py
        │   │   ├── refcoco.py
        │   │   ├── voc0712.py
        │   │   ├── wider_face.py
        │   │   ├── wsdm2023.py
        │   │   └── wsdm2023_trainval.py
        │   ├── default_runtime.py
        │   ├── models
        │   │   ├── cascade_mask_rcnn_r50_fpn.py
        │   │   ├── cascade_rcnn_r50_fpn.py
        │   │   ├── fast_rcnn_r50_fpn.py
        │   │   ├── faster_rcnn_r50_caffe_c4.py
        │   │   ├── faster_rcnn_r50_caffe_dc5.py
        │   │   ├── faster_rcnn_r50_fpn.py
        │   │   ├── mask_rcnn_convnext_fpn.py
        │   │   ├── mask_rcnn_r50_caffe_c4.py
        │   │   ├── mask_rcnn_r50_fpn.py
        │   │   ├── retinanet_r50_fpn.py
        │   │   ├── rpn_r50_caffe_c4.py
        │   │   ├── rpn_r50_fpn.py
        │   │   └── ssd300.py
        │   └── schedules
        │   │   ├── schedule_1x.py
        │   │   ├── schedule_20e.py
        │   │   ├── schedule_2x.py
        │   │   ├── schedule_3x.py
        │   │   └── schedule_6x.py
        ├── dino_4scale_uniperceiver_adapter_base_24ep_gqa_wsdm2023.py
        ├── dino_4scale_uniperceiver_adapter_base_6ep_gqa.py
        ├── dino_4scale_uniperceiver_adapter_large_24ep_gqa_wsdm2023.py
        ├── dino_4scale_uniperceiver_adapter_large_24ep_gqa_wsdm2023_trainval.py
        └── dino_4scale_uniperceiver_adapter_large_6ep_gqa.py
    ├── dist_test.sh
    ├── dist_train.sh
    ├── generate_results.py
    ├── mmcv_custom
        ├── __init__.py
        ├── checkpoint.py
        ├── customized_text.py
        └── layer_decay_optimizer_constructor.py
    ├── mmdet_custom
        ├── __init__.py
        ├── apis
        │   ├── __init__.py
        │   └── pipeline.py
        ├── datasets
        │   ├── __init__.py
        │   ├── vg_dataset.py
        │   └── wsdm2023_coco.py
        └── models
        │   ├── __init__.py
        │   ├── backbones
        │       ├── __init__.py
        │       ├── adapter_modules.py
        │       ├── base
        │       │   ├── grounding_block.py
        │       │   └── uniperceiver.py
        │       └── uniperceiver_adapter.py
        │   ├── dense_heads
        │       ├── __init__.py
        │       ├── deformable_detr_head.py
        │       ├── detr_head.py
        │       └── dino_head.py
        │   ├── detectors
        │       ├── __init__.py
        │       └── grounding_dino.py
        │   └── utils
        │       ├── __init__.py
        │       ├── point_sample.py
        │       ├── query_denoising.py
        │       ├── tokenization
        │           ├── __init__.py
        │           ├── bpe_simple_vocab_16e6.txt.gz
        │           ├── builder.py
        │           └── tokenization_clip.py
        │       └── transformer.py
    ├── release.py
    ├── slurm_test.sh
    ├── slurm_train.sh
    ├── test.py
    ├── tools
        ├── README.md
        ├── convertor.py
        ├── csv2coco.py
        ├── drawbbox.py
        └── paraphrase.py
    └── train.py


/.flake8:
--------------------------------------------------------------------------------
1 | [flake8]
2 | ignore = E501, F403, C901, W504, W605, E251, E122, E126, E127
3 | select = E1, E3, E502, E7, E9, W1, W5, W6
4 | max-line-length = 180
5 | exclude=*.egg/*,build,dist,detection/configs/*
6 | 


--------------------------------------------------------------------------------
/.isort.cfg:
--------------------------------------------------------------------------------
 1 | [isort]
 2 | line_length = 180
 3 | multi_line_output = 0
 4 | extra_standard_library = setuptools
 5 | known_third_party = PIL,asynctest,cityscapesscripts,cv2,gather_models,matplotlib,mmcv,numpy,onnx,onnxruntime,pycocotools,pytest,pytorch_sphinx_theme,requests,scipy,seaborn,six,terminaltables,torch,ts,yaml
 6 | no_lines_before = STDLIB,LOCALFOLDER
 7 | default_section = THIRDPARTY
 8 | 
 9 | [yapf]
10 | BASED_ON_STYLE = pep8
11 | BLANK_LINE_BEFORE_NESTED_CLASS_OR_DEF = true
12 | SPLIT_BEFORE_EXPRESSION_AFTER_OPENING_PAREN = true
13 | 
14 | [codespell]
15 | skip = *.ipynb
16 | quiet-level = 3
17 | ignore-words-list = patten,nd,ty,mot,hist,formating,winn,gool,datas,wan,confids,TOOD,tood
18 | 


--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
 1 | exclude: ^(detection|segmentation)/configs
 2 | repos:
 3 |   - repo: https://github.com/PyCQA/flake8
 4 |     rev: 3.8.3
 5 |     hooks:
 6 |       - id: flake8
 7 |   - repo: https://github.com/PyCQA/isort
 8 |     rev: 5.10.1
 9 |     hooks:
10 |       - id: isort
11 |   - repo: https://github.com/pre-commit/mirrors-yapf
12 |     rev: v0.30.0
13 |     hooks:
14 |       - id: yapf
15 |   - repo: https://github.com/pre-commit/pre-commit-hooks
16 |     rev: v3.1.0
17 |     hooks:
18 |       - id: trailing-whitespace
19 |       - id: check-yaml
20 |       - id: end-of-file-fixer
21 |       - id: requirements-txt-fixer
22 |       - id: double-quote-string-fixer
23 |       - id: check-merge-conflict
24 |       - id: fix-encoding-pragma
25 |         args: ["--remove"]
26 |       - id: mixed-line-ending
27 |         args: ["--fix=lf"]
28 |   - repo: https://github.com/markdownlint/markdownlint
29 |     rev: v0.11.0
30 |     hooks:
31 |       - id: markdownlint
32 |         args: ["-r", "~MD002,~MD013,~MD029,~MD033,~MD034",
33 |               "-t", "allow_different_nesting"]
34 |   - repo: https://github.com/codespell-project/codespell
35 |     rev: v2.1.0
36 |     hooks:
37 |       - id: codespell
38 |   - repo: https://github.com/myint/docformatter
39 |     rev: v1.3.1
40 |     hooks:
41 |       - id: docformatter
42 |         args: ["--in-place", "--wrap-descriptions", "79"]
43 | 


--------------------------------------------------------------------------------
/detection/configs/_base_/datasets/cityscapes_detection.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) OpenMMLab. All rights reserved.
 2 | # dataset settings: Cityscapes box detection (train/test pipelines, data splits, evaluation)
 3 | dataset_type = 'CityscapesDataset'
 4 | data_root = 'data/cityscapes/'  # prefix concatenated onto every ann_file / img_prefix below
 5 | img_norm_cfg = dict(  # unpacked into the Normalize step of both pipelines
 6 |     mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
 7 | train_pipeline = [
 8 |     dict(type='LoadImageFromFile'),
 9 |     dict(type='LoadAnnotations', with_bbox=True),  # boxes only; with_mask is not set here
10 |     dict(type='Resize', img_scale=[(2048, 800), (2048, 1024)],  # two scales listed; how one is chosen is left to Resize's default mode - TODO confirm
11 |          keep_ratio=True),
12 |     dict(type='RandomFlip', flip_ratio=0.5),
13 |     dict(type='Normalize', **img_norm_cfg),
14 |     dict(type='Pad', size_divisor=32),
15 |     dict(type='DefaultFormatBundle'),
16 |     dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),  # fields kept for training
17 | ]
18 | test_pipeline = [
19 |     dict(type='LoadImageFromFile'),
20 |     dict(type='MultiScaleFlipAug',
21 |          img_scale=(2048, 1024),  # a single test scale
22 |          flip=False,  # flipping disabled at test time
23 |          transforms=[
24 |              dict(type='Resize', keep_ratio=True),
25 |              dict(type='RandomFlip'),
26 |              dict(type='Normalize', **img_norm_cfg),
27 |              dict(type='Pad', size_divisor=32),
28 |              dict(type='ImageToTensor', keys=['img']),
29 |              dict(type='Collect', keys=['img']),  # only the image is collected; no ground truth
30 |          ])
31 | ]
32 | data = dict(
33 |     samples_per_gpu=1,
34 |     workers_per_gpu=2,
35 |     train=dict(
36 |         type='RepeatDataset',  # the train split is wrapped rather than used directly
37 |         times=8,  # NOTE(review): presumably repeats the wrapped split 8x per epoch - confirm against mmdet
38 |         dataset=dict(type=dataset_type,
39 |                      ann_file=data_root +
40 |                      'annotations/instancesonly_filtered_gtFine_train.json',
41 |                      img_prefix=data_root + 'leftImg8bit/train/',
42 |                      pipeline=train_pipeline)),
43 |     val=dict(type=dataset_type,
44 |              ann_file=data_root +
45 |              'annotations/instancesonly_filtered_gtFine_val.json',
46 |              img_prefix=data_root + 'leftImg8bit/val/',
47 |              pipeline=test_pipeline),  # val and test both use test_pipeline
48 |     test=dict(type=dataset_type,
49 |               ann_file=data_root +
50 |               'annotations/instancesonly_filtered_gtFine_test.json',
51 |               img_prefix=data_root + 'leftImg8bit/test/',
52 |               pipeline=test_pipeline))
53 | evaluation = dict(interval=1, metric='bbox')  # 'bbox' metric only, matching the bbox-only annotation loading above
54 | 


--------------------------------------------------------------------------------
/detection/configs/_base_/datasets/cityscapes_instance.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) OpenMMLab. All rights reserved.
 2 | # dataset settings: Cityscapes instance segmentation (train/test pipelines, data splits, evaluation)
 3 | dataset_type = 'CityscapesDataset'
 4 | data_root = 'data/cityscapes/'  # prefix concatenated onto every ann_file / img_prefix below
 5 | img_norm_cfg = dict(  # unpacked into the Normalize step of both pipelines
 6 |     mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
 7 | train_pipeline = [
 8 |     dict(type='LoadImageFromFile'),
 9 |     dict(type='LoadAnnotations', with_bbox=True, with_mask=True),  # boxes and masks are both loaded
10 |     dict(type='Resize', img_scale=[(2048, 800), (2048, 1024)],  # two scales listed; how one is chosen is left to Resize's default mode - TODO confirm
11 |          keep_ratio=True),
12 |     dict(type='RandomFlip', flip_ratio=0.5),
13 |     dict(type='Normalize', **img_norm_cfg),
14 |     dict(type='Pad', size_divisor=32),
15 |     dict(type='DefaultFormatBundle'),
16 |     dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']),  # fields kept for training
17 | ]
18 | test_pipeline = [
19 |     dict(type='LoadImageFromFile'),
20 |     dict(type='MultiScaleFlipAug',
21 |          img_scale=(2048, 1024),  # a single test scale
22 |          flip=False,  # flipping disabled at test time
23 |          transforms=[
24 |              dict(type='Resize', keep_ratio=True),
25 |              dict(type='RandomFlip'),
26 |              dict(type='Normalize', **img_norm_cfg),
27 |              dict(type='Pad', size_divisor=32),
28 |              dict(type='ImageToTensor', keys=['img']),
29 |              dict(type='Collect', keys=['img']),  # only the image is collected; no ground truth
30 |          ])
31 | ]
32 | data = dict(
33 |     samples_per_gpu=1,
34 |     workers_per_gpu=2,
35 |     train=dict(
36 |         type='RepeatDataset',  # the train split is wrapped rather than used directly
37 |         times=8,  # NOTE(review): presumably repeats the wrapped split 8x per epoch - confirm against mmdet
38 |         dataset=dict(type=dataset_type,
39 |                      ann_file=data_root +
40 |                      'annotations/instancesonly_filtered_gtFine_train.json',
41 |                      img_prefix=data_root + 'leftImg8bit/train/',
42 |                      pipeline=train_pipeline)),
43 |     val=dict(type=dataset_type,
44 |              ann_file=data_root +
45 |              'annotations/instancesonly_filtered_gtFine_val.json',
46 |              img_prefix=data_root + 'leftImg8bit/val/',
47 |              pipeline=test_pipeline),  # val and test both use test_pipeline
48 |     test=dict(type=dataset_type,
49 |               ann_file=data_root +
50 |               'annotations/instancesonly_filtered_gtFine_test.json',
51 |               img_prefix=data_root + 'leftImg8bit/test/',
52 |               pipeline=test_pipeline))
53 | evaluation = dict(metric=['bbox', 'segm'])  # both metrics requested, matching the with_bbox / with_mask loading above
54 | 


--------------------------------------------------------------------------------
/detection/configs/_base_/datasets/coco_detection.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) OpenMMLab. All rights reserved.
 2 | # dataset settings: COCO 2017 box detection (train/test pipelines, data splits, evaluation)
 3 | dataset_type = 'CocoDataset'
 4 | data_root = 'data/coco/'  # prefix concatenated onto every ann_file / img_prefix below
 5 | img_norm_cfg = dict(  # unpacked into the Normalize step of both pipelines
 6 |     mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
 7 | train_pipeline = [
 8 |     dict(type='LoadImageFromFile'),
 9 |     dict(type='LoadAnnotations', with_bbox=True),  # boxes only; with_mask is not set here
10 |     dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),  # one fixed training scale
11 |     dict(type='RandomFlip', flip_ratio=0.5),
12 |     dict(type='Normalize', **img_norm_cfg),
13 |     dict(type='Pad', size_divisor=32),
14 |     dict(type='DefaultFormatBundle'),
15 |     dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),  # fields kept for training
16 | ]
17 | test_pipeline = [
18 |     dict(type='LoadImageFromFile'),
19 |     dict(type='MultiScaleFlipAug',
20 |          img_scale=(1333, 800),  # same scale as training
21 |          flip=False,  # flipping disabled at test time
22 |          transforms=[
23 |              dict(type='Resize', keep_ratio=True),
24 |              dict(type='RandomFlip'),
25 |              dict(type='Normalize', **img_norm_cfg),
26 |              dict(type='Pad', size_divisor=32),
27 |              dict(type='ImageToTensor', keys=['img']),
28 |              dict(type='Collect', keys=['img']),  # only the image is collected; no ground truth
29 |          ])
30 | ]
31 | data = dict(
32 |     samples_per_gpu=2,
33 |     workers_per_gpu=2,
34 |     train=dict(type=dataset_type,
35 |                ann_file=data_root + 'annotations/instances_train2017.json',
36 |                img_prefix=data_root + 'train2017/',
37 |                pipeline=train_pipeline),
38 |     val=dict(type=dataset_type,
39 |              ann_file=data_root + 'annotations/instances_val2017.json',
40 |              img_prefix=data_root + 'val2017/',
41 |              pipeline=test_pipeline),
42 |     test=dict(type=dataset_type,  # NOTE: test points at the same val2017 annotations and images as val
43 |               ann_file=data_root + 'annotations/instances_val2017.json',
44 |               img_prefix=data_root + 'val2017/',
45 |               pipeline=test_pipeline))
46 | evaluation = dict(interval=1, metric='bbox')  # 'bbox' metric only, matching the bbox-only annotation loading above
47 | 


--------------------------------------------------------------------------------
/detection/configs/_base_/datasets/coco_instance.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) OpenMMLab. All rights reserved.
 2 | # dataset settings: COCO 2017 instance segmentation (train/test pipelines, data splits, evaluation)
 3 | dataset_type = 'CocoDataset'
 4 | data_root = 'data/coco/'  # prefix concatenated onto every ann_file / img_prefix below
 5 | img_norm_cfg = dict(  # unpacked into the Normalize step of both pipelines
 6 |     mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
 7 | train_pipeline = [
 8 |     dict(type='LoadImageFromFile'),
 9 |     dict(type='LoadAnnotations', with_bbox=True, with_mask=True),  # boxes and masks are both loaded
10 |     dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),  # one fixed training scale
11 |     dict(type='RandomFlip', flip_ratio=0.5),
12 |     dict(type='Normalize', **img_norm_cfg),
13 |     dict(type='Pad', size_divisor=32),
14 |     dict(type='DefaultFormatBundle'),
15 |     dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']),  # fields kept for training
16 | ]
17 | test_pipeline = [
18 |     dict(type='LoadImageFromFile'),
19 |     dict(type='MultiScaleFlipAug',
20 |          img_scale=(1333, 800),  # same scale as training
21 |          flip=False,  # flipping disabled at test time
22 |          transforms=[
23 |              dict(type='Resize', keep_ratio=True),
24 |              dict(type='RandomFlip'),
25 |              dict(type='Normalize', **img_norm_cfg),
26 |              dict(type='Pad', size_divisor=32),
27 |              dict(type='ImageToTensor', keys=['img']),
28 |              dict(type='Collect', keys=['img']),  # only the image is collected; no ground truth
29 |          ])
30 | ]
31 | data = dict(
32 |     samples_per_gpu=2,
33 |     workers_per_gpu=2,
34 |     train=dict(type=dataset_type,
35 |                ann_file=data_root + 'annotations/instances_train2017.json',
36 |                img_prefix=data_root + 'train2017/',
37 |                pipeline=train_pipeline),
38 |     val=dict(type=dataset_type,
39 |              ann_file=data_root + 'annotations/instances_val2017.json',
40 |              img_prefix=data_root + 'val2017/',
41 |              pipeline=test_pipeline),
42 |     test=dict(type=dataset_type,  # NOTE: test points at the same val2017 annotations and images as val
43 |               ann_file=data_root + 'annotations/instances_val2017.json',
44 |               img_prefix=data_root + 'val2017/',
45 |               pipeline=test_pipeline))
46 | evaluation = dict(metric=['bbox', 'segm'])  # both metrics requested, matching the with_bbox / with_mask loading above
47 | 


--------------------------------------------------------------------------------
/detection/configs/_base_/datasets/coco_instance_augreg.py:
--------------------------------------------------------------------------------
 1 | # dataset settings: COCO 2017 instance segmentation with 127.5 mean/std normalization
 2 | dataset_type = 'CocoDataset'
 3 | data_root = 'data/coco/'  # prefix concatenated onto every ann_file / img_prefix below
 4 | img_norm_cfg = dict(  # unpacked into the Normalize step of both pipelines
 5 |     mean=[127.5, 127.5, 127.5], std=[127.5, 127.5, 127.5], to_rgb=True)  # if Normalize applies (x - mean) / std, 0..255 maps onto [-1, 1] - TODO confirm
 6 | train_pipeline = [
 7 |     dict(type='LoadImageFromFile'),
 8 |     dict(type='LoadAnnotations', with_bbox=True, with_mask=True),  # boxes and masks are both loaded
 9 |     dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),  # one fixed training scale
10 |     dict(type='RandomFlip', flip_ratio=0.5),
11 |     dict(type='Normalize', **img_norm_cfg),
12 |     dict(type='Pad', size_divisor=32),
13 |     dict(type='DefaultFormatBundle'),
14 |     dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']),  # fields kept for training
15 | ]
16 | test_pipeline = [
17 |     dict(type='LoadImageFromFile'),
18 |     dict(
19 |         type='MultiScaleFlipAug',
20 |         img_scale=(1333, 800),  # same scale as training
21 |         flip=False,  # flipping disabled at test time
22 |         transforms=[
23 |             dict(type='Resize', keep_ratio=True),
24 |             dict(type='RandomFlip'),
25 |             dict(type='Normalize', **img_norm_cfg),
26 |             dict(type='Pad', size_divisor=32),
27 |             dict(type='ImageToTensor', keys=['img']),
28 |             dict(type='Collect', keys=['img']),  # only the image is collected; no ground truth
29 |         ])
30 | ]
31 | data = dict(
32 |     samples_per_gpu=2,
33 |     workers_per_gpu=2,
34 |     train=dict(
35 |         type=dataset_type,
36 |         ann_file=data_root + 'annotations/instances_train2017.json',
37 |         img_prefix=data_root + 'train2017/',
38 |         pipeline=train_pipeline),
39 |     val=dict(
40 |         type=dataset_type,
41 |         ann_file=data_root + 'annotations/instances_val2017.json',
42 |         img_prefix=data_root + 'val2017/',
43 |         pipeline=test_pipeline),
44 |     test=dict(  # NOTE: test points at the same val2017 annotations and images as val
45 |         type=dataset_type,
46 |         ann_file=data_root + 'annotations/instances_val2017.json',
47 |         img_prefix=data_root + 'val2017/',
48 |         pipeline=test_pipeline))
49 | evaluation = dict(metric=['bbox', 'segm'])  # both metrics requested, matching the with_bbox / with_mask loading above
50 | 


--------------------------------------------------------------------------------
/detection/configs/_base_/datasets/lvis_v0.5_instance.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) OpenMMLab. All rights reserved.
 2 | # dataset settings
 3 | _base_ = 'coco_instance.py'
 4 | dataset_type = 'LVISV05Dataset'
 5 | data_root = 'data/lvis_v0.5/'
 6 | data = dict(samples_per_gpu=2,
 7 |             workers_per_gpu=2,
 8 |             train=dict(_delete_=True,
 9 |                        type='ClassBalancedDataset',
10 |                        oversample_thr=1e-3,
11 |                        dataset=dict(type=dataset_type,
12 |                                     ann_file=data_root +
13 |                                     'annotations/lvis_v0.5_train.json',
14 |                                     img_prefix=data_root + 'train2017/')),
15 |             val=dict(type=dataset_type,
16 |                      ann_file=data_root + 'annotations/lvis_v0.5_val.json',
17 |                      img_prefix=data_root + 'val2017/'),
18 |             test=dict(type=dataset_type,
19 |                       ann_file=data_root + 'annotations/lvis_v0.5_val.json',
20 |                       img_prefix=data_root + 'val2017/'))
21 | evaluation = dict(metric=['bbox', 'segm'])
22 | 


--------------------------------------------------------------------------------
/detection/configs/_base_/datasets/lvis_v1_instance.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) OpenMMLab. All rights reserved.
 2 | # dataset settings
 3 | _base_ = 'coco_instance.py'
 4 | dataset_type = 'LVISV1Dataset'
 5 | data_root = 'data/lvis_v1/'
 6 | data = dict(samples_per_gpu=2,
 7 |             workers_per_gpu=2,
 8 |             train=dict(_delete_=True,
 9 |                        type='ClassBalancedDataset',
10 |                        oversample_thr=1e-3,
11 |                        dataset=dict(type=dataset_type,
12 |                                     ann_file=data_root +
13 |                                     'annotations/lvis_v1_train.json',
14 |                                     img_prefix=data_root)),
15 |             val=dict(type=dataset_type,
16 |                      ann_file=data_root + 'annotations/lvis_v1_val.json',
17 |                      img_prefix=data_root),
18 |             test=dict(type=dataset_type,
19 |                       ann_file=data_root + 'annotations/lvis_v1_val.json',
20 |                       img_prefix=data_root))
21 | evaluation = dict(metric=['bbox', 'segm'])
22 | 


--------------------------------------------------------------------------------
/detection/configs/_base_/datasets/obj365_detection.py:
--------------------------------------------------------------------------------
 1 | # dataset settings
 2 | dataset_type = 'Objects365V2Dataset'
 3 | data_root = 'data/Objects365/Obj365_v2/'
 4 | img_norm_cfg = dict(
 5 |     mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
 6 | train_pipeline = [
 7 |     dict(type='LoadImageFromFile'),
 8 |     dict(type='LoadAnnotations', with_bbox=True),
 9 |     dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),
10 |     dict(type='RandomFlip', flip_ratio=0.5),
11 |     dict(type='Normalize', **img_norm_cfg),
12 |     dict(type='Pad', size_divisor=32),
13 |     dict(type='DefaultFormatBundle'),
14 |     dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
15 | ]
16 | test_pipeline = [
17 |     dict(type='LoadImageFromFile'),
18 |     dict(
19 |         type='MultiScaleFlipAug',
20 |         img_scale=(1333, 800),
21 |         flip=False,
22 |         transforms=[
23 |             dict(type='Resize', keep_ratio=True),
24 |             dict(type='RandomFlip'),
25 |             dict(type='Normalize', **img_norm_cfg),
26 |             dict(type='Pad', size_divisor=32),
27 |             dict(type='ImageToTensor', keys=['img']),
28 |             dict(type='Collect', keys=['img']),
29 |         ])
30 | ]
31 | data = dict(
32 |     samples_per_gpu=2,
33 |     workers_per_gpu=2,
34 |     train=dict(
35 |         type=dataset_type,
36 |         ann_file=data_root + 'annotations/zhiyuan_objv2_train.json',
37 |         img_prefix=data_root + 'train/',
38 |         pipeline=train_pipeline),
39 |     val=dict(
40 |         type=dataset_type,
41 |         ann_file=data_root + 'annotations/zhiyuan_objv2_val.json',
42 |         img_prefix=data_root + 'val/',
43 |         pipeline=test_pipeline),
44 |     test=dict(
45 |         type=dataset_type,
46 |         ann_file=data_root + 'annotations/zhiyuan_objv2_val.json',
47 |         img_prefix=data_root + 'val/',
48 |         pipeline=test_pipeline))
49 | evaluation = dict(interval=1, metric='bbox')


--------------------------------------------------------------------------------
/detection/configs/_base_/datasets/voc0712.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) OpenMMLab. All rights reserved.
 2 | # dataset settings
 3 | dataset_type = 'VOCDataset'
 4 | data_root = 'data/VOCdevkit/'
 5 | img_norm_cfg = dict(
 6 |     mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
 7 | train_pipeline = [
 8 |     dict(type='LoadImageFromFile'),
 9 |     dict(type='LoadAnnotations', with_bbox=True),
10 |     dict(type='Resize', img_scale=(1000, 600), keep_ratio=True),
11 |     dict(type='RandomFlip', flip_ratio=0.5),
12 |     dict(type='Normalize', **img_norm_cfg),
13 |     dict(type='Pad', size_divisor=32),
14 |     dict(type='DefaultFormatBundle'),
15 |     dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
16 | ]
17 | test_pipeline = [
18 |     dict(type='LoadImageFromFile'),
19 |     dict(type='MultiScaleFlipAug',
20 |          img_scale=(1000, 600),
21 |          flip=False,
22 |          transforms=[
23 |              dict(type='Resize', keep_ratio=True),
24 |              dict(type='RandomFlip'),
25 |              dict(type='Normalize', **img_norm_cfg),
26 |              dict(type='Pad', size_divisor=32),
27 |              dict(type='ImageToTensor', keys=['img']),
28 |              dict(type='Collect', keys=['img']),
29 |          ])
30 | ]
31 | data = dict(
32 |     samples_per_gpu=2,
33 |     workers_per_gpu=2,
34 |     train=dict(type='RepeatDataset',
35 |                times=3,
36 |                dataset=dict(
37 |                    type=dataset_type,
38 |                    ann_file=[
39 |                        data_root + 'VOC2007/ImageSets/Main/trainval.txt',
40 |                        data_root + 'VOC2012/ImageSets/Main/trainval.txt'
41 |                    ],
42 |                    img_prefix=[data_root + 'VOC2007/', data_root + 'VOC2012/'],
43 |                    pipeline=train_pipeline)),
44 |     val=dict(type=dataset_type,
45 |              ann_file=data_root + 'VOC2007/ImageSets/Main/test.txt',
46 |              img_prefix=data_root + 'VOC2007/',
47 |              pipeline=test_pipeline),
48 |     test=dict(type=dataset_type,
49 |               ann_file=data_root + 'VOC2007/ImageSets/Main/test.txt',
50 |               img_prefix=data_root + 'VOC2007/',
51 |               pipeline=test_pipeline))
52 | evaluation = dict(interval=1, metric='mAP')
53 | 


--------------------------------------------------------------------------------
/detection/configs/_base_/default_runtime.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) OpenMMLab. All rights reserved.
 2 | checkpoint_config = dict(interval=1)
 3 | # yapf:disable
 4 | log_config = dict(
 5 |     interval=50,
 6 |     hooks=[
 7 |         dict(type='TextLoggerHook'),
 8 |         # dict(type='TensorboardLoggerHook')
 9 |     ])
10 | # yapf:enable
11 | custom_hooks = [dict(type='NumClassCheckHook')]
12 | # evaluation = dict(save_best='auto')
13 | dist_params = dict(backend='nccl')
14 | log_level = 'INFO'
15 | load_from = None
16 | resume_from = None
17 | workflow = [('train', 1)]
18 | 


--------------------------------------------------------------------------------
/detection/configs/_base_/models/retinanet_r50_fpn.py:
--------------------------------------------------------------------------------
 1 | # model settings
 2 | model = dict(
 3 |     type='RetinaNet',
 4 |     backbone=dict(
 5 |         type='ResNet',
 6 |         depth=50,
 7 |         num_stages=4,
 8 |         out_indices=(0, 1, 2, 3),
 9 |         frozen_stages=1,
10 |         norm_cfg=dict(type='BN', requires_grad=True),
11 |         norm_eval=True,
12 |         style='pytorch',
13 |         init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
14 |     neck=dict(
15 |         type='FPN',
16 |         in_channels=[256, 512, 1024, 2048],
17 |         out_channels=256,
18 |         start_level=1,
19 |         add_extra_convs='on_input',
20 |         num_outs=5),
21 |     bbox_head=dict(
22 |         type='RetinaHead',
23 |         num_classes=80,
24 |         in_channels=256,
25 |         stacked_convs=4,
26 |         feat_channels=256,
27 |         anchor_generator=dict(
28 |             type='AnchorGenerator',
29 |             octave_base_scale=4,
30 |             scales_per_octave=3,
31 |             ratios=[0.5, 1.0, 2.0],
32 |             strides=[8, 16, 32, 64, 128]),
33 |         bbox_coder=dict(
34 |             type='DeltaXYWHBBoxCoder',
35 |             target_means=[.0, .0, .0, .0],
36 |             target_stds=[1.0, 1.0, 1.0, 1.0]),
37 |         loss_cls=dict(
38 |             type='FocalLoss',
39 |             use_sigmoid=True,
40 |             gamma=2.0,
41 |             alpha=0.25,
42 |             loss_weight=1.0),
43 |         loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
44 |     # model training and testing settings
45 |     train_cfg=dict(
46 |         assigner=dict(
47 |             type='MaxIoUAssigner',
48 |             pos_iou_thr=0.5,
49 |             neg_iou_thr=0.4,
50 |             min_pos_iou=0,
51 |             ignore_iof_thr=-1),
52 |         allowed_border=-1,
53 |         pos_weight=-1,
54 |         debug=False),
55 |     test_cfg=dict(
56 |         nms_pre=1000,
57 |         min_bbox_size=0,
58 |         score_thr=0.05,
59 |         nms=dict(type='nms', iou_threshold=0.5),
60 |         max_per_img=100))
61 | 


--------------------------------------------------------------------------------
/detection/configs/_base_/models/rpn_r50_caffe_c4.py:
--------------------------------------------------------------------------------
 1 | # model settings
 2 | model = dict(
 3 |     type='RPN',
 4 |     backbone=dict(
 5 |         type='ResNet',
 6 |         depth=50,
 7 |         num_stages=3,
 8 |         strides=(1, 2, 2),
 9 |         dilations=(1, 1, 1),
10 |         out_indices=(2, ),
11 |         frozen_stages=1,
12 |         norm_cfg=dict(type='BN', requires_grad=False),
13 |         norm_eval=True,
14 |         style='caffe',
15 |         init_cfg=dict(
16 |             type='Pretrained',
17 |             checkpoint='open-mmlab://detectron2/resnet50_caffe')),
18 |     neck=None,
19 |     rpn_head=dict(
20 |         type='RPNHead',
21 |         in_channels=1024,
22 |         feat_channels=1024,
23 |         anchor_generator=dict(
24 |             type='AnchorGenerator',
25 |             scales=[2, 4, 8, 16, 32],
26 |             ratios=[0.5, 1.0, 2.0],
27 |             strides=[16]),
28 |         bbox_coder=dict(
29 |             type='DeltaXYWHBBoxCoder',
30 |             target_means=[.0, .0, .0, .0],
31 |             target_stds=[1.0, 1.0, 1.0, 1.0]),
32 |         loss_cls=dict(
33 |             type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
34 |         loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
35 |     # model training and testing settings
36 |     train_cfg=dict(
37 |         rpn=dict(
38 |             assigner=dict(
39 |                 type='MaxIoUAssigner',
40 |                 pos_iou_thr=0.7,
41 |                 neg_iou_thr=0.3,
42 |                 min_pos_iou=0.3,
43 |                 ignore_iof_thr=-1),
44 |             sampler=dict(
45 |                 type='RandomSampler',
46 |                 num=256,
47 |                 pos_fraction=0.5,
48 |                 neg_pos_ub=-1,
49 |                 add_gt_as_proposals=False),
50 |             allowed_border=0,
51 |             pos_weight=-1,
52 |             debug=False)),
53 |     test_cfg=dict(
54 |         rpn=dict(
55 |             nms_pre=12000,
56 |             max_per_img=2000,
57 |             nms=dict(type='nms', iou_threshold=0.7),
58 |             min_bbox_size=0)))
59 | 


--------------------------------------------------------------------------------
/detection/configs/_base_/models/rpn_r50_fpn.py:
--------------------------------------------------------------------------------
 1 | # model settings
 2 | model = dict(
 3 |     type='RPN',
 4 |     backbone=dict(
 5 |         type='ResNet',
 6 |         depth=50,
 7 |         num_stages=4,
 8 |         out_indices=(0, 1, 2, 3),
 9 |         frozen_stages=1,
10 |         norm_cfg=dict(type='BN', requires_grad=True),
11 |         norm_eval=True,
12 |         style='pytorch',
13 |         init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
14 |     neck=dict(
15 |         type='FPN',
16 |         in_channels=[256, 512, 1024, 2048],
17 |         out_channels=256,
18 |         num_outs=5),
19 |     rpn_head=dict(
20 |         type='RPNHead',
21 |         in_channels=256,
22 |         feat_channels=256,
23 |         anchor_generator=dict(
24 |             type='AnchorGenerator',
25 |             scales=[8],
26 |             ratios=[0.5, 1.0, 2.0],
27 |             strides=[4, 8, 16, 32, 64]),
28 |         bbox_coder=dict(
29 |             type='DeltaXYWHBBoxCoder',
30 |             target_means=[.0, .0, .0, .0],
31 |             target_stds=[1.0, 1.0, 1.0, 1.0]),
32 |         loss_cls=dict(
33 |             type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
34 |         loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
35 |     # model training and testing settings
36 |     train_cfg=dict(
37 |         rpn=dict(
38 |             assigner=dict(
39 |                 type='MaxIoUAssigner',
40 |                 pos_iou_thr=0.7,
41 |                 neg_iou_thr=0.3,
42 |                 min_pos_iou=0.3,
43 |                 ignore_iof_thr=-1),
44 |             sampler=dict(
45 |                 type='RandomSampler',
46 |                 num=256,
47 |                 pos_fraction=0.5,
48 |                 neg_pos_ub=-1,
49 |                 add_gt_as_proposals=False),
50 |             allowed_border=0,
51 |             pos_weight=-1,
52 |             debug=False)),
53 |     test_cfg=dict(
54 |         rpn=dict(
55 |             nms_pre=2000,
56 |             max_per_img=1000,
57 |             nms=dict(type='nms', iou_threshold=0.7),
58 |             min_bbox_size=0)))
59 | 


--------------------------------------------------------------------------------
/detection/configs/_base_/models/ssd300.py:
--------------------------------------------------------------------------------
 1 | # model settings
 2 | input_size = 300
 3 | model = dict(
 4 |     type='SingleStageDetector',
 5 |     backbone=dict(
 6 |         type='SSDVGG',
 7 |         depth=16,
 8 |         with_last_pool=False,
 9 |         ceil_mode=True,
10 |         out_indices=(3, 4),
11 |         out_feature_indices=(22, 34),
12 |         init_cfg=dict(
13 |             type='Pretrained', checkpoint='open-mmlab://vgg16_caffe')),
14 |     neck=dict(
15 |         type='SSDNeck',
16 |         in_channels=(512, 1024),
17 |         out_channels=(512, 1024, 512, 256, 256, 256),
18 |         level_strides=(2, 2, 1, 1),
19 |         level_paddings=(1, 1, 0, 0),
20 |         l2_norm_scale=20),
21 |     bbox_head=dict(
22 |         type='SSDHead',
23 |         in_channels=(512, 1024, 512, 256, 256, 256),
24 |         num_classes=80,
25 |         anchor_generator=dict(
26 |             type='SSDAnchorGenerator',
27 |             scale_major=False,
28 |             input_size=input_size,
29 |             basesize_ratio_range=(0.15, 0.9),
30 |             strides=[8, 16, 32, 64, 100, 300],
31 |             ratios=[[2], [2, 3], [2, 3], [2, 3], [2], [2]]),
32 |         bbox_coder=dict(
33 |             type='DeltaXYWHBBoxCoder',
34 |             target_means=[.0, .0, .0, .0],
35 |             target_stds=[0.1, 0.1, 0.2, 0.2])),
36 |     # model training and testing settings
37 |     train_cfg=dict(
38 |         assigner=dict(
39 |             type='MaxIoUAssigner',
40 |             pos_iou_thr=0.5,
41 |             neg_iou_thr=0.5,
42 |             min_pos_iou=0.,
43 |             ignore_iof_thr=-1,
44 |             gt_max_assign_all=False),
45 |         smoothl1_beta=1.,
46 |         allowed_border=-1,
47 |         pos_weight=-1,
48 |         neg_pos_ratio=3,
49 |         debug=False),
50 |     test_cfg=dict(
51 |         nms_pre=1000,
52 |         nms=dict(type='nms', iou_threshold=0.45),
53 |         min_bbox_size=0,
54 |         score_thr=0.02,
55 |         max_per_img=200))
56 | cudnn_benchmark = True
57 | 


--------------------------------------------------------------------------------
/detection/configs/_base_/schedules/schedule_1x.py:
--------------------------------------------------------------------------------
 1 | # optimizer
 2 | optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
 3 | optimizer_config = dict(grad_clip=None)
 4 | # learning policy
 5 | lr_config = dict(
 6 |     policy='step',
 7 |     warmup='linear',
 8 |     warmup_iters=500,
 9 |     warmup_ratio=0.001,
10 |     step=[8, 11])
11 | runner = dict(type='EpochBasedRunner', max_epochs=12)
12 | 


--------------------------------------------------------------------------------
/detection/configs/_base_/schedules/schedule_20e.py:
--------------------------------------------------------------------------------
 1 | # optimizer
 2 | optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
 3 | optimizer_config = dict(grad_clip=None)
 4 | # learning policy
 5 | lr_config = dict(
 6 |     policy='step',
 7 |     warmup='linear',
 8 |     warmup_iters=500,
 9 |     warmup_ratio=0.001,
10 |     step=[16, 19])
11 | runner = dict(type='EpochBasedRunner', max_epochs=20)
12 | 


--------------------------------------------------------------------------------
/detection/configs/_base_/schedules/schedule_2x.py:
--------------------------------------------------------------------------------
 1 | # optimizer
 2 | optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
 3 | optimizer_config = dict(grad_clip=None)
 4 | # learning policy
 5 | lr_config = dict(
 6 |     policy='step',
 7 |     warmup='linear',
 8 |     warmup_iters=500,
 9 |     warmup_ratio=0.001,
10 |     step=[16, 22])
11 | runner = dict(type='EpochBasedRunner', max_epochs=24)
12 | 


--------------------------------------------------------------------------------
/detection/configs/_base_/schedules/schedule_3x.py:
--------------------------------------------------------------------------------
 1 | # optimizer
 2 | optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
 3 | optimizer_config = dict(grad_clip=None)
 4 | # learning policy
 5 | lr_config = dict(
 6 |     policy='step',
 7 |     warmup='linear',
 8 |     warmup_iters=500,
 9 |     warmup_ratio=0.001,
10 |     step=[27, 33])
11 | runner = dict(type='EpochBasedRunner', max_epochs=36)
12 | 


--------------------------------------------------------------------------------
/detection/configs/_base_/schedules/schedule_6x.py:
--------------------------------------------------------------------------------
 1 | # optimizer
 2 | optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
 3 | optimizer_config = dict(grad_clip=None)
 4 | # learning policy
 5 | lr_config = dict(
 6 |     policy='step',
 7 |     warmup='linear',
 8 |     warmup_iters=2000,
 9 |     warmup_ratio=0.001,
10 |     step=[62, 68])
11 | runner = dict(type='EpochBasedRunner', max_epochs=72)
12 | 


--------------------------------------------------------------------------------
/detection/configs/mask_rcnn/mask_rcnn_deit_adapter_tiny_fpn_1x_coco.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Shanghai AI Lab. All rights reserved.
 2 | _base_ = [
 3 |     '../_base_/models/mask_rcnn_r50_fpn.py',
 4 |     '../_base_/datasets/coco_instance.py',
 5 |     '../_base_/schedules/schedule_1x.py',
 6 |     '../_base_/default_runtime.py'
 7 | ]
 8 | # Local checkpoint path (remote alternative: https://dl.fbaipublicfiles.com/deit/deit_tiny_patch16_224-a1311bcf.pth)
 9 | pretrained = 'pretrained/deit_tiny_patch16_224-a1311bcf.pth'
10 | model = dict(
11 |     backbone=dict(
12 |         _delete_=True,  # mmcv Config convention: replace, rather than merge with, the inherited backbone dict
13 |         type='ViTAdapter',
14 |         patch_size=16,
15 |         embed_dim=192,
16 |         depth=12,
17 |         num_heads=3,
18 |         mlp_ratio=4,
19 |         drop_path_rate=0.1,
20 |         conv_inplane=64,
21 |         n_points=4,
22 |         deform_num_heads=6,
23 |         cffn_ratio=0.25,
24 |         deform_ratio=1.0,
25 |         layer_scale=False,
26 |         interaction_indexes=[[0, 2], [3, 5], [6, 8], [9, 11]],  # four [start, end] pairs covering indices 0..11
27 |         window_attn=[True, True, False, True, True, False,  # 12 entries (= depth); every third is False
28 |                      True, True, False, True, True, False],
29 |         window_size=[14, 14, None, 14, 14, None,  # 14 where window_attn is True, None where it is False
30 |                      14, 14, None, 14, 14, None],
31 |         pretrained=pretrained),
32 |     neck=dict(
33 |         type='FPN',
34 |         in_channels=[192, 192, 192, 192],  # each entry equals the backbone embed_dim (192)
35 |         out_channels=256,
36 |         num_outs=5))
37 | data = dict(
38 |     samples_per_gpu=2,
39 |     workers_per_gpu=2)
40 | optimizer = dict(
41 |     _delete_=True, type='AdamW', lr=0.0002, weight_decay=0.01,  # replaces the SGD optimizer from schedule_1x.py
42 |     paramwise_cfg=dict(
43 |     custom_keys={  # decay_mult=0. per key; presumably disables weight decay for matching params - confirm
44 |         'level_embed': dict(decay_mult=0.),
45 |         'pos_embed': dict(decay_mult=0.),
46 |         'norm': dict(decay_mult=0.),
47 |         'bias': dict(decay_mult=0.)
48 |     }))
49 | optimizer_config = dict(grad_clip=None)  # same value as in schedule_1x.py
50 | evaluation = dict(save_best='auto')
51 | # fp16 = dict(loss_scale=dict(init_scale=512))
52 | checkpoint_config = dict(
53 |     interval=1,
54 |     max_keep_ckpts=3,
55 |     save_last=True,
56 | )


--------------------------------------------------------------------------------
/detection/convert_14to16.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | import argparse
 3 | import torch.nn.functional as F
 4 | 
 5 | parser = argparse.ArgumentParser(description='Hyperparams')
 6 | parser.add_argument('filename', nargs='?', type=str, default=None)
 7 | 
 8 | args = parser.parse_args()
 9 | 
10 | model = torch.load(args.filename, map_location=torch.device('cpu'))
11 | 
12 | # resize patch embedding from 14x14 to 16x16
13 | patch_embed = model['patch_embed.proj.weight']
14 | patch_embed = F.interpolate(patch_embed, size=(16, 16), mode='bilinear', align_corners=False)
15 | model['patch_embed.proj.weight'] = patch_embed
16 | 
17 | # rename parameters of layer scale
18 | new_model = {}
19 | for k, v in model.items():
20 |     if "mask_token" in k:
21 |         continue
22 |     new_k = k.replace("ls1.gamma", 'gamma1')
23 |     new_k = new_k.replace("ls2.gamma", 'gamma2')
24 |     new_model[new_k] = v
25 | 
26 | torch.save(new_model, args.filename.replace(".pth", "_14to16.pth"))


--------------------------------------------------------------------------------
/detection/dist_test.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | # Usage: [PORT=<port>] dist_test.sh <config> <checkpoint> <num_gpus> [extra test.py args...]
 3 | CONFIG=$1
 4 | CHECKPOINT=$2
 5 | GPUS=$3
 6 | PORT=${PORT:-29600}
 7 | 
 8 | PYTHONPATH="$(dirname "$0")/..":$PYTHONPATH \
 9 | python -m torch.distributed.launch --nproc_per_node="$GPUS" --master_port="$PORT" \
10 |     "$(dirname "$0")/test.py" "$CONFIG" "$CHECKPOINT" --launcher pytorch "${@:4}"
11 | 


--------------------------------------------------------------------------------
/detection/dist_train.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | # Usage: [PORT=<port>] dist_train.sh <config> <num_gpus> [extra train.py args...]
 3 | CONFIG=$1
 4 | GPUS=$2
 5 | PORT=${PORT:-29500}
 6 | 
 7 | PYTHONPATH="$(dirname "$0")/..":$PYTHONPATH \
 8 | python -m torch.distributed.launch --nproc_per_node="$GPUS" --master_port="$PORT" \
 9 |     "$(dirname "$0")/train.py" "$CONFIG" --launcher pytorch "${@:3}"
10 | 


--------------------------------------------------------------------------------
/detection/mmcv_custom/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Shanghai AI Lab. All rights reserved.
 2 | from .checkpoint import load_checkpoint
 3 | from .customized_text import CustomizedTextLoggerHook
 4 | from .layer_decay_optimizer_constructor import LayerDecayOptimizerConstructor
 5 | from .my_checkpoint import my_load_checkpoint
 6 | 
 7 | __all__ = [
 8 |     'LayerDecayOptimizerConstructor',
 9 |     'CustomizedTextLoggerHook',
10 |     'load_checkpoint', 'my_load_checkpoint'
11 | ]
12 | 


--------------------------------------------------------------------------------
/detection/mmcv_custom/uniperceiver_converter.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | 
 3 | checkpoint = torch.load("../pretrained/uni-perceiver-large-L24-H1024-224size-pretrained.pth",
 4 |                         map_location=torch.device('cpu'))
 5 | checkpoint = checkpoint['model']
 6 | new_checkpoint = {}
 7 | for k, v in checkpoint.items():
 8 |     new_k = k.replace("fused_encoder.", "")
 9 |     new_k = new_k.replace("in_proj_", "in_proj.")
10 |     new_k = new_k.replace("video_embed.", "visual_embed.")
11 |     new_k = new_k.replace("visual_embed.embeddings.weight",
12 |                           "visual_embed.patch_embed.proj.weight")
13 |     new_k = new_k.replace("visual_embed.embeddings.bias",
14 |                           "visual_embed.patch_embed.proj.bias")
15 |     new_k = new_k.replace("visual_embed.embeddings_st_pos.spatial_pos_embed.weight",
16 |                           "visual_embed.patch_embed.spatial_pos_embed.weight")
17 |     new_k = new_k.replace("visual_embed.embeddings_st_pos.temporal_pos_embed.weight",
18 |                           "visual_embed.patch_embed.temporal_pos_embed.weight")
19 | 
20 |     if "loss_prepare" in new_k:
21 |         pass
22 |     elif "token_embed" in new_k:
23 |         pass
24 |     else:
25 |         new_checkpoint[new_k] = v
26 |         
27 | for k, v in new_checkpoint.items():
28 |     print(k, v.shape)
29 | 
30 | torch.save(new_checkpoint,
31 |            "../pretrained/uni-perceiver-large-L24-H1024-224size-pretrained_converted.pth")
32 | print("saved!")
33 | 
34 | 


--------------------------------------------------------------------------------
/detection/mmdet_custom/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Shanghai AI Lab. All rights reserved.
2 | from .models import *  # noqa: F401,F403
3 | 


--------------------------------------------------------------------------------
/detection/mmdet_custom/models/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Shanghai AI Lab. All rights reserved.
2 | from .backbones import *  # noqa: F401,F403
3 | from .necks import *  # noqa: F401,F403
4 | from .detectors import *  # noqa: F401,F403


--------------------------------------------------------------------------------
/detection/mmdet_custom/models/backbones/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Shanghai AI Lab. All rights reserved.
2 | from .beit_adapter import BEiTAdapter
3 | from .uniperceiver_adapter import UniPerceiverAdapter
4 | from .vit_adapter import ViTAdapter
5 | from .vit_baseline import ViTBaseline
6 | 
7 | __all__ = ['UniPerceiverAdapter', 'ViTAdapter', 'ViTBaseline', 'BEiTAdapter']
8 | 


--------------------------------------------------------------------------------
/detection/mmdet_custom/models/detectors/__init__.py:
--------------------------------------------------------------------------------
1 | from .htc_aug import HybridTaskCascadeAug
2 | 
3 | 
4 | __all__ = ['HybridTaskCascadeAug']


--------------------------------------------------------------------------------
/detection/mmdet_custom/models/necks/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Shanghai AI Lab. All rights reserved.
2 | from .channel_mapper import ChannelMapperWithPooling
3 | from .extra_attention import ExtraAttention
4 | 
5 | __all__ = ['ExtraAttention', 'ChannelMapperWithPooling']
6 | 


--------------------------------------------------------------------------------
/detection/ops/README.md:
--------------------------------------------------------------------------------
1 | ```
2 | sh make.sh
3 | ```
4 | 


--------------------------------------------------------------------------------
/detection/ops/functions/__init__.py:
--------------------------------------------------------------------------------
 1 | # ------------------------------------------------------------------------------------------------
 2 | # Deformable DETR
 3 | # Copyright (c) 2020 SenseTime. All Rights Reserved.
 4 | # Licensed under the Apache License, Version 2.0 [see LICENSE for details]
 5 | # ------------------------------------------------------------------------------------------------
 6 | # Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0
 7 | # ------------------------------------------------------------------------------------------------
 8 | 
 9 | from .ms_deform_attn_func import MSDeformAttnFunction
10 | 
11 | __all__ = ['MSDeformAttnFunction']
12 | 


--------------------------------------------------------------------------------
/detection/ops/make.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | # ------------------------------------------------------------------------------------------------
 3 | # Deformable DETR
 4 | # Copyright (c) 2020 SenseTime. All Rights Reserved.
 5 | # Licensed under the Apache License, Version 2.0 [see LICENSE for details]
 6 | # ------------------------------------------------------------------------------------------------
 7 | # Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0
 8 | # ------------------------------------------------------------------------------------------------
 9 | 
10 | python setup.py build install
11 | 


--------------------------------------------------------------------------------
/detection/ops/modules/__init__.py:
--------------------------------------------------------------------------------
 1 | # ------------------------------------------------------------------------------------------------
 2 | # Deformable DETR
 3 | # Copyright (c) 2020 SenseTime. All Rights Reserved.
 4 | # Licensed under the Apache License, Version 2.0 [see LICENSE for details]
 5 | # ------------------------------------------------------------------------------------------------
 6 | # Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0
 7 | # ------------------------------------------------------------------------------------------------
 8 | 
 9 | from .ms_deform_attn import MSDeformAttn
10 | 
11 | __all__ = ['MSDeformAttn']
12 | 


--------------------------------------------------------------------------------
/detection/ops/src/cpu/ms_deform_attn_cpu.cpp:
--------------------------------------------------------------------------------
 1 | /*!
 2 | **************************************************************************************************
 3 | * Deformable DETR
 4 | * Copyright (c) 2020 SenseTime. All Rights Reserved.
 5 | * Licensed under the Apache License, Version 2.0 [see LICENSE for details]
 6 | **************************************************************************************************
 7 | * Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0
 8 | **************************************************************************************************
 9 | */
10 | 
11 | #include <vector>
12 | 
13 | #include <ATen/ATen.h>
14 | #include <ATen/cuda/CUDAContext.h>
15 | 
16 | 
17 | at::Tensor
18 | ms_deform_attn_cpu_forward(
19 |     const at::Tensor &value,
20 |     const at::Tensor &spatial_shapes,
21 |     const at::Tensor &level_start_index,
22 |     const at::Tensor &sampling_loc,
23 |     const at::Tensor &attn_weight,
24 |     const int im2col_step)
25 | {
26 |     AT_ERROR("Not implement on cpu");
27 | }
28 | 
29 | std::vector<at::Tensor>
30 | ms_deform_attn_cpu_backward(
31 |     const at::Tensor &value,
32 |     const at::Tensor &spatial_shapes,
33 |     const at::Tensor &level_start_index,
34 |     const at::Tensor &sampling_loc,
35 |     const at::Tensor &attn_weight,
36 |     const at::Tensor &grad_output,
37 |     const int im2col_step)
38 | {
39 |     AT_ERROR("Not implement on cpu");
40 | }
41 | 


--------------------------------------------------------------------------------
/detection/ops/src/cpu/ms_deform_attn_cpu.h:
--------------------------------------------------------------------------------
 1 | /*!
 2 | **************************************************************************************************
 3 | * Deformable DETR
 4 | * Copyright (c) 2020 SenseTime. All Rights Reserved.
 5 | * Licensed under the Apache License, Version 2.0 [see LICENSE for details]
 6 | **************************************************************************************************
 7 | * Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0
 8 | **************************************************************************************************
 9 | */
10 | 
11 | #pragma once
12 | #include <torch/extension.h>
13 | 
// Forward-pass entry point of the CPU backend; returns a single tensor.
// NOTE(review): tensor shapes and dtypes are not specified in this header — confirm
// them against the Python caller before relying on any particular layout.
at::Tensor
ms_deform_attn_cpu_forward(
    const at::Tensor &value,
    const at::Tensor &spatial_shapes,
    const at::Tensor &level_start_index,
    const at::Tensor &sampling_loc,
    const at::Tensor &attn_weight,
    const int im2col_step);
22 | 
// Backward-pass entry point of the CPU backend. Takes the forward inputs plus
// `grad_output` and returns a vector of tensors.
// NOTE(review): the number and order of returned tensors are not specified in this
// header — confirm against the implementation and the Python caller.
std::vector<at::Tensor>
ms_deform_attn_cpu_backward(
    const at::Tensor &value,
    const at::Tensor &spatial_shapes,
    const at::Tensor &level_start_index,
    const at::Tensor &sampling_loc,
    const at::Tensor &attn_weight,
    const at::Tensor &grad_output,
    const int im2col_step);
32 | 


--------------------------------------------------------------------------------
/detection/ops/src/cuda/ms_deform_attn_cuda.h:
--------------------------------------------------------------------------------
 1 | /*!
 2 | **************************************************************************************************
 3 | * Deformable DETR
 4 | * Copyright (c) 2020 SenseTime. All Rights Reserved.
 5 | * Licensed under the Apache License, Version 2.0 [see LICENSE for details]
 6 | **************************************************************************************************
 7 | * Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0
 8 | **************************************************************************************************
 9 | */
10 | 
11 | #pragma once
12 | #include <torch/extension.h>
13 | 
// Forward-pass entry point of the CUDA backend; returns a single tensor.
// Only the declaration lives here; the definition is not part of this header.
// NOTE(review): tensor shapes and dtypes are not specified in this header — confirm
// them against the CUDA implementation.
at::Tensor ms_deform_attn_cuda_forward(
    const at::Tensor &value,
    const at::Tensor &spatial_shapes,
    const at::Tensor &level_start_index,
    const at::Tensor &sampling_loc,
    const at::Tensor &attn_weight,
    const int im2col_step);
21 | 
// Backward-pass entry point of the CUDA backend. Takes the forward inputs plus
// `grad_output` and returns a vector of tensors.
// Only the declaration lives here; the definition is not part of this header.
// NOTE(review): the number and order of returned tensors are not specified in this
// header — confirm against the CUDA implementation.
std::vector<at::Tensor> ms_deform_attn_cuda_backward(
    const at::Tensor &value,
    const at::Tensor &spatial_shapes,
    const at::Tensor &level_start_index,
    const at::Tensor &sampling_loc,
    const at::Tensor &attn_weight,
    const at::Tensor &grad_output,
    const int im2col_step);
30 | 


--------------------------------------------------------------------------------
/detection/ops/src/ms_deform_attn.h:
--------------------------------------------------------------------------------
 1 | /*!
 2 | **************************************************************************************************
 3 | * Deformable DETR
 4 | * Copyright (c) 2020 SenseTime. All Rights Reserved.
 5 | * Licensed under the Apache License, Version 2.0 [see LICENSE for details]
 6 | **************************************************************************************************
 7 | * Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0
 8 | **************************************************************************************************
 9 | */
10 | 
11 | #pragma once
12 | 
13 | #include "cpu/ms_deform_attn_cpu.h"
14 | 
15 | #ifdef WITH_CUDA
16 | #include "cuda/ms_deform_attn_cuda.h"
17 | #endif
18 | 
19 | 
20 | at::Tensor
21 | ms_deform_attn_forward(
22 |     const at::Tensor &value,
23 |     const at::Tensor &spatial_shapes,
24 |     const at::Tensor &level_start_index,
25 |     const at::Tensor &sampling_loc,
26 |     const at::Tensor &attn_weight,
27 |     const int im2col_step)
28 | {
29 |     if (value.type().is_cuda())
30 |     {
31 | #ifdef WITH_CUDA
32 |         return ms_deform_attn_cuda_forward(
33 |             value, spatial_shapes, level_start_index, sampling_loc, attn_weight, im2col_step);
34 | #else
35 |         AT_ERROR("Not compiled with GPU support");
36 | #endif
37 |     }
38 |     AT_ERROR("Not implemented on the CPU");
39 | }
40 | 
41 | std::vector<at::Tensor>
42 | ms_deform_attn_backward(
43 |     const at::Tensor &value,
44 |     const at::Tensor &spatial_shapes,
45 |     const at::Tensor &level_start_index,
46 |     const at::Tensor &sampling_loc,
47 |     const at::Tensor &attn_weight,
48 |     const at::Tensor &grad_output,
49 |     const int im2col_step)
50 | {
51 |     if (value.type().is_cuda())
52 |     {
53 | #ifdef WITH_CUDA
54 |         return ms_deform_attn_cuda_backward(
55 |             value, spatial_shapes, level_start_index, sampling_loc, attn_weight, grad_output, im2col_step);
56 | #else
57 |         AT_ERROR("Not compiled with GPU support");
58 | #endif
59 |     }
60 |     AT_ERROR("Not implemented on the CPU");
61 | }
62 | 


--------------------------------------------------------------------------------
/detection/ops/src/vision.cpp:
--------------------------------------------------------------------------------
 1 | /*!
 2 | **************************************************************************************************
 3 | * Deformable DETR
 4 | * Copyright (c) 2020 SenseTime. All Rights Reserved.
 5 | * Licensed under the Apache License, Version 2.0 [see LICENSE for details]
 6 | **************************************************************************************************
 7 | * Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0
 8 | **************************************************************************************************
 9 | */
10 | 
11 | #include "ms_deform_attn.h"
12 | 
// Python bindings for the extension module. Registers the two dispatch functions from
// ms_deform_attn.h under their C++ names.
// TORCH_EXTENSION_NAME is not defined in this file; it is expected to come from the build.
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
  // Arguments: Python-visible name, function pointer, docstring.
  m.def("ms_deform_attn_forward", &ms_deform_attn_forward, "ms_deform_attn_forward");
  m.def("ms_deform_attn_backward", &ms_deform_attn_backward, "ms_deform_attn_backward");
}
17 | 


--------------------------------------------------------------------------------
/detection/slurm_test.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | set -x
 4 | 
 5 | PARTITION=$1
 6 | JOB_NAME=$2
 7 | CONFIG=$3
 8 | CHECKPOINT=$4
 9 | GPUS=${GPUS:-8}
10 | GPUS_PER_NODE=${GPUS_PER_NODE:-8}
11 | CPUS_PER_TASK=${CPUS_PER_TASK:-5}
12 | PY_ARGS=${@:5}
13 | SRUN_ARGS=${SRUN_ARGS:-""}
14 | 
15 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \
16 | srun -p ${PARTITION} \
17 |     --job-name=${JOB_NAME} \
18 |     --gres=gpu:${GPUS_PER_NODE} \
19 |     --ntasks=${GPUS} \
20 |     --ntasks-per-node=${GPUS_PER_NODE} \
21 |     --cpus-per-task=${CPUS_PER_TASK} \
22 |     --kill-on-bad-exit=1 \
23 |     ${SRUN_ARGS} \
24 |     python -u test.py ${CONFIG} ${CHECKPOINT} --launcher="slurm" ${PY_ARGS}
25 | 


--------------------------------------------------------------------------------
/detection/slurm_train.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | set -x
 4 | 
 5 | PARTITION=$1
 6 | JOB_NAME=$2
 7 | CONFIG=$3
 8 | WORK_DIR=$4
 9 | GPUS=${GPUS:-8}
10 | GPUS_PER_NODE=${GPUS_PER_NODE:-8}
11 | CPUS_PER_TASK=${CPUS_PER_TASK:-5}
12 | SRUN_ARGS=${SRUN_ARGS:-""}
13 | PY_ARGS=${@:5}
14 | 
15 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \
16 | srun -p ${PARTITION} \
17 |     --job-name=${JOB_NAME} \
18 |     --gres=gpu:${GPUS_PER_NODE} \
19 |     --ntasks=${GPUS} \
20 |     --ntasks-per-node=${GPUS_PER_NODE} \
21 |     --cpus-per-task=${CPUS_PER_TASK} \
22 |     --kill-on-bad-exit=1 \
23 |     ${SRUN_ARGS} \
24 |     python -u train.py ${CONFIG} --work-dir=${WORK_DIR} --launcher="slurm" ${PY_ARGS}
25 | 


--------------------------------------------------------------------------------
/detection/video_demo.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) OpenMMLab. All rights reserved.
 2 | import argparse
 3 | 
 4 | import cv2
 5 | import mmcv
 6 | 
 7 | from mmdet.apis import inference_detector, init_detector
 8 | import mmcv_custom  # noqa: F401,F403
 9 | import mmdet_custom  # noqa: F401,F403
10 | 
def parse_args():
    """Define the command-line interface of the video demo and parse ``sys.argv``.

    Returns:
        argparse.Namespace: carries ``video``, ``config``, ``checkpoint``,
        ``device``, ``score_thr``, ``out``, ``show`` and ``wait_time``.
    """
    cli = argparse.ArgumentParser(description='MMDetection video demo')

    # Required positionals, registered in the order they must be supplied.
    positionals = (
        ('video', 'Video file'),
        ('config', 'Config file'),
        ('checkpoint', 'Checkpoint file'),
    )
    for arg_name, arg_help in positionals:
        cli.add_argument(arg_name, help=arg_help)

    # Optional flags.
    cli.add_argument(
        '--device', default='cuda:0', help='Device used for inference')
    cli.add_argument(
        '--score-thr', type=float, default=0.3, help='Bbox score threshold')
    cli.add_argument('--out', type=str, help='Output video file')
    cli.add_argument('--show', action='store_true', help='Show video')
    cli.add_argument(
        '--wait-time',
        type=float,
        default=1,
        help='The interval of show (s), 0 is block')

    return cli.parse_args()
29 | 
30 | 
def main():
    """Run the detector on every frame of a video and show and/or save the result.

    Reads options from :func:`parse_args`, builds the model with
    ``init_detector``, then for each frame runs ``inference_detector``, draws
    the result with ``model.show_result`` and, depending on the flags,
    displays the frame and/or appends it to the output video.

    Raises:
        AssertionError: if neither ``--out`` nor ``--show`` was given.
    """
    args = parse_args()
    if not (args.out or args.show):
        # Raised explicitly rather than via ``assert`` so the check is not
        # stripped when Python runs with ``-O``; the exception type is kept.
        raise AssertionError(
            'Please specify at least one operation (save/show the '
            'video) with the argument "--out" or "--show"')

    model = init_detector(args.config, args.checkpoint, device=args.device)

    video_reader = mmcv.VideoReader(args.video)
    video_writer = None
    if args.out:
        # The output keeps the frame rate and frame size of the input video.
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        video_writer = cv2.VideoWriter(
            args.out, fourcc, video_reader.fps,
            (video_reader.width, video_reader.height))

    try:
        for frame in mmcv.track_iter_progress(video_reader):
            result = inference_detector(model, frame)
            frame = model.show_result(frame, result, score_thr=args.score_thr)
            if args.show:
                cv2.namedWindow('video', 0)
                mmcv.imshow(frame, 'video', args.wait_time)
            if video_writer is not None:
                video_writer.write(frame)
    finally:
        # Always release the writer and close any preview window, even when
        # inference or drawing fails part-way through the video.
        if video_writer is not None:
            video_writer.release()
        cv2.destroyAllWindows()
59 | 
60 | 
61 | if __name__ == '__main__':
62 |     main()


--------------------------------------------------------------------------------
/segmentation/configs/_base_/datasets/ade20k.py:
--------------------------------------------------------------------------------
# dataset settings: ADE20KDataset under data/ade/ADEChallengeData2016, 512x512 training crops
dataset_type = 'ADE20KDataset'
data_root = 'data/ade/ADEChallengeData2016'
# Per-channel mean/std passed to the `Normalize` step of both pipelines below.
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
crop_size = (512, 512)
# Training-time transforms, applied in list order.
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', reduce_zero_label=True),
    dict(type='Resize', img_scale=(2048, 512), ratio_range=(0.5, 2.0)),
    dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
    dict(type='RandomFlip', prob=0.5),
    dict(type='PhotoMetricDistortion'),
    dict(type='Normalize', **img_norm_cfg),
    # Pad target is crop_size; fill value 0 for the image and 255 for the label map.
    dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_semantic_seg']),
]
# Evaluation-time transforms: `img_ratios` is left commented out and flip=False, so only
# `img_scale` is configured on the MultiScaleFlipAug wrapper.
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(2048, 512),
        # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75],
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
# Loader settings and splits. `val` and `test` point at the same validation
# images/annotations and share `test_pipeline`.
data = dict(
    samples_per_gpu=4,
    workers_per_gpu=4,
    train=dict(
        type=dataset_type,
        data_root=data_root,
        img_dir='images/training',
        ann_dir='annotations/training',
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        data_root=data_root,
        img_dir='images/validation',
        ann_dir='annotations/validation',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        data_root=data_root,
        img_dir='images/validation',
        ann_dir='annotations/validation',
        pipeline=test_pipeline))
55 | 


--------------------------------------------------------------------------------
/segmentation/configs/_base_/datasets/chase_db1.py:
--------------------------------------------------------------------------------
# dataset settings: ChaseDB1Dataset under data/CHASE_DB1, 128x128 training crops
dataset_type = 'ChaseDB1Dataset'
data_root = 'data/CHASE_DB1'
# Per-channel mean/std passed to the `Normalize` step of both pipelines below.
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
# Shared by the training `Resize` step and the test-time MultiScaleFlipAug wrapper.
img_scale = (960, 999)
crop_size = (128, 128)
# Training-time transforms, applied in list order.
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations'),
    dict(type='Resize', img_scale=img_scale, ratio_range=(0.5, 2.0)),
    dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
    dict(type='RandomFlip', prob=0.5),
    dict(type='PhotoMetricDistortion'),
    dict(type='Normalize', **img_norm_cfg),
    # Pad target is crop_size; fill value 0 for the image and 255 for the label map.
    dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_semantic_seg'])
]
# Evaluation-time transforms: `img_ratios` is left commented out and flip=False, so only
# `img_scale` is configured on the MultiScaleFlipAug wrapper.
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=img_scale,
        # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75, 2.0],
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img'])
        ])
]

# Loader settings and splits. The training split is wrapped in a RepeatDataset with
# times=40000; `val` and `test` point at the same validation images/annotations.
data = dict(
    samples_per_gpu=4,
    workers_per_gpu=4,
    train=dict(
        type='RepeatDataset',
        times=40000,
        dataset=dict(
            type=dataset_type,
            data_root=data_root,
            img_dir='images/training',
            ann_dir='annotations/training',
            pipeline=train_pipeline)),
    val=dict(
        type=dataset_type,
        data_root=data_root,
        img_dir='images/validation',
        ann_dir='annotations/validation',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        data_root=data_root,
        img_dir='images/validation',
        ann_dir='annotations/validation',
        pipeline=test_pipeline))
60 | 


--------------------------------------------------------------------------------
/segmentation/configs/_base_/datasets/cityscapes.py:
--------------------------------------------------------------------------------
# dataset settings: CityscapesDataset under data/cityscapes/, 512x1024 training crops
dataset_type = 'CityscapesDataset'
data_root = 'data/cityscapes/'
# Per-channel mean/std passed to the `Normalize` step of both pipelines below.
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
crop_size = (512, 1024)
# Training-time transforms, applied in list order.
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations'),
    dict(type='Resize', img_scale=(2048, 1024), ratio_range=(0.5, 2.0)),
    dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
    dict(type='RandomFlip', prob=0.5),
    dict(type='PhotoMetricDistortion'),
    dict(type='Normalize', **img_norm_cfg),
    # Pad target is crop_size; fill value 0 for the image and 255 for the label map.
    dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_semantic_seg']),
]
# Evaluation-time transforms: `img_ratios` is left commented out and flip=False, so only
# `img_scale` is configured on the MultiScaleFlipAug wrapper.
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(2048, 1024),
        # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75],
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
# Loader settings and splits. `val` and `test` point at the same leftImg8bit/val and
# gtFine/val directories and share `test_pipeline`.
data = dict(
    samples_per_gpu=2,
    workers_per_gpu=2,
    train=dict(
        type=dataset_type,
        data_root=data_root,
        img_dir='leftImg8bit/train',
        ann_dir='gtFine/train',
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        data_root=data_root,
        img_dir='leftImg8bit/val',
        ann_dir='gtFine/val',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        data_root=data_root,
        img_dir='leftImg8bit/val',
        ann_dir='gtFine/val',
        pipeline=test_pipeline))
55 | 


--------------------------------------------------------------------------------
/segmentation/configs/_base_/datasets/cityscapes_1024x1024.py:
--------------------------------------------------------------------------------
# Cityscapes variant with 1024x1024 training crops. Based on ./cityscapes.py; this file
# redefines the pipelines and overrides only the `pipeline` entry of each split.
_base_ = './cityscapes.py'
# Defined locally because the pipelines below reference it by name in this module.
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
crop_size = (1024, 1024)
# Training-time transforms, applied in list order.
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations'),
    dict(type='Resize', img_scale=(2048, 1024), ratio_range=(0.5, 2.0)),
    dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
    dict(type='RandomFlip', prob=0.5),
    dict(type='PhotoMetricDistortion'),
    dict(type='Normalize', **img_norm_cfg),
    # Pad target is crop_size; fill value 0 for the image and 255 for the label map.
    dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_semantic_seg']),
]
# Evaluation-time transforms: `img_ratios` is left commented out and flip=False.
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(2048, 1024),
        # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75],
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
# Only the pipelines are set here; no other `data` key is given in this file.
data = dict(
    train=dict(pipeline=train_pipeline),
    val=dict(pipeline=test_pipeline),
    test=dict(pipeline=test_pipeline))
36 | 


--------------------------------------------------------------------------------
/segmentation/configs/_base_/datasets/cityscapes_768x768.py:
--------------------------------------------------------------------------------
# Cityscapes variant with 768x768 training crops and img_scale (2049, 1025). Based on
# ./cityscapes.py; this file redefines the pipelines and overrides only the `pipeline`
# entry of each split.
_base_ = './cityscapes.py'
# Defined locally because the pipelines below reference it by name in this module.
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
crop_size = (768, 768)
# Training-time transforms, applied in list order.
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations'),
    dict(type='Resize', img_scale=(2049, 1025), ratio_range=(0.5, 2.0)),
    dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
    dict(type='RandomFlip', prob=0.5),
    dict(type='PhotoMetricDistortion'),
    dict(type='Normalize', **img_norm_cfg),
    # Pad target is crop_size; fill value 0 for the image and 255 for the label map.
    dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_semantic_seg']),
]
# Evaluation-time transforms: `img_ratios` is left commented out and flip=False.
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(2049, 1025),
        # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75],
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
# Only the pipelines are set here; no other `data` key is given in this file.
data = dict(
    train=dict(pipeline=train_pipeline),
    val=dict(pipeline=test_pipeline),
    test=dict(pipeline=test_pipeline))
36 | 


--------------------------------------------------------------------------------
/segmentation/configs/_base_/datasets/cityscapes_769x769.py:
--------------------------------------------------------------------------------
# Cityscapes variant with 769x769 training crops and img_scale (2049, 1025). Based on
# ./cityscapes.py; this file redefines the pipelines and overrides only the `pipeline`
# entry of each split.
_base_ = './cityscapes.py'
# Defined locally because the pipelines below reference it by name in this module.
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
crop_size = (769, 769)
# Training-time transforms, applied in list order.
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations'),
    dict(type='Resize', img_scale=(2049, 1025), ratio_range=(0.5, 2.0)),
    dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
    dict(type='RandomFlip', prob=0.5),
    dict(type='PhotoMetricDistortion'),
    dict(type='Normalize', **img_norm_cfg),
    # Pad target is crop_size; fill value 0 for the image and 255 for the label map.
    dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_semantic_seg']),
]
# Evaluation-time transforms: `img_ratios` is left commented out and flip=False.
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(2049, 1025),
        # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75],
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
# Only the pipelines are set here; no other `data` key is given in this file.
data = dict(
    train=dict(pipeline=train_pipeline),
    val=dict(pipeline=test_pipeline),
    test=dict(pipeline=test_pipeline))
36 | 


--------------------------------------------------------------------------------
/segmentation/configs/_base_/datasets/cityscapes_832x832.py:
--------------------------------------------------------------------------------
# Cityscapes variant with 832x832 training crops. Based on ./cityscapes.py; this file
# redefines the pipelines and overrides only the `pipeline` entry of each split.
_base_ = './cityscapes.py'
# Defined locally because the pipelines below reference it by name in this module.
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
crop_size = (832, 832)
# Training-time transforms, applied in list order.
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations'),
    dict(type='Resize', img_scale=(2048, 1024), ratio_range=(0.5, 2.0)),
    dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
    dict(type='RandomFlip', prob=0.5),
    dict(type='PhotoMetricDistortion'),
    dict(type='Normalize', **img_norm_cfg),
    # Pad target is crop_size; fill value 0 for the image and 255 for the label map.
    dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_semantic_seg']),
]
# Evaluation-time transforms: `img_ratios` is left commented out and flip=False.
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(2048, 1024),
        # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75],
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
# Only the pipelines are set here; no other `data` key is given in this file.
data = dict(
    train=dict(pipeline=train_pipeline),
    val=dict(pipeline=test_pipeline),
    test=dict(pipeline=test_pipeline))
36 | 


--------------------------------------------------------------------------------
/segmentation/configs/_base_/datasets/cityscapes_896x896.py:
--------------------------------------------------------------------------------
# Cityscapes variant with 896x896 training crops. Based on ./cityscapes.py; this file
# redefines the pipelines and overrides only the `pipeline` entry of each split.
_base_ = './cityscapes.py'
# Defined locally because the pipelines below reference it by name in this module.
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
crop_size = (896, 896)
# Training-time transforms, applied in list order.
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations'),
    dict(type='Resize', img_scale=(2048, 1024), ratio_range=(0.5, 2.0)),
    dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
    dict(type='RandomFlip', prob=0.5),
    dict(type='PhotoMetricDistortion'),
    dict(type='Normalize', **img_norm_cfg),
    # Pad target is crop_size; fill value 0 for the image and 255 for the label map.
    dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_semantic_seg']),
]
# Evaluation-time transforms: `img_ratios` is left commented out and flip=False.
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(2048, 1024),
        # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75],
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
# Only the pipelines are set here; no other `data` key is given in this file.
data = dict(
    train=dict(pipeline=train_pipeline),
    val=dict(pipeline=test_pipeline),
    test=dict(pipeline=test_pipeline))
36 | 


--------------------------------------------------------------------------------
/segmentation/configs/_base_/datasets/coco-stuff10k.py:
--------------------------------------------------------------------------------
# dataset settings: COCOStuffDataset under data/coco_stuff10k, 512x512 training crops
dataset_type = 'COCOStuffDataset'
data_root = 'data/coco_stuff10k'
# Per-channel mean/std passed to the `Normalize` step of both pipelines below.
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
crop_size = (512, 512)
# Training-time transforms, applied in list order.
train_pipeline = [
    dict(type='LoadImageFromFile'),
    # reduce_zero_label=True is set here and again on every dataset split below.
    dict(type='LoadAnnotations', reduce_zero_label=True),
    dict(type='Resize', img_scale=(2048, 512), ratio_range=(0.5, 2.0)),
    dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
    dict(type='RandomFlip', prob=0.5),
    dict(type='PhotoMetricDistortion'),
    dict(type='Normalize', **img_norm_cfg),
    # Pad target is crop_size; fill value 0 for the image and 255 for the label map.
    dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_semantic_seg']),
]
# Evaluation-time transforms: `img_ratios` is left commented out and flip=False, so only
# `img_scale` is configured on the MultiScaleFlipAug wrapper.
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(2048, 512),
        # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75],
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
# Loader settings and splits. Training reads the train2014 directories; `val` and `test`
# both read the test2014 directories and share `test_pipeline`.
data = dict(
    samples_per_gpu=4,
    workers_per_gpu=4,
    train=dict(
        type=dataset_type,
        data_root=data_root,
        reduce_zero_label=True,
        img_dir='images/train2014',
        ann_dir='annotations/train2014',
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        data_root=data_root,
        reduce_zero_label=True,
        img_dir='images/test2014',
        ann_dir='annotations/test2014',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        data_root=data_root,
        reduce_zero_label=True,
        img_dir='images/test2014',
        ann_dir='annotations/test2014',
        pipeline=test_pipeline))
58 | 


--------------------------------------------------------------------------------
/segmentation/configs/_base_/datasets/coco-stuff164k.py:
--------------------------------------------------------------------------------
# dataset settings: COCOStuffDataset under data/coco_stuff164k, 512x512 training crops
dataset_type = 'COCOStuffDataset'
data_root = 'data/coco_stuff164k'
# Per-channel mean/std passed to the `Normalize` step of both pipelines below.
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
crop_size = (512, 512)
# Training-time transforms, applied in list order.
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations'),
    dict(type='Resize', img_scale=(2048, 512), ratio_range=(0.5, 2.0)),
    dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
    dict(type='RandomFlip', prob=0.5),
    dict(type='PhotoMetricDistortion'),
    dict(type='Normalize', **img_norm_cfg),
    # Pad target is crop_size; fill value 0 for the image and 255 for the label map.
    dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_semantic_seg']),
]
# Evaluation-time transforms: `img_ratios` is left commented out and flip=False, so only
# `img_scale` is configured on the MultiScaleFlipAug wrapper.
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(2048, 512),
        # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75],
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
# Loader settings and splits. Training reads the train2017 directories; `val` and `test`
# both read the val2017 directories and share `test_pipeline`.
data = dict(
    samples_per_gpu=4,
    workers_per_gpu=4,
    train=dict(
        type=dataset_type,
        data_root=data_root,
        img_dir='images/train2017',
        ann_dir='annotations/train2017',
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        data_root=data_root,
        img_dir='images/val2017',
        ann_dir='annotations/val2017',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        data_root=data_root,
        img_dir='images/val2017',
        ann_dir='annotations/val2017',
        pipeline=test_pipeline))
55 | 


--------------------------------------------------------------------------------
/segmentation/configs/_base_/datasets/drive.py:
--------------------------------------------------------------------------------
 1 | # dataset settings: DRIVEDataset rooted at data/DRIVE
 2 | dataset_type = 'DRIVEDataset'
 3 | data_root = 'data/DRIVE'
 4 | img_norm_cfg = dict(  # unpacked into the Normalize step of both pipelines
 5 |     mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
 6 | img_scale = (584, 565)  # used by the train Resize and the test MultiScaleFlipAug
 7 | crop_size = (64, 64)  # shared by RandomCrop and Pad in train_pipeline
 8 | train_pipeline = [
 9 |     dict(type='LoadImageFromFile'),
10 |     dict(type='LoadAnnotations'),
11 |     dict(type='Resize', img_scale=img_scale, ratio_range=(0.5, 2.0)),
12 |     dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
13 |     dict(type='RandomFlip', prob=0.5),
14 |     dict(type='PhotoMetricDistortion'),
15 |     dict(type='Normalize', **img_norm_cfg),
16 |     dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
17 |     dict(type='DefaultFormatBundle'),
18 |     dict(type='Collect', keys=['img', 'gt_semantic_seg'])
19 | ]
20 | test_pipeline = [  # has no LoadAnnotations step and collects only 'img'
21 |     dict(type='LoadImageFromFile'),
22 |     dict(
23 |         type='MultiScaleFlipAug',
24 |         img_scale=img_scale,
25 |         # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75, 2.0],
26 |         flip=False,
27 |         transforms=[
28 |             dict(type='Resize', keep_ratio=True),
29 |             dict(type='RandomFlip'),
30 |             dict(type='Normalize', **img_norm_cfg),
31 |             dict(type='ImageToTensor', keys=['img']),
32 |             dict(type='Collect', keys=['img'])
33 |         ])
34 | ]
35 | 
36 | data = dict(
37 |     samples_per_gpu=4,
38 |     workers_per_gpu=4,
39 |     train=dict(
40 |         type='RepeatDataset',  # wrapper; the real dataset is the nested dataset= dict
41 |         times=40000,  # NOTE(review): presumably repeats a small train set; confirm
42 |         dataset=dict(
43 |             type=dataset_type,
44 |             data_root=data_root,
45 |             img_dir='images/training',
46 |             ann_dir='annotations/training',
47 |             pipeline=train_pipeline)),
48 |     val=dict(
49 |         type=dataset_type,
50 |         data_root=data_root,
51 |         img_dir='images/validation',
52 |         ann_dir='annotations/validation',
53 |         pipeline=test_pipeline),
54 |     test=dict(  # same validation directories and pipeline as val
55 |         type=dataset_type,
56 |         data_root=data_root,
57 |         img_dir='images/validation',
58 |         ann_dir='annotations/validation',
59 |         pipeline=test_pipeline))
60 | 


--------------------------------------------------------------------------------
/segmentation/configs/_base_/datasets/hrf.py:
--------------------------------------------------------------------------------
 1 | # dataset settings: HRFDataset rooted at data/HRF
 2 | dataset_type = 'HRFDataset'
 3 | data_root = 'data/HRF'
 4 | img_norm_cfg = dict(  # unpacked into the Normalize step of both pipelines
 5 |     mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
 6 | img_scale = (2336, 3504)  # used by the train Resize and the test MultiScaleFlipAug
 7 | crop_size = (256, 256)  # shared by RandomCrop and Pad in train_pipeline
 8 | train_pipeline = [
 9 |     dict(type='LoadImageFromFile'),
10 |     dict(type='LoadAnnotations'),
11 |     dict(type='Resize', img_scale=img_scale, ratio_range=(0.5, 2.0)),
12 |     dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
13 |     dict(type='RandomFlip', prob=0.5),
14 |     dict(type='PhotoMetricDistortion'),
15 |     dict(type='Normalize', **img_norm_cfg),
16 |     dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
17 |     dict(type='DefaultFormatBundle'),
18 |     dict(type='Collect', keys=['img', 'gt_semantic_seg'])
19 | ]
20 | test_pipeline = [  # has no LoadAnnotations step and collects only 'img'
21 |     dict(type='LoadImageFromFile'),
22 |     dict(
23 |         type='MultiScaleFlipAug',
24 |         img_scale=img_scale,
25 |         # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75, 2.0],
26 |         flip=False,
27 |         transforms=[
28 |             dict(type='Resize', keep_ratio=True),
29 |             dict(type='RandomFlip'),
30 |             dict(type='Normalize', **img_norm_cfg),
31 |             dict(type='ImageToTensor', keys=['img']),
32 |             dict(type='Collect', keys=['img'])
33 |         ])
34 | ]
35 | 
36 | data = dict(
37 |     samples_per_gpu=4,
38 |     workers_per_gpu=4,
39 |     train=dict(
40 |         type='RepeatDataset',  # wrapper; the real dataset is the nested dataset= dict
41 |         times=40000,  # NOTE(review): presumably repeats a small train set; confirm
42 |         dataset=dict(
43 |             type=dataset_type,
44 |             data_root=data_root,
45 |             img_dir='images/training',
46 |             ann_dir='annotations/training',
47 |             pipeline=train_pipeline)),
48 |     val=dict(
49 |         type=dataset_type,
50 |         data_root=data_root,
51 |         img_dir='images/validation',
52 |         ann_dir='annotations/validation',
53 |         pipeline=test_pipeline),
54 |     test=dict(  # same validation directories and pipeline as val
55 |         type=dataset_type,
56 |         data_root=data_root,
57 |         img_dir='images/validation',
58 |         ann_dir='annotations/validation',
59 |         pipeline=test_pipeline))
60 | 


--------------------------------------------------------------------------------
/segmentation/configs/_base_/datasets/loveda.py:
--------------------------------------------------------------------------------
 1 | # dataset settings: LoveDADataset rooted at data/loveDA
 2 | dataset_type = 'LoveDADataset'
 3 | data_root = 'data/loveDA'
 4 | img_norm_cfg = dict(  # unpacked into the Normalize step of both pipelines
 5 |     mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
 6 | crop_size = (512, 512)  # shared by RandomCrop and Pad in train_pipeline
 7 | train_pipeline = [
 8 |     dict(type='LoadImageFromFile'),
 9 |     dict(type='LoadAnnotations', reduce_zero_label=True),
10 |     dict(type='Resize', img_scale=(2048, 512), ratio_range=(0.5, 2.0)),
11 |     dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
12 |     dict(type='RandomFlip', prob=0.5),
13 |     dict(type='PhotoMetricDistortion'),
14 |     dict(type='Normalize', **img_norm_cfg),
15 |     dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
16 |     dict(type='DefaultFormatBundle'),
17 |     dict(type='Collect', keys=['img', 'gt_semantic_seg']),
18 | ]
19 | test_pipeline = [  # has no LoadAnnotations step and collects only 'img'
20 |     dict(type='LoadImageFromFile'),
21 |     dict(
22 |         type='MultiScaleFlipAug',
23 |         img_scale=(1024, 1024),  # differs from the (2048, 512) used by the train Resize
24 |         # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75],
25 |         flip=False,
26 |         transforms=[
27 |             dict(type='Resize', keep_ratio=True),
28 |             dict(type='RandomFlip'),
29 |             dict(type='Normalize', **img_norm_cfg),
30 |             dict(type='ImageToTensor', keys=['img']),
31 |             dict(type='Collect', keys=['img']),
32 |         ])
33 | ]
34 | data = dict(
35 |     samples_per_gpu=4,
36 |     workers_per_gpu=4,
37 |     train=dict(
38 |         type=dataset_type,
39 |         data_root=data_root,
40 |         img_dir='img_dir/train',
41 |         ann_dir='ann_dir/train',
42 |         pipeline=train_pipeline),
43 |     val=dict(
44 |         type=dataset_type,
45 |         data_root=data_root,
46 |         img_dir='img_dir/val',
47 |         ann_dir='ann_dir/val',
48 |         pipeline=test_pipeline),
49 |     test=dict(  # same val directories and pipeline as val
50 |         type=dataset_type,
51 |         data_root=data_root,
52 |         img_dir='img_dir/val',
53 |         ann_dir='ann_dir/val',
54 |         pipeline=test_pipeline))
55 | 


--------------------------------------------------------------------------------
/segmentation/configs/_base_/datasets/mapillary_896x896.py:
--------------------------------------------------------------------------------
 1 | # dataset settings
 2 | dataset_type = 'MapillaryDataset'
 3 | data_root = 'data/Mapillary/'
 4 | img_norm_cfg = dict(
 5 |     mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
 6 | crop_size = (896, 896)
 7 | train_pipeline = [
 8 |     dict(type='LoadImageFromFile'),
 9 |     dict(type='LoadAnnotations'),
10 |     dict(type='MapillaryHack'),
11 |     dict(type='Resize', img_scale=(2048, 1024), ratio_range=(0.5, 1.0)),
12 |     dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
13 |     dict(type='RandomFlip', prob=0.5),
14 |     dict(type='PhotoMetricDistortion'),
15 |     dict(type='Normalize', **img_norm_cfg),
16 |     dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
17 |     dict(type='DefaultFormatBundle'),
18 |     dict(type='Collect', keys=['img', 'gt_semantic_seg']),
19 | ]
20 | test_pipeline = [
21 |     dict(type='LoadImageFromFile'),
22 |     dict(
23 |         type='MultiScaleFlipAug',
24 |         img_scale=(2048, 1024),
25 |         # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75],
26 |         flip=False,
27 |         transforms=[
28 |             dict(type='Resize', keep_ratio=True),
29 |             dict(type='RandomFlip'),
30 |             dict(type='Normalize', **img_norm_cfg),
31 |             dict(type='ImageToTensor', keys=['img']),
32 |             dict(type='Collect', keys=['img']),
33 |         ])
34 | ]
35 | data = dict(
36 |     samples_per_gpu=2,
37 |     workers_per_gpu=2,
38 |     train=dict(
39 |         type=dataset_type,
40 |         data_root='data/Mapillary/',
41 |         img_dir=['training/images', 'validation/images'],
42 |         ann_dir=['training/labels', 'validation/labels'],
43 |         pipeline=train_pipeline),
44 |     val=dict(
45 |         type='CityscapesDataset',
46 |         data_root='data/cityscapes/',
47 |         img_dir='leftImg8bit/val',
48 |         ann_dir='gtFine/val',
49 |         pipeline=test_pipeline),
50 |     test=dict(
51 |         type='CityscapesDataset',
52 |         data_root='data/cityscapes/',
53 |         img_dir='leftImg8bit/val',
54 |         ann_dir='gtFine/val',
55 |         pipeline=test_pipeline))
56 | 


--------------------------------------------------------------------------------
/segmentation/configs/_base_/datasets/nyu_depth_v2.py:
--------------------------------------------------------------------------------
 1 | # dataset settings: NYUDepthV2Dataset rooted at data/nyu_depth_v2/
 2 | dataset_type = 'NYUDepthV2Dataset'
 3 | data_root = 'data/nyu_depth_v2/'
 4 | img_norm_cfg = dict(  # unpacked into the Normalize step of both pipelines
 5 |     mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
 6 | 
 7 | crop_size = (480, 480)  # shared by RandomCrop and Pad in train_pipeline
 8 | 
 9 | train_pipeline = [
10 |     dict(type='LoadImageFromFile'),
11 |     dict(type='LoadAnnotations', reduce_zero_label=True),
12 |     dict(type='Resize', img_scale=(640, 480), ratio_range=(0.5, 2.0)),
13 |     dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
14 |     dict(type='RandomFlip', prob=0.5),
15 |     dict(type='PhotoMetricDistortion'),
16 |     dict(type='Normalize', **img_norm_cfg),
17 |     dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
18 |     dict(type='DefaultFormatBundle'),
19 |     dict(type='Collect', keys=['img', 'gt_semantic_seg']),
20 | ]
21 | test_pipeline = [  # has no LoadAnnotations step and collects only 'img'
22 |     dict(type='LoadImageFromFile'),
23 |     dict(
24 |         type='MultiScaleFlipAug',
25 |         img_scale=(640, 480),
26 |         # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75],
27 |         flip=False,
28 |         transforms=[
29 |             dict(type='Resize', keep_ratio=True),
30 |             dict(type='RandomFlip'),
31 |             dict(type='Normalize', **img_norm_cfg),
32 |             dict(type='ImageToTensor', keys=['img']),
33 |             dict(type='Collect', keys=['img']),
34 |         ])
35 | ]
36 | data = dict(
37 |     samples_per_gpu=4,
38 |     workers_per_gpu=4,
39 |     train=dict(  # all three subsets share img_dir/ann_dir and differ only in split
40 |         type=dataset_type,
41 |         data_root=data_root,
42 |         img_dir='image',
43 |         ann_dir='label40',
44 |         split='train.txt',
45 |         pipeline=train_pipeline),
46 |     val=dict(
47 |         type=dataset_type,
48 |         data_root=data_root,
49 |         img_dir='image',
50 |         ann_dir='label40',
51 |         split='test.txt',
52 |         pipeline=test_pipeline),
53 |     test=dict(  # same test.txt split and pipeline as val
54 |         type=dataset_type,
55 |         data_root=data_root,
56 |         img_dir='image',
57 |         ann_dir='label40',
58 |         split='test.txt',
59 |         pipeline=test_pipeline))
60 | 


--------------------------------------------------------------------------------
/segmentation/configs/_base_/datasets/pascal_context.py:
--------------------------------------------------------------------------------
 1 | # dataset settings: PascalContextDataset rooted at data/VOCdevkit/VOC2010/
 2 | dataset_type = 'PascalContextDataset'
 3 | data_root = 'data/VOCdevkit/VOC2010/'
 4 | img_norm_cfg = dict(  # unpacked into the Normalize step of both pipelines
 5 |     mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
 6 | 
 7 | img_scale = (520, 520)  # used by the train Resize and the test MultiScaleFlipAug
 8 | crop_size = (480, 480)  # shared by RandomCrop and Pad in train_pipeline
 9 | 
10 | train_pipeline = [
11 |     dict(type='LoadImageFromFile'),
12 |     dict(type='LoadAnnotations'),
13 |     dict(type='Resize', img_scale=img_scale, ratio_range=(0.5, 2.0)),
14 |     dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
15 |     dict(type='RandomFlip', prob=0.5),
16 |     dict(type='PhotoMetricDistortion'),
17 |     dict(type='Normalize', **img_norm_cfg),
18 |     dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
19 |     dict(type='DefaultFormatBundle'),
20 |     dict(type='Collect', keys=['img', 'gt_semantic_seg']),
21 | ]
22 | test_pipeline = [  # has no LoadAnnotations step and collects only 'img'
23 |     dict(type='LoadImageFromFile'),
24 |     dict(
25 |         type='MultiScaleFlipAug',
26 |         img_scale=img_scale,
27 |         # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75],
28 |         flip=False,
29 |         transforms=[
30 |             dict(type='Resize', keep_ratio=True),
31 |             dict(type='RandomFlip'),
32 |             dict(type='Normalize', **img_norm_cfg),
33 |             dict(type='ImageToTensor', keys=['img']),
34 |             dict(type='Collect', keys=['img']),
35 |         ])
36 | ]
37 | data = dict(
38 |     samples_per_gpu=4,
39 |     workers_per_gpu=4,
40 |     train=dict(  # all three subsets share img_dir/ann_dir and differ only in split
41 |         type=dataset_type,
42 |         data_root=data_root,
43 |         img_dir='JPEGImages',
44 |         ann_dir='SegmentationClassContext',
45 |         split='ImageSets/SegmentationContext/train.txt',
46 |         pipeline=train_pipeline),
47 |     val=dict(
48 |         type=dataset_type,
49 |         data_root=data_root,
50 |         img_dir='JPEGImages',
51 |         ann_dir='SegmentationClassContext',
52 |         split='ImageSets/SegmentationContext/val.txt',
53 |         pipeline=test_pipeline),
54 |     test=dict(  # same val.txt split and pipeline as val
55 |         type=dataset_type,
56 |         data_root=data_root,
57 |         img_dir='JPEGImages',
58 |         ann_dir='SegmentationClassContext',
59 |         split='ImageSets/SegmentationContext/val.txt',
60 |         pipeline=test_pipeline))
61 | 


--------------------------------------------------------------------------------
/segmentation/configs/_base_/datasets/pascal_context_59.py:
--------------------------------------------------------------------------------
 1 | # dataset settings: PascalContextDataset59 rooted at data/VOCdevkit/VOC2010/
 2 | dataset_type = 'PascalContextDataset59'
 3 | data_root = 'data/VOCdevkit/VOC2010/'
 4 | img_norm_cfg = dict(  # unpacked into the Normalize step of both pipelines
 5 |     mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
 6 | 
 7 | img_scale = (520, 520)  # used by the train Resize and the test MultiScaleFlipAug
 8 | crop_size = (480, 480)  # shared by RandomCrop and Pad in train_pipeline
 9 | 
10 | train_pipeline = [
11 |     dict(type='LoadImageFromFile'),
12 |     dict(type='LoadAnnotations', reduce_zero_label=True),
13 |     dict(type='Resize', img_scale=img_scale, ratio_range=(0.5, 2.0)),
14 |     dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
15 |     dict(type='RandomFlip', prob=0.5),
16 |     dict(type='PhotoMetricDistortion'),
17 |     dict(type='Normalize', **img_norm_cfg),
18 |     dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
19 |     dict(type='DefaultFormatBundle'),
20 |     dict(type='Collect', keys=['img', 'gt_semantic_seg']),
21 | ]
22 | test_pipeline = [  # has no LoadAnnotations step and collects only 'img'
23 |     dict(type='LoadImageFromFile'),
24 |     dict(
25 |         type='MultiScaleFlipAug',
26 |         img_scale=img_scale,
27 |         # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75],
28 |         flip=False,
29 |         transforms=[
30 |             dict(type='Resize', keep_ratio=True),
31 |             dict(type='RandomFlip'),
32 |             dict(type='Normalize', **img_norm_cfg),
33 |             dict(type='ImageToTensor', keys=['img']),
34 |             dict(type='Collect', keys=['img']),
35 |         ])
36 | ]
37 | data = dict(
38 |     samples_per_gpu=4,
39 |     workers_per_gpu=4,
40 |     train=dict(  # all three subsets share img_dir/ann_dir and differ only in split
41 |         type=dataset_type,
42 |         data_root=data_root,
43 |         img_dir='JPEGImages',
44 |         ann_dir='SegmentationClassContext',
45 |         split='ImageSets/SegmentationContext/train.txt',
46 |         pipeline=train_pipeline),
47 |     val=dict(
48 |         type=dataset_type,
49 |         data_root=data_root,
50 |         img_dir='JPEGImages',
51 |         ann_dir='SegmentationClassContext',
52 |         split='ImageSets/SegmentationContext/val.txt',
53 |         pipeline=test_pipeline),
54 |     test=dict(  # same val.txt split and pipeline as val
55 |         type=dataset_type,
56 |         data_root=data_root,
57 |         img_dir='JPEGImages',
58 |         ann_dir='SegmentationClassContext',
59 |         split='ImageSets/SegmentationContext/val.txt',
60 |         pipeline=test_pipeline))
61 | 


--------------------------------------------------------------------------------
/segmentation/configs/_base_/datasets/pascal_voc12.py:
--------------------------------------------------------------------------------
 1 | # dataset settings: PascalVOCDataset rooted at data/VOCdevkit/VOC2012
 2 | dataset_type = 'PascalVOCDataset'
 3 | data_root = 'data/VOCdevkit/VOC2012'
 4 | img_norm_cfg = dict(  # unpacked into the Normalize step of both pipelines
 5 |     mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
 6 | crop_size = (512, 512)  # shared by RandomCrop and Pad in train_pipeline
 7 | train_pipeline = [
 8 |     dict(type='LoadImageFromFile'),
 9 |     dict(type='LoadAnnotations'),
10 |     dict(type='Resize', img_scale=(2048, 512), ratio_range=(0.5, 2.0)),
11 |     dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
12 |     dict(type='RandomFlip', prob=0.5),
13 |     dict(type='PhotoMetricDistortion'),
14 |     dict(type='Normalize', **img_norm_cfg),
15 |     dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
16 |     dict(type='DefaultFormatBundle'),
17 |     dict(type='Collect', keys=['img', 'gt_semantic_seg']),
18 | ]
19 | test_pipeline = [  # has no LoadAnnotations step and collects only 'img'
20 |     dict(type='LoadImageFromFile'),
21 |     dict(
22 |         type='MultiScaleFlipAug',
23 |         img_scale=(2048, 512),
24 |         # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75],
25 |         flip=False,
26 |         transforms=[
27 |             dict(type='Resize', keep_ratio=True),
28 |             dict(type='RandomFlip'),
29 |             dict(type='Normalize', **img_norm_cfg),
30 |             dict(type='ImageToTensor', keys=['img']),
31 |             dict(type='Collect', keys=['img']),
32 |         ])
33 | ]
34 | data = dict(
35 |     samples_per_gpu=4,
36 |     workers_per_gpu=4,
37 |     train=dict(  # all three subsets share img_dir/ann_dir and differ only in split
38 |         type=dataset_type,
39 |         data_root=data_root,
40 |         img_dir='JPEGImages',
41 |         ann_dir='SegmentationClass',
42 |         split='ImageSets/Segmentation/train.txt',
43 |         pipeline=train_pipeline),
44 |     val=dict(
45 |         type=dataset_type,
46 |         data_root=data_root,
47 |         img_dir='JPEGImages',
48 |         ann_dir='SegmentationClass',
49 |         split='ImageSets/Segmentation/val.txt',
50 |         pipeline=test_pipeline),
51 |     test=dict(  # same val.txt split and pipeline as val
52 |         type=dataset_type,
53 |         data_root=data_root,
54 |         img_dir='JPEGImages',
55 |         ann_dir='SegmentationClass',
56 |         split='ImageSets/Segmentation/val.txt',
57 |         pipeline=test_pipeline))
58 | 


--------------------------------------------------------------------------------
/segmentation/configs/_base_/datasets/pascal_voc12_aug.py:
--------------------------------------------------------------------------------
 1 | _base_ = './pascal_voc12.py'  # NOTE(review): presumably merged under this file by the config loader; confirm
 2 | # dataset settings: only data.train ann_dir and split are set here
 3 | data = dict(
 4 |     train=dict(
 5 |         ann_dir=['SegmentationClass', 'SegmentationClassAug'],
 6 |         split=[  # two split files for two ann_dir entries; presumably paired by position
 7 |             'ImageSets/Segmentation/train.txt',
 8 |             'ImageSets/Segmentation/aug.txt'
 9 |         ]))
10 | 


--------------------------------------------------------------------------------
/segmentation/configs/_base_/datasets/potsdam.py:
--------------------------------------------------------------------------------
 1 | # dataset settings: PotsdamDataset rooted at data/potsdam
 2 | dataset_type = 'PotsdamDataset'
 3 | data_root = 'data/potsdam'
 4 | img_norm_cfg = dict(  # unpacked into the Normalize step of both pipelines
 5 |     mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
 6 | crop_size = (512, 512)  # shared by RandomCrop and Pad in train_pipeline
 7 | train_pipeline = [
 8 |     dict(type='LoadImageFromFile'),
 9 |     dict(type='LoadAnnotations', reduce_zero_label=True),
10 |     dict(type='Resize', img_scale=(512, 512), ratio_range=(0.5, 2.0)),
11 |     dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
12 |     dict(type='RandomFlip', prob=0.5),
13 |     dict(type='PhotoMetricDistortion'),
14 |     dict(type='Normalize', **img_norm_cfg),
15 |     dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
16 |     dict(type='DefaultFormatBundle'),
17 |     dict(type='Collect', keys=['img', 'gt_semantic_seg']),
18 | ]
19 | test_pipeline = [  # has no LoadAnnotations step and collects only 'img'
20 |     dict(type='LoadImageFromFile'),
21 |     dict(
22 |         type='MultiScaleFlipAug',
23 |         img_scale=(512, 512),
24 |         # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75],
25 |         flip=False,
26 |         transforms=[
27 |             dict(type='Resize', keep_ratio=True),
28 |             dict(type='RandomFlip'),
29 |             dict(type='Normalize', **img_norm_cfg),
30 |             dict(type='ImageToTensor', keys=['img']),
31 |             dict(type='Collect', keys=['img']),
32 |         ])
33 | ]
34 | data = dict(
35 |     samples_per_gpu=4,
36 |     workers_per_gpu=4,
37 |     train=dict(
38 |         type=dataset_type,
39 |         data_root=data_root,
40 |         img_dir='img_dir/train',
41 |         ann_dir='ann_dir/train',
42 |         pipeline=train_pipeline),
43 |     val=dict(
44 |         type=dataset_type,
45 |         data_root=data_root,
46 |         img_dir='img_dir/val',
47 |         ann_dir='ann_dir/val',
48 |         pipeline=test_pipeline),
49 |     test=dict(  # same val directories and pipeline as val
50 |         type=dataset_type,
51 |         data_root=data_root,
52 |         img_dir='img_dir/val',
53 |         ann_dir='ann_dir/val',
54 |         pipeline=test_pipeline))
55 | 


--------------------------------------------------------------------------------
/segmentation/configs/_base_/datasets/stare.py:
--------------------------------------------------------------------------------
 1 | # dataset settings: STAREDataset rooted at data/STARE
 2 | dataset_type = 'STAREDataset'
 3 | data_root = 'data/STARE'
 4 | img_norm_cfg = dict(  # unpacked into the Normalize step of both pipelines
 5 |     mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
 6 | img_scale = (605, 700)  # used by the train Resize and the test MultiScaleFlipAug
 7 | crop_size = (128, 128)  # shared by RandomCrop and Pad in train_pipeline
 8 | train_pipeline = [
 9 |     dict(type='LoadImageFromFile'),
10 |     dict(type='LoadAnnotations'),
11 |     dict(type='Resize', img_scale=img_scale, ratio_range=(0.5, 2.0)),
12 |     dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
13 |     dict(type='RandomFlip', prob=0.5),
14 |     dict(type='PhotoMetricDistortion'),
15 |     dict(type='Normalize', **img_norm_cfg),
16 |     dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
17 |     dict(type='DefaultFormatBundle'),
18 |     dict(type='Collect', keys=['img', 'gt_semantic_seg'])
19 | ]
20 | test_pipeline = [  # has no LoadAnnotations step and collects only 'img'
21 |     dict(type='LoadImageFromFile'),
22 |     dict(
23 |         type='MultiScaleFlipAug',
24 |         img_scale=img_scale,
25 |         # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75, 2.0],
26 |         flip=False,
27 |         transforms=[
28 |             dict(type='Resize', keep_ratio=True),
29 |             dict(type='RandomFlip'),
30 |             dict(type='Normalize', **img_norm_cfg),
31 |             dict(type='ImageToTensor', keys=['img']),
32 |             dict(type='Collect', keys=['img'])
33 |         ])
34 | ]
35 | 
36 | data = dict(
37 |     samples_per_gpu=4,
38 |     workers_per_gpu=4,
39 |     train=dict(
40 |         type='RepeatDataset',  # wrapper; the real dataset is the nested dataset= dict
41 |         times=40000,  # NOTE(review): presumably repeats a small train set; confirm
42 |         dataset=dict(
43 |             type=dataset_type,
44 |             data_root=data_root,
45 |             img_dir='images/training',
46 |             ann_dir='annotations/training',
47 |             pipeline=train_pipeline)),
48 |     val=dict(
49 |         type=dataset_type,
50 |         data_root=data_root,
51 |         img_dir='images/validation',
52 |         ann_dir='annotations/validation',
53 |         pipeline=test_pipeline),
54 |     test=dict(  # same validation directories and pipeline as val
55 |         type=dataset_type,
56 |         data_root=data_root,
57 |         img_dir='images/validation',
58 |         ann_dir='annotations/validation',
59 |         pipeline=test_pipeline))
60 | 


--------------------------------------------------------------------------------
/segmentation/configs/_base_/default_runtime.py:
--------------------------------------------------------------------------------
 1 | # yapf:disable
 2 | log_config = dict(  # logging: one text logger hook, interval 50, by_epoch disabled
 3 |     interval=50,
 4 |     hooks=[
 5 |         dict(type='TextLoggerHook', by_epoch=False),
 6 |         # dict(type='TensorboardLoggerHook')
 7 |     ])
 8 | # yapf:enable
 9 | dist_params = dict(backend='nccl')
10 | log_level = 'INFO'
11 | load_from = None  # no checkpoint path is set in this base file
12 | resume_from = None  # no resume path is set in this base file
13 | workflow = [('train', 1)]  # only a 'train' entry is listed; no 'val' entry
14 | cudnn_benchmark = True
15 | 


--------------------------------------------------------------------------------
/segmentation/configs/_base_/models/ann_r50-d8.py:
--------------------------------------------------------------------------------
 1 | # model settings: EncoderDecoder with ResNetV1c depth-50 backbone, ANNHead decode head, FCNHead auxiliary head
 2 | norm_cfg = dict(type='SyncBN', requires_grad=True)  # reused by the backbone and both heads
 3 | model = dict(
 4 |     type='EncoderDecoder',
 5 |     pretrained='open-mmlab://resnet50_v1c',
 6 |     backbone=dict(
 7 |         type='ResNetV1c',
 8 |         depth=50,
 9 |         num_stages=4,
10 |         out_indices=(0, 1, 2, 3),
11 |         dilations=(1, 1, 2, 4),
12 |         strides=(1, 2, 1, 1),
13 |         norm_cfg=norm_cfg,
14 |         norm_eval=False,
15 |         style='pytorch',
16 |         contract_dilation=True),
17 |     decode_head=dict(
18 |         type='ANNHead',
19 |         in_channels=[1024, 2048],
20 |         in_index=[2, 3],  # NOTE(review): presumably positions within backbone out_indices; confirm
21 |         channels=512,
22 |         project_channels=256,
23 |         query_scales=(1, ),
24 |         key_pool_scales=(1, 3, 6, 8),
25 |         dropout_ratio=0.1,
26 |         num_classes=19,
27 |         norm_cfg=norm_cfg,
28 |         align_corners=False,
29 |         loss_decode=dict(
30 |             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
31 |     auxiliary_head=dict(
32 |         type='FCNHead',
33 |         in_channels=1024,
34 |         in_index=2,
35 |         channels=256,
36 |         num_convs=1,
37 |         concat_input=False,
38 |         dropout_ratio=0.1,
39 |         num_classes=19,  # same class count as decode_head
40 |         norm_cfg=norm_cfg,
41 |         align_corners=False,
42 |         loss_decode=dict(  # auxiliary loss_weight is 0.4 versus 1.0 on decode_head
43 |             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
44 |     # model training and testing settings
45 |     train_cfg=dict(),
46 |     test_cfg=dict(mode='whole'))
47 | 


--------------------------------------------------------------------------------
/segmentation/configs/_base_/models/apcnet_r50-d8.py:
--------------------------------------------------------------------------------
 1 | # model settings
 2 | norm_cfg = dict(type='SyncBN', requires_grad=True)
 3 | model = dict(
 4 |     type='EncoderDecoder',
 5 |     pretrained='open-mmlab://resnet50_v1c',
 6 |     backbone=dict(
 7 |         type='ResNetV1c',
 8 |         depth=50,
 9 |         num_stages=4,
10 |         out_indices=(0, 1, 2, 3),
11 |         dilations=(1, 1, 2, 4),
12 |         strides=(1, 2, 1, 1),
13 |         norm_cfg=norm_cfg,
14 |         norm_eval=False,
15 |         style='pytorch',
16 |         contract_dilation=True),
17 |     decode_head=dict(
18 |         type='APCHead',
19 |         in_channels=2048,
20 |         in_index=3,
21 |         channels=512,
22 |         pool_scales=(1, 2, 3, 6),
23 |         dropout_ratio=0.1,
24 |         num_classes=19,
25 |         norm_cfg=dict(type='SyncBN', requires_grad=True),
26 |         align_corners=False,
27 |         loss_decode=dict(
28 |             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
29 |     auxiliary_head=dict(
30 |         type='FCNHead',
31 |         in_channels=1024,
32 |         in_index=2,
33 |         channels=256,
34 |         num_convs=1,
35 |         concat_input=False,
36 |         dropout_ratio=0.1,
37 |         num_classes=19,
38 |         norm_cfg=norm_cfg,
39 |         align_corners=False,
40 |         loss_decode=dict(
41 |             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
42 |     # model training and testing settings
43 |     train_cfg=dict(),
44 |     test_cfg=dict(mode='whole'))
45 | 


--------------------------------------------------------------------------------
/segmentation/configs/_base_/models/ccnet_r50-d8.py:
--------------------------------------------------------------------------------
 1 | # model settings: EncoderDecoder with ResNetV1c depth-50 backbone, CCHead decode head, FCNHead auxiliary head
 2 | norm_cfg = dict(type='SyncBN', requires_grad=True)  # reused by the backbone and both heads
 3 | model = dict(
 4 |     type='EncoderDecoder',
 5 |     pretrained='open-mmlab://resnet50_v1c',
 6 |     backbone=dict(
 7 |         type='ResNetV1c',
 8 |         depth=50,
 9 |         num_stages=4,
10 |         out_indices=(0, 1, 2, 3),
11 |         dilations=(1, 1, 2, 4),
12 |         strides=(1, 2, 1, 1),
13 |         norm_cfg=norm_cfg,
14 |         norm_eval=False,
15 |         style='pytorch',
16 |         contract_dilation=True),
17 |     decode_head=dict(
18 |         type='CCHead',
19 |         in_channels=2048,
20 |         in_index=3,  # NOTE(review): presumably a position within backbone out_indices; confirm
21 |         channels=512,
22 |         recurrence=2,
23 |         dropout_ratio=0.1,
24 |         num_classes=19,
25 |         norm_cfg=norm_cfg,
26 |         align_corners=False,
27 |         loss_decode=dict(
28 |             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
29 |     auxiliary_head=dict(
30 |         type='FCNHead',
31 |         in_channels=1024,
32 |         in_index=2,
33 |         channels=256,
34 |         num_convs=1,
35 |         concat_input=False,
36 |         dropout_ratio=0.1,
37 |         num_classes=19,  # same class count as decode_head
38 |         norm_cfg=norm_cfg,
39 |         align_corners=False,
40 |         loss_decode=dict(  # auxiliary loss_weight is 0.4 versus 1.0 on decode_head
41 |             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
42 |     # model training and testing settings
43 |     train_cfg=dict(),
44 |     test_cfg=dict(mode='whole'))
45 | 


--------------------------------------------------------------------------------
/segmentation/configs/_base_/models/cgnet.py:
--------------------------------------------------------------------------------
 1 | # model settings: EncoderDecoder with CGNet backbone and a single FCNHead decode head (no auxiliary head)
 2 | norm_cfg = dict(type='SyncBN', eps=1e-03, requires_grad=True)  # reused by the backbone and decode head
 3 | model = dict(
 4 |     type='EncoderDecoder',
 5 |     backbone=dict(
 6 |         type='CGNet',
 7 |         norm_cfg=norm_cfg,
 8 |         in_channels=3,
 9 |         num_channels=(32, 64, 128),
10 |         num_blocks=(3, 21),
11 |         dilations=(2, 4),
12 |         reductions=(8, 16)),
13 |     decode_head=dict(
14 |         type='FCNHead',
15 |         in_channels=256,
16 |         in_index=2,
17 |         channels=256,
18 |         num_convs=0,
19 |         concat_input=False,
20 |         dropout_ratio=0,
21 |         num_classes=19,
22 |         norm_cfg=norm_cfg,
23 |         loss_decode=dict(
24 |             type='CrossEntropyLoss',
25 |             use_sigmoid=False,
26 |             loss_weight=1.0,
27 |             class_weight=[  # 19 values, matching num_classes=19 above
28 |                 2.5959933, 6.7415504, 3.5354059, 9.8663225, 9.690899, 9.369352,
29 |                 10.289121, 9.953208, 4.3097677, 9.490387, 7.674431, 9.396905,
30 |                 10.347791, 6.3927646, 10.226669, 10.241062, 10.280587,
31 |                 10.396974, 10.055647
32 |             ])),
33 |     # model training and testing settings
34 |     train_cfg=dict(sampler=None),
35 |     test_cfg=dict(mode='whole'))
36 | 


--------------------------------------------------------------------------------
/segmentation/configs/_base_/models/danet_r50-d8.py:
--------------------------------------------------------------------------------
 1 | # model settings: EncoderDecoder = ResNetV1c-50 backbone + DAHead decode head + FCNHead auxiliary head
 2 | norm_cfg = dict(type='SyncBN', requires_grad=True)  # shared by the backbone and both heads below
 3 | model = dict(
 4 |     type='EncoderDecoder',
 5 |     pretrained='open-mmlab://resnet50_v1c',
 6 |     backbone=dict(
 7 |         type='ResNetV1c',
 8 |         depth=50,
 9 |         num_stages=4,
10 |         out_indices=(0, 1, 2, 3),
11 |         dilations=(1, 1, 2, 4),
12 |         strides=(1, 2, 1, 1),  # last two entries are stride 1, paired with dilations 2 and 4 above
13 |         norm_cfg=norm_cfg,
14 |         norm_eval=False,
15 |         style='pytorch',
16 |         contract_dilation=True),
17 |     decode_head=dict(
18 |         type='DAHead',
19 |         in_channels=2048,
20 |         in_index=3,
21 |         channels=512,
22 |         pam_channels=64,
23 |         dropout_ratio=0.1,
24 |         num_classes=19,
25 |         norm_cfg=norm_cfg,
26 |         align_corners=False,
27 |         loss_decode=dict(
28 |             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
29 |     auxiliary_head=dict(
30 |         type='FCNHead',
31 |         in_channels=1024,
32 |         in_index=2,
33 |         channels=256,
34 |         num_convs=1,
35 |         concat_input=False,
36 |         dropout_ratio=0.1,
37 |         num_classes=19,
38 |         norm_cfg=norm_cfg,
39 |         align_corners=False,
40 |         loss_decode=dict(
41 |             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),  # auxiliary loss weighted 0.4 vs. 1.0 for the decode head
42 |     # model training and testing settings
43 |     train_cfg=dict(),
44 |     test_cfg=dict(mode='whole'))
45 | 


--------------------------------------------------------------------------------
/segmentation/configs/_base_/models/deeplabv3_r50-d8.py:
--------------------------------------------------------------------------------
 1 | # model settings: EncoderDecoder = ResNetV1c-50 backbone + ASPPHead decode head + FCNHead auxiliary head
 2 | norm_cfg = dict(type='SyncBN', requires_grad=True)  # shared by the backbone and both heads below
 3 | model = dict(
 4 |     type='EncoderDecoder',
 5 |     pretrained='open-mmlab://resnet50_v1c',
 6 |     backbone=dict(
 7 |         type='ResNetV1c',
 8 |         depth=50,
 9 |         num_stages=4,
10 |         out_indices=(0, 1, 2, 3),
11 |         dilations=(1, 1, 2, 4),
12 |         strides=(1, 2, 1, 1),  # last two entries are stride 1, paired with dilations 2 and 4 above
13 |         norm_cfg=norm_cfg,
14 |         norm_eval=False,
15 |         style='pytorch',
16 |         contract_dilation=True),
17 |     decode_head=dict(
18 |         type='ASPPHead',
19 |         in_channels=2048,
20 |         in_index=3,
21 |         channels=512,
22 |         dilations=(1, 12, 24, 36),  # head-level dilations, separate from the backbone's dilations above
23 |         dropout_ratio=0.1,
24 |         num_classes=19,
25 |         norm_cfg=norm_cfg,
26 |         align_corners=False,
27 |         loss_decode=dict(
28 |             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
29 |     auxiliary_head=dict(
30 |         type='FCNHead',
31 |         in_channels=1024,
32 |         in_index=2,
33 |         channels=256,
34 |         num_convs=1,
35 |         concat_input=False,
36 |         dropout_ratio=0.1,
37 |         num_classes=19,
38 |         norm_cfg=norm_cfg,
39 |         align_corners=False,
40 |         loss_decode=dict(
41 |             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),  # auxiliary loss weighted 0.4 vs. 1.0 for the decode head
42 |     # model training and testing settings
43 |     train_cfg=dict(),
44 |     test_cfg=dict(mode='whole'))
45 | 


--------------------------------------------------------------------------------
/segmentation/configs/_base_/models/deeplabv3_unet_s5-d16.py:
--------------------------------------------------------------------------------
 1 | # model settings: EncoderDecoder = UNet backbone (no pretrained weights) + ASPPHead decode head + FCNHead auxiliary head
 2 | norm_cfg = dict(type='SyncBN', requires_grad=True)  # shared by the backbone and both heads below
 3 | model = dict(
 4 |     type='EncoderDecoder',
 5 |     pretrained=None,
 6 |     backbone=dict(
 7 |         type='UNet',
 8 |         in_channels=3,
 9 |         base_channels=64,
10 |         num_stages=5,
11 |         strides=(1, 1, 1, 1, 1),
12 |         enc_num_convs=(2, 2, 2, 2, 2),
13 |         dec_num_convs=(2, 2, 2, 2),
14 |         downsamples=(True, True, True, True),
15 |         enc_dilations=(1, 1, 1, 1, 1),
16 |         dec_dilations=(1, 1, 1, 1),
17 |         with_cp=False,
18 |         conv_cfg=None,
19 |         norm_cfg=norm_cfg,
20 |         act_cfg=dict(type='ReLU'),
21 |         upsample_cfg=dict(type='InterpConv'),
22 |         norm_eval=False),
23 |     decode_head=dict(
24 |         type='ASPPHead',
25 |         in_channels=64,
26 |         in_index=4,
27 |         channels=16,
28 |         dilations=(1, 12, 24, 36),
29 |         dropout_ratio=0.1,
30 |         num_classes=2,
31 |         norm_cfg=norm_cfg,
32 |         align_corners=False,
33 |         loss_decode=dict(
34 |             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
35 |     auxiliary_head=dict(
36 |         type='FCNHead',
37 |         in_channels=128,
38 |         in_index=3,
39 |         channels=64,
40 |         num_convs=1,
41 |         concat_input=False,
42 |         dropout_ratio=0.1,
43 |         num_classes=2,
44 |         norm_cfg=norm_cfg,
45 |         align_corners=False,
46 |         loss_decode=dict(
47 |             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),  # auxiliary loss weighted 0.4 vs. 1.0 for the decode head
48 |     # model training and testing settings
49 |     train_cfg=dict(),
50 |     test_cfg=dict(mode='slide', crop_size=256, stride=170))  # NOTE(review): presumably sliding-window testing with 256-px crops and a 170-px step; confirm
51 | 


--------------------------------------------------------------------------------
/segmentation/configs/_base_/models/deeplabv3plus_r50-d8.py:
--------------------------------------------------------------------------------
 1 | # model settings: EncoderDecoder = ResNetV1c-50 backbone + DepthwiseSeparableASPPHead decode head + FCNHead auxiliary head
 2 | norm_cfg = dict(type='SyncBN', requires_grad=True)  # shared by the backbone and both heads below
 3 | model = dict(
 4 |     type='EncoderDecoder',
 5 |     pretrained='open-mmlab://resnet50_v1c',
 6 |     backbone=dict(
 7 |         type='ResNetV1c',
 8 |         depth=50,
 9 |         num_stages=4,
10 |         out_indices=(0, 1, 2, 3),
11 |         dilations=(1, 1, 2, 4),
12 |         strides=(1, 2, 1, 1),  # last two entries are stride 1, paired with dilations 2 and 4 above
13 |         norm_cfg=norm_cfg,
14 |         norm_eval=False,
15 |         style='pytorch',
16 |         contract_dilation=True),
17 |     decode_head=dict(
18 |         type='DepthwiseSeparableASPPHead',
19 |         in_channels=2048,
20 |         in_index=3,
21 |         channels=512,
22 |         dilations=(1, 12, 24, 36),  # head-level dilations, separate from the backbone's dilations above
23 |         c1_in_channels=256,
24 |         c1_channels=48,
25 |         dropout_ratio=0.1,
26 |         num_classes=19,
27 |         norm_cfg=norm_cfg,
28 |         align_corners=False,
29 |         loss_decode=dict(
30 |             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
31 |     auxiliary_head=dict(
32 |         type='FCNHead',
33 |         in_channels=1024,
34 |         in_index=2,
35 |         channels=256,
36 |         num_convs=1,
37 |         concat_input=False,
38 |         dropout_ratio=0.1,
39 |         num_classes=19,
40 |         norm_cfg=norm_cfg,
41 |         align_corners=False,
42 |         loss_decode=dict(
43 |             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),  # auxiliary loss weighted 0.4 vs. 1.0 for the decode head
44 |     # model training and testing settings
45 |     train_cfg=dict(),
46 |     test_cfg=dict(mode='whole'))
47 | 


--------------------------------------------------------------------------------
/segmentation/configs/_base_/models/dmnet_r50-d8.py:
--------------------------------------------------------------------------------
 1 | # model settings: EncoderDecoder = ResNetV1c-50 backbone + DMHead decode head + FCNHead auxiliary head
 2 | norm_cfg = dict(type='SyncBN', requires_grad=True)  # referenced by the backbone and the auxiliary head below
 3 | model = dict(
 4 |     type='EncoderDecoder',
 5 |     pretrained='open-mmlab://resnet50_v1c',
 6 |     backbone=dict(
 7 |         type='ResNetV1c',
 8 |         depth=50,
 9 |         num_stages=4,
10 |         out_indices=(0, 1, 2, 3),
11 |         dilations=(1, 1, 2, 4),
12 |         strides=(1, 2, 1, 1),  # last two entries are stride 1, paired with dilations 2 and 4 above
13 |         norm_cfg=norm_cfg,
14 |         norm_eval=False,
15 |         style='pytorch',
16 |         contract_dilation=True),
17 |     decode_head=dict(
18 |         type='DMHead',
19 |         in_channels=2048,
20 |         in_index=3,
21 |         channels=512,
22 |         filter_sizes=(1, 3, 5, 7),
23 |         dropout_ratio=0.1,
24 |         num_classes=19,
25 |         norm_cfg=dict(type='SyncBN', requires_grad=True),  # NOTE(review): same value as the module-level norm_cfg, spelled inline instead of referenced
26 |         align_corners=False,
27 |         loss_decode=dict(
28 |             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
29 |     auxiliary_head=dict(
30 |         type='FCNHead',
31 |         in_channels=1024,
32 |         in_index=2,
33 |         channels=256,
34 |         num_convs=1,
35 |         concat_input=False,
36 |         dropout_ratio=0.1,
37 |         num_classes=19,
38 |         norm_cfg=norm_cfg,
39 |         align_corners=False,
40 |         loss_decode=dict(
41 |             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),  # auxiliary loss weighted 0.4 vs. 1.0 for the decode head
42 |     # model training and testing settings
43 |     train_cfg=dict(),
44 |     test_cfg=dict(mode='whole'))
45 | 


--------------------------------------------------------------------------------
/segmentation/configs/_base_/models/dnl_r50-d8.py:
--------------------------------------------------------------------------------
 1 | # model settings: EncoderDecoder = ResNetV1c-50 backbone + DNLHead decode head + FCNHead auxiliary head
 2 | norm_cfg = dict(type='SyncBN', requires_grad=True)  # shared by the backbone and both heads below
 3 | model = dict(
 4 |     type='EncoderDecoder',
 5 |     pretrained='open-mmlab://resnet50_v1c',
 6 |     backbone=dict(
 7 |         type='ResNetV1c',
 8 |         depth=50,
 9 |         num_stages=4,
10 |         out_indices=(0, 1, 2, 3),
11 |         dilations=(1, 1, 2, 4),
12 |         strides=(1, 2, 1, 1),  # last two entries are stride 1, paired with dilations 2 and 4 above
13 |         norm_cfg=norm_cfg,
14 |         norm_eval=False,
15 |         style='pytorch',
16 |         contract_dilation=True),
17 |     decode_head=dict(
18 |         type='DNLHead',
19 |         in_channels=2048,
20 |         in_index=3,
21 |         channels=512,
22 |         dropout_ratio=0.1,
23 |         reduction=2,
24 |         use_scale=True,
25 |         mode='embedded_gaussian',
26 |         num_classes=19,
27 |         norm_cfg=norm_cfg,
28 |         align_corners=False,
29 |         loss_decode=dict(
30 |             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
31 |     auxiliary_head=dict(
32 |         type='FCNHead',
33 |         in_channels=1024,
34 |         in_index=2,
35 |         channels=256,
36 |         num_convs=1,
37 |         concat_input=False,
38 |         dropout_ratio=0.1,
39 |         num_classes=19,
40 |         norm_cfg=norm_cfg,
41 |         align_corners=False,
42 |         loss_decode=dict(
43 |             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),  # auxiliary loss weighted 0.4 vs. 1.0 for the decode head
44 |     # model training and testing settings
45 |     train_cfg=dict(),
46 |     test_cfg=dict(mode='whole'))
47 | 


--------------------------------------------------------------------------------
/segmentation/configs/_base_/models/dpt_vit-b16.py:
--------------------------------------------------------------------------------
 1 | norm_cfg = dict(type='SyncBN', requires_grad=True)  # used by the decode head only; the backbone dict below sets no norm_cfg
 2 | model = dict(  # EncoderDecoder = VisionTransformer backbone + DPTHead decode head, auxiliary head disabled
 3 |     type='EncoderDecoder',
 4 |     pretrained='pretrain/vit-b16_p16_224-80ecf9dd.pth', # noqa
 5 |     backbone=dict(
 6 |         type='VisionTransformer',
 7 |         img_size=224,
 8 |         embed_dims=768,
 9 |         num_layers=12,
10 |         num_heads=12,
11 |         out_indices=(2, 5, 8, 11),
12 |         final_norm=False,
13 |         with_cls_token=True,
14 |         output_cls_token=True),
15 |     decode_head=dict(
16 |         type='DPTHead',
17 |         in_channels=(768, 768, 768, 768),  # four entries, each equal to the backbone's embed_dims
18 |         channels=256,
19 |         embed_dims=768,
20 |         post_process_channels=[96, 192, 384, 768],
21 |         num_classes=150,
22 |         readout_type='project',
23 |         input_transform='multiple_select',
24 |         in_index=(0, 1, 2, 3),
25 |         norm_cfg=norm_cfg,
26 |         loss_decode=dict(
27 |             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
28 |     auxiliary_head=None,
29 |     # model training and testing settings
30 |     train_cfg=dict(),
31 |     test_cfg=dict(mode='whole'))  # yapf: disable
32 | 


--------------------------------------------------------------------------------
/segmentation/configs/_base_/models/emanet_r50-d8.py:
--------------------------------------------------------------------------------
 1 | # model settings: EncoderDecoder = ResNetV1c-50 backbone + EMAHead decode head + FCNHead auxiliary head
 2 | norm_cfg = dict(type='SyncBN', requires_grad=True)  # shared by the backbone and both heads below
 3 | model = dict(
 4 |     type='EncoderDecoder',
 5 |     pretrained='open-mmlab://resnet50_v1c',
 6 |     backbone=dict(
 7 |         type='ResNetV1c',
 8 |         depth=50,
 9 |         num_stages=4,
10 |         out_indices=(0, 1, 2, 3),
11 |         dilations=(1, 1, 2, 4),
12 |         strides=(1, 2, 1, 1),  # last two entries are stride 1, paired with dilations 2 and 4 above
13 |         norm_cfg=norm_cfg,
14 |         norm_eval=False,
15 |         style='pytorch',
16 |         contract_dilation=True),
17 |     decode_head=dict(
18 |         type='EMAHead',
19 |         in_channels=2048,
20 |         in_index=3,
21 |         channels=256,
22 |         ema_channels=512,
23 |         num_bases=64,
24 |         num_stages=3,  # head-level setting, separate from the backbone's num_stages=4
25 |         momentum=0.1,
26 |         dropout_ratio=0.1,
27 |         num_classes=19,
28 |         norm_cfg=norm_cfg,
29 |         align_corners=False,
30 |         loss_decode=dict(
31 |             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
32 |     auxiliary_head=dict(
33 |         type='FCNHead',
34 |         in_channels=1024,
35 |         in_index=2,
36 |         channels=256,
37 |         num_convs=1,
38 |         concat_input=False,
39 |         dropout_ratio=0.1,
40 |         num_classes=19,
41 |         norm_cfg=norm_cfg,
42 |         align_corners=False,
43 |         loss_decode=dict(
44 |             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),  # auxiliary loss weighted 0.4 vs. 1.0 for the decode head
45 |     # model training and testing settings
46 |     train_cfg=dict(),
47 |     test_cfg=dict(mode='whole'))
48 | 


--------------------------------------------------------------------------------
/segmentation/configs/_base_/models/encnet_r50-d8.py:
--------------------------------------------------------------------------------
 1 | # model settings: EncoderDecoder = ResNetV1c-50 backbone + EncHead decode head + FCNHead auxiliary head
 2 | norm_cfg = dict(type='SyncBN', requires_grad=True)  # shared by the backbone and both heads below
 3 | model = dict(
 4 |     type='EncoderDecoder',
 5 |     pretrained='open-mmlab://resnet50_v1c',
 6 |     backbone=dict(
 7 |         type='ResNetV1c',
 8 |         depth=50,
 9 |         num_stages=4,
10 |         out_indices=(0, 1, 2, 3),
11 |         dilations=(1, 1, 2, 4),
12 |         strides=(1, 2, 1, 1),  # last two entries are stride 1, paired with dilations 2 and 4 above
13 |         norm_cfg=norm_cfg,
14 |         norm_eval=False,
15 |         style='pytorch',
16 |         contract_dilation=True),
17 |     decode_head=dict(
18 |         type='EncHead',
19 |         in_channels=[512, 1024, 2048],  # three widths, one per in_index entry below
20 |         in_index=(1, 2, 3),
21 |         channels=512,
22 |         num_codes=32,
23 |         use_se_loss=True,
24 |         add_lateral=False,
25 |         dropout_ratio=0.1,
26 |         num_classes=19,
27 |         norm_cfg=norm_cfg,
28 |         align_corners=False,
29 |         loss_decode=dict(
30 |             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
31 |         loss_se_decode=dict(
32 |             type='CrossEntropyLoss', use_sigmoid=True, loss_weight=0.2)),  # second loss config on this head: sigmoid variant, weight 0.2
33 |     auxiliary_head=dict(
34 |         type='FCNHead',
35 |         in_channels=1024,
36 |         in_index=2,
37 |         channels=256,
38 |         num_convs=1,
39 |         concat_input=False,
40 |         dropout_ratio=0.1,
41 |         num_classes=19,
42 |         norm_cfg=norm_cfg,
43 |         align_corners=False,
44 |         loss_decode=dict(
45 |             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),  # auxiliary loss weighted 0.4 vs. 1.0 for the decode head
46 |     # model training and testing settings
47 |     train_cfg=dict(),
48 |     test_cfg=dict(mode='whole'))
49 | 


--------------------------------------------------------------------------------
/segmentation/configs/_base_/models/erfnet_fcn.py:
--------------------------------------------------------------------------------
 1 | # model settings: EncoderDecoder = ERFNet backbone (no pretrained weights) + a single FCNHead decode head
 2 | norm_cfg = dict(type='SyncBN', requires_grad=True)  # passed to the decode head only; the backbone dict sets no norm_cfg
 3 | model = dict(
 4 |     type='EncoderDecoder',
 5 |     pretrained=None,
 6 |     backbone=dict(
 7 |         type='ERFNet',
 8 |         in_channels=3,
 9 |         enc_downsample_channels=(16, 64, 128),
10 |         enc_stage_non_bottlenecks=(5, 8),
11 |         enc_non_bottleneck_dilations=(2, 4, 8, 16),
12 |         enc_non_bottleneck_channels=(64, 128),
13 |         dec_upsample_channels=(64, 16),
14 |         dec_stages_non_bottleneck=(2, 2),
15 |         dec_non_bottleneck_channels=(64, 16),
16 |         dropout_ratio=0.1,
17 |         init_cfg=None),
18 |     decode_head=dict(
19 |         type='FCNHead',
20 |         in_channels=16,  # no in_index is given for this head, so the head's own default applies
21 |         channels=128,
22 |         num_convs=1,
23 |         concat_input=False,
24 |         dropout_ratio=0.1,
25 |         num_classes=19,
26 |         norm_cfg=norm_cfg,
27 |         align_corners=False,
28 |         loss_decode=dict(
29 |             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
30 |     # model training and testing settings
31 |     train_cfg=dict(),
32 |     test_cfg=dict(mode='whole'))
33 | 


--------------------------------------------------------------------------------
/segmentation/configs/_base_/models/fast_scnn.py:
--------------------------------------------------------------------------------
 1 | # model settings: EncoderDecoder = FastSCNN backbone + DepthwiseSeparableFCNHead decode head + two FCNHead auxiliary heads
 2 | norm_cfg = dict(type='SyncBN', requires_grad=True, momentum=0.01)  # shared by the backbone and all three heads below
 3 | model = dict(
 4 |     type='EncoderDecoder',
 5 |     backbone=dict(
 6 |         type='FastSCNN',
 7 |         downsample_dw_channels=(32, 48),
 8 |         global_in_channels=64,
 9 |         global_block_channels=(64, 96, 128),
10 |         global_block_strides=(2, 2, 1),
11 |         global_out_channels=128,
12 |         higher_in_channels=64,
13 |         lower_in_channels=128,
14 |         fusion_out_channels=128,
15 |         out_indices=(0, 1, 2),
16 |         norm_cfg=norm_cfg,
17 |         align_corners=False),
18 |     decode_head=dict(
19 |         type='DepthwiseSeparableFCNHead',
20 |         in_channels=128,
21 |         channels=128,
22 |         concat_input=False,
23 |         num_classes=19,
24 |         in_index=-1,
25 |         norm_cfg=norm_cfg,
26 |         align_corners=False,
27 |         loss_decode=dict(
28 |             type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1)),  # all three heads in this file set use_sigmoid=True
29 |     auxiliary_head=[  # a list of two FCNHead configs, reading in_index -2 and -3
30 |         dict(
31 |             type='FCNHead',
32 |             in_channels=128,
33 |             channels=32,
34 |             num_convs=1,
35 |             num_classes=19,
36 |             in_index=-2,
37 |             norm_cfg=norm_cfg,
38 |             concat_input=False,
39 |             align_corners=False,
40 |             loss_decode=dict(
41 |                 type='CrossEntropyLoss', use_sigmoid=True, loss_weight=0.4)),
42 |         dict(
43 |             type='FCNHead',
44 |             in_channels=64,
45 |             channels=32,
46 |             num_convs=1,
47 |             num_classes=19,
48 |             in_index=-3,
49 |             norm_cfg=norm_cfg,
50 |             concat_input=False,
51 |             align_corners=False,
52 |             loss_decode=dict(
53 |                 type='CrossEntropyLoss', use_sigmoid=True, loss_weight=0.4)),
54 |     ],
55 |     # model training and testing settings
56 |     train_cfg=dict(),
57 |     test_cfg=dict(mode='whole'))
58 | 


--------------------------------------------------------------------------------
/segmentation/configs/_base_/models/fastfcn_r50-d32_jpu_psp.py:
--------------------------------------------------------------------------------
 1 | # model settings: EncoderDecoder = ResNetV1c-50 backbone + JPU neck + PSPHead decode head + FCNHead auxiliary head
 2 | norm_cfg = dict(type='SyncBN', requires_grad=True)  # shared by the backbone, the neck and both heads below
 3 | model = dict(
 4 |     type='EncoderDecoder',
 5 |     pretrained='open-mmlab://resnet50_v1c',
 6 |     backbone=dict(
 7 |         type='ResNetV1c',
 8 |         depth=50,
 9 |         num_stages=4,
10 |         dilations=(1, 1, 2, 4),
11 |         strides=(1, 2, 2, 2),
12 |         out_indices=(1, 2, 3),
13 |         norm_cfg=norm_cfg,
14 |         norm_eval=False,
15 |         style='pytorch',
16 |         contract_dilation=True),
17 |     neck=dict(
18 |         type='JPU',
19 |         in_channels=(512, 1024, 2048),  # three widths, matching the three out_indices above
20 |         mid_channels=512,
21 |         start_level=0,
22 |         end_level=-1,
23 |         dilations=(1, 2, 4, 8),
24 |         align_corners=False,
25 |         norm_cfg=norm_cfg),
26 |     decode_head=dict(
27 |         type='PSPHead',
28 |         in_channels=2048,
29 |         in_index=2,
30 |         channels=512,
31 |         pool_scales=(1, 2, 3, 6),
32 |         dropout_ratio=0.1,
33 |         num_classes=19,
34 |         norm_cfg=norm_cfg,
35 |         align_corners=False,
36 |         loss_decode=dict(
37 |             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
38 |     auxiliary_head=dict(
39 |         type='FCNHead',
40 |         in_channels=1024,
41 |         in_index=1,
42 |         channels=256,
43 |         num_convs=1,
44 |         concat_input=False,
45 |         dropout_ratio=0.1,
46 |         num_classes=19,
47 |         norm_cfg=norm_cfg,
48 |         align_corners=False,
49 |         loss_decode=dict(
50 |             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),  # auxiliary loss weighted 0.4 vs. 1.0 for the decode head
51 |     # model training and testing settings
52 |     train_cfg=dict(),
53 |     test_cfg=dict(mode='whole'))
54 | 


--------------------------------------------------------------------------------
/segmentation/configs/_base_/models/fcn_hr18.py:
--------------------------------------------------------------------------------
 1 | # model settings: EncoderDecoder = HRNet backbone (four stages, widths 18/36/72/144) + a single FCNHead decode head
 2 | norm_cfg = dict(type='SyncBN', requires_grad=True)  # shared by the backbone and the decode head
 3 | model = dict(
 4 |     type='EncoderDecoder',
 5 |     pretrained='open-mmlab://msra/hrnetv2_w18',
 6 |     backbone=dict(
 7 |         type='HRNet',
 8 |         norm_cfg=norm_cfg,
 9 |         norm_eval=False,
10 |         extra=dict(
11 |             stage1=dict(
12 |                 num_modules=1,
13 |                 num_branches=1,
14 |                 block='BOTTLENECK',
15 |                 num_blocks=(4, ),
16 |                 num_channels=(64, )),
17 |             stage2=dict(
18 |                 num_modules=1,
19 |                 num_branches=2,
20 |                 block='BASIC',
21 |                 num_blocks=(4, 4),
22 |                 num_channels=(18, 36)),
23 |             stage3=dict(
24 |                 num_modules=4,
25 |                 num_branches=3,
26 |                 block='BASIC',
27 |                 num_blocks=(4, 4, 4),
28 |                 num_channels=(18, 36, 72)),
29 |             stage4=dict(
30 |                 num_modules=3,
31 |                 num_branches=4,
32 |                 block='BASIC',
33 |                 num_blocks=(4, 4, 4, 4),
34 |                 num_channels=(18, 36, 72, 144)))),
35 |     decode_head=dict(
36 |         type='FCNHead',
37 |         in_channels=[18, 36, 72, 144],  # same four widths as stage4's num_channels
38 |         in_index=(0, 1, 2, 3),
39 |         channels=sum([18, 36, 72, 144]),  # evaluates to 270, the sum of the in_channels widths
40 |         input_transform='resize_concat',
41 |         kernel_size=1,
42 |         num_convs=1,
43 |         concat_input=False,
44 |         dropout_ratio=-1,  # NOTE(review): a negative ratio presumably disables dropout in the head; confirm
45 |         num_classes=19,
46 |         norm_cfg=norm_cfg,
47 |         align_corners=False,
48 |         loss_decode=dict(
49 |             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
50 |     # model training and testing settings
51 |     train_cfg=dict(),
52 |     test_cfg=dict(mode='whole'))
53 | 


--------------------------------------------------------------------------------
/segmentation/configs/_base_/models/fcn_r50-d8.py:
--------------------------------------------------------------------------------
 1 | # model settings: EncoderDecoder = ResNetV1c-50 backbone + FCNHead decode head + FCNHead auxiliary head
 2 | norm_cfg = dict(type='SyncBN', requires_grad=True)  # shared by the backbone and both heads below
 3 | model = dict(
 4 |     type='EncoderDecoder',
 5 |     pretrained='open-mmlab://resnet50_v1c',
 6 |     backbone=dict(
 7 |         type='ResNetV1c',
 8 |         depth=50,
 9 |         num_stages=4,
10 |         out_indices=(0, 1, 2, 3),
11 |         dilations=(1, 1, 2, 4),
12 |         strides=(1, 2, 1, 1),  # last two entries are stride 1, paired with dilations 2 and 4 above
13 |         norm_cfg=norm_cfg,
14 |         norm_eval=False,
15 |         style='pytorch',
16 |         contract_dilation=True),
17 |     decode_head=dict(
18 |         type='FCNHead',
19 |         in_channels=2048,
20 |         in_index=3,
21 |         channels=512,
22 |         num_convs=2,
23 |         concat_input=True,  # differs from the auxiliary head below, which sets concat_input=False and num_convs=1
24 |         dropout_ratio=0.1,
25 |         num_classes=19,
26 |         norm_cfg=norm_cfg,
27 |         align_corners=False,
28 |         loss_decode=dict(
29 |             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
30 |     auxiliary_head=dict(
31 |         type='FCNHead',
32 |         in_channels=1024,
33 |         in_index=2,
34 |         channels=256,
35 |         num_convs=1,
36 |         concat_input=False,
37 |         dropout_ratio=0.1,
38 |         num_classes=19,
39 |         norm_cfg=norm_cfg,
40 |         align_corners=False,
41 |         loss_decode=dict(
42 |             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),  # auxiliary loss weighted 0.4 vs. 1.0 for the decode head
43 |     # model training and testing settings
44 |     train_cfg=dict(),
45 |     test_cfg=dict(mode='whole'))
46 | 


--------------------------------------------------------------------------------
/segmentation/configs/_base_/models/fcn_unet_s5-d16.py:
--------------------------------------------------------------------------------
 1 | # model settings: EncoderDecoder = UNet backbone (no pretrained weights) + FCNHead decode head + FCNHead auxiliary head
 2 | norm_cfg = dict(type='SyncBN', requires_grad=True)  # shared by the backbone and both heads below
 3 | model = dict(
 4 |     type='EncoderDecoder',
 5 |     pretrained=None,
 6 |     backbone=dict(
 7 |         type='UNet',
 8 |         in_channels=3,
 9 |         base_channels=64,
10 |         num_stages=5,
11 |         strides=(1, 1, 1, 1, 1),
12 |         enc_num_convs=(2, 2, 2, 2, 2),
13 |         dec_num_convs=(2, 2, 2, 2),
14 |         downsamples=(True, True, True, True),
15 |         enc_dilations=(1, 1, 1, 1, 1),
16 |         dec_dilations=(1, 1, 1, 1),
17 |         with_cp=False,
18 |         conv_cfg=None,
19 |         norm_cfg=norm_cfg,
20 |         act_cfg=dict(type='ReLU'),
21 |         upsample_cfg=dict(type='InterpConv'),
22 |         norm_eval=False),
23 |     decode_head=dict(
24 |         type='FCNHead',
25 |         in_channels=64,
26 |         in_index=4,
27 |         channels=64,
28 |         num_convs=1,
29 |         concat_input=False,
30 |         dropout_ratio=0.1,
31 |         num_classes=2,
32 |         norm_cfg=norm_cfg,
33 |         align_corners=False,
34 |         loss_decode=dict(
35 |             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
36 |     auxiliary_head=dict(
37 |         type='FCNHead',
38 |         in_channels=128,
39 |         in_index=3,
40 |         channels=64,
41 |         num_convs=1,
42 |         concat_input=False,
43 |         dropout_ratio=0.1,
44 |         num_classes=2,
45 |         norm_cfg=norm_cfg,
46 |         align_corners=False,
47 |         loss_decode=dict(
48 |             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),  # auxiliary loss weighted 0.4 vs. 1.0 for the decode head
49 |     # model training and testing settings
50 |     train_cfg=dict(),
51 |     test_cfg=dict(mode='slide', crop_size=256, stride=170))  # NOTE(review): presumably sliding-window testing with 256-px crops and a 170-px step; confirm
52 | 


--------------------------------------------------------------------------------
/segmentation/configs/_base_/models/fpn_r50.py:
--------------------------------------------------------------------------------
 1 | # model settings: EncoderDecoder = ResNetV1c-50 backbone + FPN neck + FPNHead decode head (no auxiliary head configured)
 2 | norm_cfg = dict(type='SyncBN', requires_grad=True)  # passed to the backbone and the decode head; the neck dict sets no norm_cfg
 3 | model = dict(
 4 |     type='EncoderDecoder',
 5 |     pretrained='open-mmlab://resnet50_v1c',
 6 |     backbone=dict(
 7 |         type='ResNetV1c',
 8 |         depth=50,
 9 |         num_stages=4,
10 |         out_indices=(0, 1, 2, 3),
11 |         dilations=(1, 1, 1, 1),
12 |         strides=(1, 2, 2, 2),
13 |         norm_cfg=norm_cfg,
14 |         norm_eval=False,
15 |         style='pytorch',
16 |         contract_dilation=True),
17 |     neck=dict(
18 |         type='FPN',
19 |         in_channels=[256, 512, 1024, 2048],  # four widths, matching the four out_indices above
20 |         out_channels=256,
21 |         num_outs=4),
22 |     decode_head=dict(
23 |         type='FPNHead',
24 |         in_channels=[256, 256, 256, 256],  # each equals the neck's out_channels; four entries match num_outs=4
25 |         in_index=[0, 1, 2, 3],
26 |         feature_strides=[4, 8, 16, 32],
27 |         channels=128,
28 |         dropout_ratio=0.1,
29 |         num_classes=19,
30 |         norm_cfg=norm_cfg,
31 |         align_corners=False,
32 |         loss_decode=dict(
33 |             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
34 |     # model training and testing settings
35 |     train_cfg=dict(),
36 |     test_cfg=dict(mode='whole'))
37 | 


--------------------------------------------------------------------------------
/segmentation/configs/_base_/models/gcnet_r50-d8.py:
--------------------------------------------------------------------------------
 1 | # model settings: EncoderDecoder = ResNetV1c-50 backbone + GCHead decode head + FCNHead auxiliary head
 2 | norm_cfg = dict(type='SyncBN', requires_grad=True)  # shared by the backbone and both heads below
 3 | model = dict(
 4 |     type='EncoderDecoder',
 5 |     pretrained='open-mmlab://resnet50_v1c',
 6 |     backbone=dict(
 7 |         type='ResNetV1c',
 8 |         depth=50,
 9 |         num_stages=4,
10 |         out_indices=(0, 1, 2, 3),
11 |         dilations=(1, 1, 2, 4),
12 |         strides=(1, 2, 1, 1),  # last two entries are stride 1, paired with dilations 2 and 4 above
13 |         norm_cfg=norm_cfg,
14 |         norm_eval=False,
15 |         style='pytorch',
16 |         contract_dilation=True),
17 |     decode_head=dict(
18 |         type='GCHead',
19 |         in_channels=2048,
20 |         in_index=3,
21 |         channels=512,
22 |         ratio=1 / 4.,  # evaluates to the float 0.25
23 |         pooling_type='att',
24 |         fusion_types=('channel_add', ),
25 |         dropout_ratio=0.1,
26 |         num_classes=19,
27 |         norm_cfg=norm_cfg,
28 |         align_corners=False,
29 |         loss_decode=dict(
30 |             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
31 |     auxiliary_head=dict(
32 |         type='FCNHead',
33 |         in_channels=1024,
34 |         in_index=2,
35 |         channels=256,
36 |         num_convs=1,
37 |         concat_input=False,
38 |         dropout_ratio=0.1,
39 |         num_classes=19,
40 |         norm_cfg=norm_cfg,
41 |         align_corners=False,
42 |         loss_decode=dict(
43 |             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),  # auxiliary loss weighted 0.4 vs. 1.0 for the decode head
44 |     # model training and testing settings
45 |     train_cfg=dict(),
46 |     test_cfg=dict(mode='whole'))
47 | 


--------------------------------------------------------------------------------
/segmentation/configs/_base_/models/isanet_r50-d8.py:
--------------------------------------------------------------------------------
 1 | # model settings: EncoderDecoder = ResNetV1c-50 backbone + ISAHead decode head + FCNHead auxiliary head
 2 | norm_cfg = dict(type='SyncBN', requires_grad=True)  # shared by the backbone and both heads below
 3 | model = dict(
 4 |     type='EncoderDecoder',
 5 |     pretrained='open-mmlab://resnet50_v1c',
 6 |     backbone=dict(
 7 |         type='ResNetV1c',
 8 |         depth=50,
 9 |         num_stages=4,
10 |         out_indices=(0, 1, 2, 3),
11 |         dilations=(1, 1, 2, 4),
12 |         strides=(1, 2, 1, 1),  # last two entries are stride 1, paired with dilations 2 and 4 above
13 |         norm_cfg=norm_cfg,
14 |         norm_eval=False,
15 |         style='pytorch',
16 |         contract_dilation=True),
17 |     decode_head=dict(
18 |         type='ISAHead',
19 |         in_channels=2048,
20 |         in_index=3,
21 |         channels=512,
22 |         isa_channels=256,
23 |         down_factor=(8, 8),
24 |         dropout_ratio=0.1,
25 |         num_classes=19,
26 |         norm_cfg=norm_cfg,
27 |         align_corners=False,
28 |         loss_decode=dict(
29 |             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
30 |     auxiliary_head=dict(
31 |         type='FCNHead',
32 |         in_channels=1024,
33 |         in_index=2,
34 |         channels=256,
35 |         num_convs=1,
36 |         concat_input=False,
37 |         dropout_ratio=0.1,
38 |         num_classes=19,
39 |         norm_cfg=norm_cfg,
40 |         align_corners=False,
41 |         loss_decode=dict(
42 |             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),  # auxiliary loss weighted 0.4 vs. 1.0 for the decode head
43 |     # model training and testing settings
44 |     train_cfg=dict(),
45 |     test_cfg=dict(mode='whole'))
46 | 


--------------------------------------------------------------------------------
/segmentation/configs/_base_/models/lraspp_m-v3-d8.py:
--------------------------------------------------------------------------------
 1 | # model settings
 2 | norm_cfg = dict(type='SyncBN', eps=0.001, requires_grad=True)
 3 | model = dict(
 4 |     type='EncoderDecoder',
 5 |     backbone=dict(
 6 |         type='MobileNetV3',
 7 |         arch='large',
 8 |         out_indices=(1, 3, 16),
 9 |         norm_cfg=norm_cfg),
10 |     decode_head=dict(
11 |         type='LRASPPHead',
12 |         in_channels=(16, 24, 960),
13 |         in_index=(0, 1, 2),
14 |         channels=128,
15 |         input_transform='multiple_select',
16 |         dropout_ratio=0.1,
17 |         num_classes=19,
18 |         norm_cfg=norm_cfg,
19 |         act_cfg=dict(type='ReLU'),
20 |         align_corners=False,
21 |         loss_decode=dict(
22 |             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
23 |     # model training and testing settings
24 |     train_cfg=dict(),
25 |     test_cfg=dict(mode='whole'))
26 | 


--------------------------------------------------------------------------------
/segmentation/configs/_base_/models/nonlocal_r50-d8.py:
--------------------------------------------------------------------------------
 1 | # model settings
 2 | norm_cfg = dict(type='SyncBN', requires_grad=True)
 3 | model = dict(
 4 |     type='EncoderDecoder',
 5 |     pretrained='open-mmlab://resnet50_v1c',
 6 |     backbone=dict(
 7 |         type='ResNetV1c',
 8 |         depth=50,
 9 |         num_stages=4,
10 |         out_indices=(0, 1, 2, 3),
11 |         dilations=(1, 1, 2, 4),
12 |         strides=(1, 2, 1, 1),
13 |         norm_cfg=norm_cfg,
14 |         norm_eval=False,
15 |         style='pytorch',
16 |         contract_dilation=True),
17 |     decode_head=dict(
18 |         type='NLHead',
19 |         in_channels=2048,
20 |         in_index=3,
21 |         channels=512,
22 |         dropout_ratio=0.1,
23 |         reduction=2,
24 |         use_scale=True,
25 |         mode='embedded_gaussian',
26 |         num_classes=19,
27 |         norm_cfg=norm_cfg,
28 |         align_corners=False,
29 |         loss_decode=dict(
30 |             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
31 |     auxiliary_head=dict(
32 |         type='FCNHead',
33 |         in_channels=1024,
34 |         in_index=2,
35 |         channels=256,
36 |         num_convs=1,
37 |         concat_input=False,
38 |         dropout_ratio=0.1,
39 |         num_classes=19,
40 |         norm_cfg=norm_cfg,
41 |         align_corners=False,
42 |         loss_decode=dict(
43 |             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
44 |     # model training and testing settings
45 |     train_cfg=dict(),
46 |     test_cfg=dict(mode='whole'))
47 | 


--------------------------------------------------------------------------------
/segmentation/configs/_base_/models/ocrnet_r50-d8.py:
--------------------------------------------------------------------------------
 1 | # model settings
 2 | norm_cfg = dict(type='SyncBN', requires_grad=True)
 3 | model = dict(
 4 |     type='CascadeEncoderDecoder',
 5 |     num_stages=2,
 6 |     pretrained='open-mmlab://resnet50_v1c',
 7 |     backbone=dict(
 8 |         type='ResNetV1c',
 9 |         depth=50,
10 |         num_stages=4,
11 |         out_indices=(0, 1, 2, 3),
12 |         dilations=(1, 1, 2, 4),
13 |         strides=(1, 2, 1, 1),
14 |         norm_cfg=norm_cfg,
15 |         norm_eval=False,
16 |         style='pytorch',
17 |         contract_dilation=True),
18 |     decode_head=[
19 |         dict(
20 |             type='FCNHead',
21 |             in_channels=1024,
22 |             in_index=2,
23 |             channels=256,
24 |             num_convs=1,
25 |             concat_input=False,
26 |             dropout_ratio=0.1,
27 |             num_classes=19,
28 |             norm_cfg=norm_cfg,
29 |             align_corners=False,
30 |             loss_decode=dict(
31 |                 type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
32 |         dict(
33 |             type='OCRHead',
34 |             in_channels=2048,
35 |             in_index=3,
36 |             channels=512,
37 |             ocr_channels=256,
38 |             dropout_ratio=0.1,
39 |             num_classes=19,
40 |             norm_cfg=norm_cfg,
41 |             align_corners=False,
42 |             loss_decode=dict(
43 |                 type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0))
44 |     ],
45 |     # model training and testing settings
46 |     train_cfg=dict(),
47 |     test_cfg=dict(mode='whole'))
48 | 


--------------------------------------------------------------------------------
/segmentation/configs/_base_/models/pointrend_r50.py:
--------------------------------------------------------------------------------
 1 | # model settings
 2 | norm_cfg = dict(type='SyncBN', requires_grad=True)
 3 | model = dict(
 4 |     type='CascadeEncoderDecoder',
 5 |     num_stages=2,
 6 |     pretrained='open-mmlab://resnet50_v1c',
 7 |     backbone=dict(
 8 |         type='ResNetV1c',
 9 |         depth=50,
10 |         num_stages=4,
11 |         out_indices=(0, 1, 2, 3),
12 |         dilations=(1, 1, 1, 1),
13 |         strides=(1, 2, 2, 2),
14 |         norm_cfg=norm_cfg,
15 |         norm_eval=False,
16 |         style='pytorch',
17 |         contract_dilation=True),
18 |     neck=dict(
19 |         type='FPN',
20 |         in_channels=[256, 512, 1024, 2048],
21 |         out_channels=256,
22 |         num_outs=4),
23 |     decode_head=[
24 |         dict(
25 |             type='FPNHead',
26 |             in_channels=[256, 256, 256, 256],
27 |             in_index=[0, 1, 2, 3],
28 |             feature_strides=[4, 8, 16, 32],
29 |             channels=128,
30 |             dropout_ratio=-1,
31 |             num_classes=19,
32 |             norm_cfg=norm_cfg,
33 |             align_corners=False,
34 |             loss_decode=dict(
35 |                 type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
36 |         dict(
37 |             type='PointHead',
38 |             in_channels=[256],
39 |             in_index=[0],
40 |             channels=256,
41 |             num_fcs=3,
42 |             coarse_pred_each_layer=True,
43 |             dropout_ratio=-1,
44 |             num_classes=19,
45 |             align_corners=False,
46 |             loss_decode=dict(
47 |                 type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0))
48 |     ],
49 |     # model training and testing settings
50 |     train_cfg=dict(
51 |         num_points=2048, oversample_ratio=3, importance_sample_ratio=0.75),
52 |     test_cfg=dict(
53 |         mode='whole',
54 |         subdivision_steps=2,
55 |         subdivision_num_points=8196,
56 |         scale_factor=2))
57 | 


--------------------------------------------------------------------------------
/segmentation/configs/_base_/models/psanet_r50-d8.py:
--------------------------------------------------------------------------------
 1 | # model settings
 2 | norm_cfg = dict(type='SyncBN', requires_grad=True)
 3 | model = dict(
 4 |     type='EncoderDecoder',
 5 |     pretrained='open-mmlab://resnet50_v1c',
 6 |     backbone=dict(
 7 |         type='ResNetV1c',
 8 |         depth=50,
 9 |         num_stages=4,
10 |         out_indices=(0, 1, 2, 3),
11 |         dilations=(1, 1, 2, 4),
12 |         strides=(1, 2, 1, 1),
13 |         norm_cfg=norm_cfg,
14 |         norm_eval=False,
15 |         style='pytorch',
16 |         contract_dilation=True),
17 |     decode_head=dict(
18 |         type='PSAHead',
19 |         in_channels=2048,
20 |         in_index=3,
21 |         channels=512,
22 |         mask_size=(97, 97),
23 |         psa_type='bi-direction',
24 |         compact=False,
25 |         shrink_factor=2,
26 |         normalization_factor=1.0,
27 |         psa_softmax=True,
28 |         dropout_ratio=0.1,
29 |         num_classes=19,
30 |         norm_cfg=norm_cfg,
31 |         align_corners=False,
32 |         loss_decode=dict(
33 |             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
34 |     auxiliary_head=dict(
35 |         type='FCNHead',
36 |         in_channels=1024,
37 |         in_index=2,
38 |         channels=256,
39 |         num_convs=1,
40 |         concat_input=False,
41 |         dropout_ratio=0.1,
42 |         num_classes=19,
43 |         norm_cfg=norm_cfg,
44 |         align_corners=False,
45 |         loss_decode=dict(
46 |             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
47 |     # model training and testing settings
48 |     train_cfg=dict(),
49 |     test_cfg=dict(mode='whole'))
50 | 


--------------------------------------------------------------------------------
/segmentation/configs/_base_/models/pspnet_r50-d8.py:
--------------------------------------------------------------------------------
 1 | # model settings
 2 | norm_cfg = dict(type='SyncBN', requires_grad=True)
 3 | model = dict(
 4 |     type='EncoderDecoder',
 5 |     pretrained='open-mmlab://resnet50_v1c',
 6 |     backbone=dict(
 7 |         type='ResNetV1c',
 8 |         depth=50,
 9 |         num_stages=4,
10 |         out_indices=(0, 1, 2, 3),
11 |         dilations=(1, 1, 2, 4),
12 |         strides=(1, 2, 1, 1),
13 |         norm_cfg=norm_cfg,
14 |         norm_eval=False,
15 |         style='pytorch',
16 |         contract_dilation=True),
17 |     decode_head=dict(
18 |         type='PSPHead',
19 |         in_channels=2048,
20 |         in_index=3,
21 |         channels=512,
22 |         pool_scales=(1, 2, 3, 6),
23 |         dropout_ratio=0.1,
24 |         num_classes=19,
25 |         norm_cfg=norm_cfg,
26 |         align_corners=False,
27 |         loss_decode=dict(
28 |             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
29 |     auxiliary_head=dict(
30 |         type='FCNHead',
31 |         in_channels=1024,
32 |         in_index=2,
33 |         channels=256,
34 |         num_convs=1,
35 |         concat_input=False,
36 |         dropout_ratio=0.1,
37 |         num_classes=19,
38 |         norm_cfg=norm_cfg,
39 |         align_corners=False,
40 |         loss_decode=dict(
41 |             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
42 |     # model training and testing settings
43 |     train_cfg=dict(),
44 |     test_cfg=dict(mode='whole'))
45 | 


--------------------------------------------------------------------------------
/segmentation/configs/_base_/models/pspnet_unet_s5-d16.py:
--------------------------------------------------------------------------------
 1 | # model settings
 2 | norm_cfg = dict(type='SyncBN', requires_grad=True)
 3 | model = dict(
 4 |     type='EncoderDecoder',
 5 |     pretrained=None,
 6 |     backbone=dict(
 7 |         type='UNet',
 8 |         in_channels=3,
 9 |         base_channels=64,
10 |         num_stages=5,
11 |         strides=(1, 1, 1, 1, 1),
12 |         enc_num_convs=(2, 2, 2, 2, 2),
13 |         dec_num_convs=(2, 2, 2, 2),
14 |         downsamples=(True, True, True, True),
15 |         enc_dilations=(1, 1, 1, 1, 1),
16 |         dec_dilations=(1, 1, 1, 1),
17 |         with_cp=False,
18 |         conv_cfg=None,
19 |         norm_cfg=norm_cfg,
20 |         act_cfg=dict(type='ReLU'),
21 |         upsample_cfg=dict(type='InterpConv'),
22 |         norm_eval=False),
23 |     decode_head=dict(
24 |         type='PSPHead',
25 |         in_channels=64,
26 |         in_index=4,
27 |         channels=16,
28 |         pool_scales=(1, 2, 3, 6),
29 |         dropout_ratio=0.1,
30 |         num_classes=2,
31 |         norm_cfg=norm_cfg,
32 |         align_corners=False,
33 |         loss_decode=dict(
34 |             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
35 |     auxiliary_head=dict(
36 |         type='FCNHead',
37 |         in_channels=128,
38 |         in_index=3,
39 |         channels=64,
40 |         num_convs=1,
41 |         concat_input=False,
42 |         dropout_ratio=0.1,
43 |         num_classes=2,
44 |         norm_cfg=norm_cfg,
45 |         align_corners=False,
46 |         loss_decode=dict(
47 |             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
48 |     # model training and testing settings
49 |     train_cfg=dict(),
50 |     test_cfg=dict(mode='slide', crop_size=256, stride=170))
51 | 


--------------------------------------------------------------------------------
/segmentation/configs/_base_/models/segformer_mit-b0.py:
--------------------------------------------------------------------------------
 1 | # model settings
 2 | norm_cfg = dict(type='SyncBN', requires_grad=True)
 3 | model = dict(
 4 |     type='EncoderDecoder',
 5 |     pretrained=None,
 6 |     backbone=dict(
 7 |         type='MixVisionTransformer',
 8 |         in_channels=3,
 9 |         embed_dims=32,
10 |         num_stages=4,
11 |         num_layers=[2, 2, 2, 2],
12 |         num_heads=[1, 2, 5, 8],
13 |         patch_sizes=[7, 3, 3, 3],
14 |         sr_ratios=[8, 4, 2, 1],
15 |         out_indices=(0, 1, 2, 3),
16 |         mlp_ratio=4,
17 |         qkv_bias=True,
18 |         drop_rate=0.0,
19 |         attn_drop_rate=0.0,
20 |         drop_path_rate=0.1),
21 |     decode_head=dict(
22 |         type='SegformerHead',
23 |         in_channels=[32, 64, 160, 256],
24 |         in_index=[0, 1, 2, 3],
25 |         channels=256,
26 |         dropout_ratio=0.1,
27 |         num_classes=19,
28 |         norm_cfg=norm_cfg,
29 |         align_corners=False,
30 |         loss_decode=dict(
31 |             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
32 |     # model training and testing settings
33 |     train_cfg=dict(),
34 |     test_cfg=dict(mode='whole'))
35 | 


--------------------------------------------------------------------------------
/segmentation/configs/_base_/models/twins_pcpvt-s_fpn.py:
--------------------------------------------------------------------------------
 1 | # model settings
 2 | backbone_norm_cfg = dict(type='LN')
 3 | norm_cfg = dict(type='SyncBN', requires_grad=True)
 4 | model = dict(
 5 |     type='EncoderDecoder',
 6 |     backbone=dict(
 7 |         type='PCPVT',
 8 |         init_cfg=dict(
 9 |             type='Pretrained', checkpoint='pretrained/pcpvt_small.pth'),
10 |         in_channels=3,
11 |         embed_dims=[64, 128, 320, 512],
12 |         num_heads=[1, 2, 5, 8],
13 |         patch_sizes=[4, 2, 2, 2],
14 |         strides=[4, 2, 2, 2],
15 |         mlp_ratios=[8, 8, 4, 4],
16 |         out_indices=(0, 1, 2, 3),
17 |         qkv_bias=True,
18 |         norm_cfg=backbone_norm_cfg,
19 |         depths=[3, 4, 6, 3],
20 |         sr_ratios=[8, 4, 2, 1],
21 |         norm_after_stage=False,
22 |         drop_rate=0.0,
23 |         attn_drop_rate=0.,
24 |         drop_path_rate=0.2),
25 |     neck=dict(
26 |         type='FPN',
27 |         in_channels=[64, 128, 320, 512],
28 |         out_channels=256,
29 |         num_outs=4),
30 |     decode_head=dict(
31 |         type='FPNHead',
32 |         in_channels=[256, 256, 256, 256],
33 |         in_index=[0, 1, 2, 3],
34 |         feature_strides=[4, 8, 16, 32],
35 |         channels=128,
36 |         dropout_ratio=0.1,
37 |         num_classes=150,
38 |         norm_cfg=norm_cfg,
39 |         align_corners=False,
40 |         loss_decode=dict(
41 |             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
42 |     # model training and testing settings
43 |     train_cfg=dict(),
44 |     test_cfg=dict(mode='whole'))
45 | 


--------------------------------------------------------------------------------
/segmentation/configs/_base_/models/twins_pcpvt-s_upernet.py:
--------------------------------------------------------------------------------
 1 | # model settings
 2 | backbone_norm_cfg = dict(type='LN')
 3 | norm_cfg = dict(type='SyncBN', requires_grad=True)
 4 | model = dict(
 5 |     type='EncoderDecoder',
 6 |     backbone=dict(
 7 |         type='PCPVT',
 8 |         init_cfg=dict(
 9 |             type='Pretrained', checkpoint='pretrained/pcpvt_small.pth'),
10 |         in_channels=3,
11 |         embed_dims=[64, 128, 320, 512],
12 |         num_heads=[1, 2, 5, 8],
13 |         patch_sizes=[4, 2, 2, 2],
14 |         strides=[4, 2, 2, 2],
15 |         mlp_ratios=[8, 8, 4, 4],
16 |         out_indices=(0, 1, 2, 3),
17 |         qkv_bias=True,
18 |         norm_cfg=backbone_norm_cfg,
19 |         depths=[3, 4, 6, 3],
20 |         sr_ratios=[8, 4, 2, 1],
21 |         norm_after_stage=False,
22 |         drop_rate=0.0,
23 |         attn_drop_rate=0.,
24 |         drop_path_rate=0.2),
25 |     decode_head=dict(
26 |         type='UPerHead',
27 |         in_channels=[64, 128, 320, 512],
28 |         in_index=[0, 1, 2, 3],
29 |         pool_scales=(1, 2, 3, 6),
30 |         channels=512,
31 |         dropout_ratio=0.1,
32 |         num_classes=150,
33 |         norm_cfg=norm_cfg,
34 |         align_corners=False,
35 |         loss_decode=dict(
36 |             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
37 |     auxiliary_head=dict(
38 |         type='FCNHead',
39 |         in_channels=320,
40 |         in_index=2,
41 |         channels=256,
42 |         num_convs=1,
43 |         concat_input=False,
44 |         dropout_ratio=0.1,
45 |         num_classes=150,
46 |         norm_cfg=norm_cfg,
47 |         align_corners=False,
48 |         loss_decode=dict(
49 |             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
50 |     # model training and testing settings
51 |     train_cfg=dict(),
52 |     test_cfg=dict(mode='whole'))
53 | 


--------------------------------------------------------------------------------
/segmentation/configs/_base_/models/upernet_beit.py:
--------------------------------------------------------------------------------
 1 | # --------------------------------------------------------
 2 | # BEIT: BERT Pre-Training of Image Transformers (https://arxiv.org/abs/2106.08254)
 3 | # Github source: https://github.com/microsoft/unilm/tree/master/beit
 4 | # Copyright (c) 2021 Microsoft
 5 | # Licensed under The MIT License [see LICENSE for details]
 6 | # By Hangbo Bao
 7 | # Based on timm, mmseg, setr, xcit and swin code bases
 8 | # https://github.com/rwightman/pytorch-image-models/tree/master/timm
 9 | # https://github.com/fudan-zvg/SETR
10 | # https://github.com/facebookresearch/xcit/
11 | # https://github.com/microsoft/Swin-Transformer
12 | # --------------------------------------------------------
13 | norm_cfg = dict(type='SyncBN', requires_grad=True)
14 | model = dict(
15 |     type='EncoderDecoder',
16 |     pretrained=None,
17 |     backbone=dict(
18 |         type='XCiT',
19 |         patch_size=16,
20 |         embed_dim=384,
21 |         depth=12,
22 |         num_heads=8,
23 |         mlp_ratio=4,
24 |         qkv_bias=True,
25 |         use_abs_pos_emb=True,
26 |         use_rel_pos_bias=False,
27 |     ),
28 |     decode_head=dict(
29 |         type='UPerHead',
30 |         in_channels=[384, 384, 384, 384],
31 |         in_index=[0, 1, 2, 3],
32 |         pool_scales=(1, 2, 3, 6),
33 |         channels=512,
34 |         dropout_ratio=0.1,
35 |         num_classes=19,
36 |         norm_cfg=norm_cfg,
37 |         align_corners=False,
38 |         loss_decode=dict(
39 |             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
40 |     auxiliary_head=dict(
41 |         type='FCNHead',
42 |         in_channels=384,
43 |         in_index=2,
44 |         channels=256,
45 |         num_convs=1,
46 |         concat_input=False,
47 |         dropout_ratio=0.1,
48 |         num_classes=19,
49 |         norm_cfg=norm_cfg,
50 |         align_corners=False,
51 |         loss_decode=dict(
52 |             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
53 |     # model training and testing settings
54 |     train_cfg=dict(),
55 |     test_cfg=dict(mode='whole'))


--------------------------------------------------------------------------------
/segmentation/configs/_base_/models/upernet_r50.py:
--------------------------------------------------------------------------------
 1 | # model settings
 2 | norm_cfg = dict(type='SyncBN', requires_grad=True)
 3 | model = dict(
 4 |     type='EncoderDecoder',
 5 |     pretrained='open-mmlab://resnet50_v1c',
 6 |     backbone=dict(
 7 |         type='ResNetV1c',
 8 |         depth=50,
 9 |         num_stages=4,
10 |         out_indices=(0, 1, 2, 3),
11 |         dilations=(1, 1, 1, 1),
12 |         strides=(1, 2, 2, 2),
13 |         norm_cfg=norm_cfg,
14 |         norm_eval=False,
15 |         style='pytorch',
16 |         contract_dilation=True),
17 |     decode_head=dict(
18 |         type='UPerHead',
19 |         in_channels=[256, 512, 1024, 2048],
20 |         in_index=[0, 1, 2, 3],
21 |         pool_scales=(1, 2, 3, 6),
22 |         channels=512,
23 |         dropout_ratio=0.1,
24 |         num_classes=19,
25 |         norm_cfg=norm_cfg,
26 |         align_corners=False,
27 |         loss_decode=dict(
28 |             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
29 |     auxiliary_head=dict(
30 |         type='FCNHead',
31 |         in_channels=1024,
32 |         in_index=2,
33 |         channels=256,
34 |         num_convs=1,
35 |         concat_input=False,
36 |         dropout_ratio=0.1,
37 |         num_classes=19,
38 |         norm_cfg=norm_cfg,
39 |         align_corners=False,
40 |         loss_decode=dict(
41 |             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
42 |     # model training and testing settings
43 |     train_cfg=dict(),
44 |     test_cfg=dict(mode='whole'))
45 | 


--------------------------------------------------------------------------------
/segmentation/configs/_base_/models/upernet_swin.py:
--------------------------------------------------------------------------------
 1 | # model settings
 2 | norm_cfg = dict(type='SyncBN', requires_grad=True)
 3 | backbone_norm_cfg = dict(type='LN', requires_grad=True)
 4 | model = dict(
 5 |     type='EncoderDecoder',
 6 |     pretrained=None,
 7 |     backbone=dict(
 8 |         type='SwinTransformer',
 9 |         pretrain_img_size=224,
10 |         embed_dims=96,
11 |         patch_size=4,
12 |         window_size=7,
13 |         mlp_ratio=4,
14 |         depths=[2, 2, 6, 2],
15 |         num_heads=[3, 6, 12, 24],
16 |         strides=(4, 2, 2, 2),
17 |         out_indices=(0, 1, 2, 3),
18 |         qkv_bias=True,
19 |         qk_scale=None,
20 |         patch_norm=True,
21 |         drop_rate=0.,
22 |         attn_drop_rate=0.,
23 |         drop_path_rate=0.3,
24 |         use_abs_pos_embed=False,
25 |         act_cfg=dict(type='GELU'),
26 |         norm_cfg=backbone_norm_cfg),
27 |     decode_head=dict(
28 |         type='UPerHead',
29 |         in_channels=[96, 192, 384, 768],
30 |         in_index=[0, 1, 2, 3],
31 |         pool_scales=(1, 2, 3, 6),
32 |         channels=512,
33 |         dropout_ratio=0.1,
34 |         num_classes=19,
35 |         norm_cfg=norm_cfg,
36 |         align_corners=False,
37 |         loss_decode=dict(
38 |             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
39 |     auxiliary_head=dict(
40 |         type='FCNHead',
41 |         in_channels=384,
42 |         in_index=2,
43 |         channels=256,
44 |         num_convs=1,
45 |         concat_input=False,
46 |         dropout_ratio=0.1,
47 |         num_classes=19,
48 |         norm_cfg=norm_cfg,
49 |         align_corners=False,
50 |         loss_decode=dict(
51 |             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
52 |     # model training and testing settings
53 |     train_cfg=dict(),
54 |     test_cfg=dict(mode='whole'))
55 | 


--------------------------------------------------------------------------------
/segmentation/configs/_base_/models/upernet_vit-b16_ln_mln.py:
--------------------------------------------------------------------------------
 1 | # model settings
 2 | norm_cfg = dict(type='SyncBN', requires_grad=True)
 3 | model = dict(
 4 |     type='EncoderDecoder',
 5 |     pretrained='pretrain/jx_vit_base_p16_224-80ecf9dd.pth',
 6 |     backbone=dict(
 7 |         type='VisionTransformer',
 8 |         img_size=(512, 512),
 9 |         patch_size=16,
10 |         in_channels=3,
11 |         embed_dims=768,
12 |         num_layers=12,
13 |         num_heads=12,
14 |         mlp_ratio=4,
15 |         out_indices=(2, 5, 8, 11),
16 |         qkv_bias=True,
17 |         drop_rate=0.0,
18 |         attn_drop_rate=0.0,
19 |         drop_path_rate=0.0,
20 |         with_cls_token=True,
21 |         norm_cfg=dict(type='LN', eps=1e-6),
22 |         act_cfg=dict(type='GELU'),
23 |         norm_eval=False,
24 |         interpolate_mode='bicubic'),
25 |     neck=dict(
26 |         type='MultiLevelNeck',
27 |         in_channels=[768, 768, 768, 768],
28 |         out_channels=768,
29 |         scales=[4, 2, 1, 0.5]),
30 |     decode_head=dict(
31 |         type='UPerHead',
32 |         in_channels=[768, 768, 768, 768],
33 |         in_index=[0, 1, 2, 3],
34 |         pool_scales=(1, 2, 3, 6),
35 |         channels=512,
36 |         dropout_ratio=0.1,
37 |         num_classes=19,
38 |         norm_cfg=norm_cfg,
39 |         align_corners=False,
40 |         loss_decode=dict(
41 |             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
42 |     auxiliary_head=dict(
43 |         type='FCNHead',
44 |         in_channels=768,
45 |         in_index=3,
46 |         channels=256,
47 |         num_convs=1,
48 |         concat_input=False,
49 |         dropout_ratio=0.1,
50 |         num_classes=19,
51 |         norm_cfg=norm_cfg,
52 |         align_corners=False,
53 |         loss_decode=dict(
54 |             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
55 |     # model training and testing settings
56 |     train_cfg=dict(),
57 |     test_cfg=dict(mode='whole'))  # yapf: disable
58 | 


--------------------------------------------------------------------------------
/segmentation/configs/_base_/schedules/schedule_160k.py:
--------------------------------------------------------------------------------
 1 | # optimizer
 2 | optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005)
 3 | optimizer_config = dict()
 4 | # learning policy
 5 | lr_config = dict(policy='poly', power=0.9, min_lr=1e-4, by_epoch=False)
 6 | # runtime settings
 7 | runner = dict(type='IterBasedRunner', max_iters=160000)
 8 | checkpoint_config = dict(by_epoch=False, interval=16000)
 9 | evaluation = dict(interval=16000, metric='mIoU', pre_eval=True)
10 | 


--------------------------------------------------------------------------------
/segmentation/configs/_base_/schedules/schedule_20k.py:
--------------------------------------------------------------------------------
 1 | # optimizer
 2 | optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005)
 3 | optimizer_config = dict()
 4 | # learning policy
 5 | lr_config = dict(policy='poly', power=0.9, min_lr=1e-4, by_epoch=False)
 6 | # runtime settings
 7 | runner = dict(type='IterBasedRunner', max_iters=20000)
 8 | checkpoint_config = dict(by_epoch=False, interval=2000)
 9 | evaluation = dict(interval=2000, metric='mIoU', pre_eval=True)
10 | 


--------------------------------------------------------------------------------
/segmentation/configs/_base_/schedules/schedule_320k.py:
--------------------------------------------------------------------------------
 1 | # optimizer
 2 | optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005)
 3 | optimizer_config = dict()
 4 | # learning policy
 5 | lr_config = dict(policy='poly', power=0.9, min_lr=1e-4, by_epoch=False)
 6 | # runtime settings
 7 | runner = dict(type='IterBasedRunner', max_iters=320000)
 8 | checkpoint_config = dict(by_epoch=False, interval=32000)
 9 | evaluation = dict(interval=32000, metric='mIoU')
10 | 


--------------------------------------------------------------------------------
/segmentation/configs/_base_/schedules/schedule_40k.py:
--------------------------------------------------------------------------------
 1 | # optimizer
 2 | optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005)
 3 | optimizer_config = dict()
 4 | # learning policy
 5 | lr_config = dict(policy='poly', power=0.9, min_lr=1e-4, by_epoch=False)
 6 | # runtime settings
 7 | runner = dict(type='IterBasedRunner', max_iters=40000)
 8 | checkpoint_config = dict(by_epoch=False, interval=4000)
 9 | evaluation = dict(interval=4000, metric='mIoU', pre_eval=True)
10 | 


--------------------------------------------------------------------------------
/segmentation/configs/_base_/schedules/schedule_80k.py:
--------------------------------------------------------------------------------
 1 | # optimizer
 2 | optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005)
 3 | optimizer_config = dict()
 4 | # learning policy
 5 | lr_config = dict(policy='poly', power=0.9, min_lr=1e-4, by_epoch=False)
 6 | # runtime settings
 7 | runner = dict(type='IterBasedRunner', max_iters=80000)
 8 | checkpoint_config = dict(by_epoch=False, interval=8000)
 9 | evaluation = dict(interval=8000, metric='mIoU', pre_eval=True)
10 | 


--------------------------------------------------------------------------------
/segmentation/configs/chase_db1/README.md:
--------------------------------------------------------------------------------
 1 | # CHASE DB1
 2 | 
 3 | <!-- [ALGORITHM] -->
 4 | 
 5 | ## Introduction
 6 | 
 7 | The training and validation sets of CHASE DB1 can be downloaded from [here](https://staffnet.kingston.ac.uk/~ku15565/CHASE_DB1/assets/CHASEDB1.zip).
 8 | 
 9 | To convert the CHASE DB1 dataset to the MMSegmentation format, run the [script](https://github.com/open-mmlab/mmsegmentation/blob/master/tools/convert_datasets/chase_db1.py) provided by the official MMSegmentation repository:
10 | 
11 | ```shell
12 | python /path/to/convertor/chase_db1.py /path/to/CHASEDB1.zip
13 | ```
14 | 
15 | The script will create the directory structure automatically.
16 | 
17 | ## Results and Models
18 | 
19 | | Method      | Backbone      | Pretrain | Batch Size | Lr schd | Crop Size | mDice |  #Param | Config                                                           | Download                                               |
20 | |:-----------:|:-------------:|:---------:|:----------:|:-------:|:---------:|:---------:|:------:|:----------------------------------------------------------------:|:------------------------------------------------------:|
21 | | Mask2Former | ViT-Adapter-L | BEiT-L    | 4x4        | 40k     | 128       | 89.4      |  350M   | [config](./mask2former_beit_adapter_large_128_40k_chase_db1_ss.py) | [log](https://github.com/czczup/ViT-Adapter/issues/11) |
22 | 


--------------------------------------------------------------------------------
/segmentation/configs/potsdam/README.md:
--------------------------------------------------------------------------------
 1 | # ISPRS Potsdam
 2 | 
 3 | <!-- [ALGORITHM] -->
 4 | 
 5 | ## Introduction
 6 | 
 7 | The Potsdam dataset is for urban semantic segmentation used in the 2D Semantic Labeling Contest - Potsdam.
 8 | 
 9 | The dataset can be requested at the challenge [homepage](https://www2.isprs.org/commissions/comm2/wg4/benchmark/data-request-form/). The `2_Ortho_RGB.zip` and `5_Labels_all_noBoundary.zip` are required.
10 | 
11 | For the Potsdam dataset, please run the [script](https://github.com/open-mmlab/mmsegmentation/blob/master/tools/convert_datasets/potsdam.py) provided by the official MMSegmentation repository to download and re-organize the dataset.
12 | 
13 | ```shell
14 | python /path/to/convertor/potsdam.py /path/to/potsdam
15 | ```
16 | 
17 | In the default setting, it will generate 3456 images for training and 2016 images for validation.
18 | 
19 | ## Results and Models
20 | 
21 | | Method      | Backbone      | Pretrain | Batch Size | Lr schd | Crop Size | mIoU (SS) | #Param | Config                                                           | Download                                               |
22 | |:-----------:|:-------------:|:--------:|:----------:|:-------:|:---------:|:---------:|:------:|:----------------------------------------------------------------:|:------------------------------------------------------:|
23 | | Mask2Former | ViT-Adapter-L | BEiT-L   | 8x1        | 80k     | 512       | 80.0      | 352M   | [config](./mask2former_beit_adapter_large_512_80k_potsdam_ss.py) | [log](https://github.com/czczup/ViT-Adapter/issues/38) |
24 | 


--------------------------------------------------------------------------------
/segmentation/dist_test.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | CONFIG=$1              # path to the config file
 4 | CHECKPOINT=$2          # path to the checkpoint to evaluate
 5 | GPUS=$3                # number of processes launched on this node
 6 | PORT=${PORT:-29510}    # master port; override via the PORT env variable
 7 | PYTHONPATH="$(dirname "$0")/..":$PYTHONPATH \
 8 | python -m torch.distributed.launch --nproc_per_node="$GPUS" --master_port="$PORT" \
 9 |     "$(dirname "$0")/test.py" "$CONFIG" "$CHECKPOINT" --launcher pytorch "${@:4}"
10 | 


--------------------------------------------------------------------------------
/segmentation/dist_train.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | CONFIG=$1              # path to the config file
 4 | GPUS=$2                # number of processes launched on this node
 5 | PORT=${PORT:-29300}    # master port; override via the PORT env variable
 6 | 
 7 | #PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \
 8 | python -m torch.distributed.launch --nproc_per_node="$GPUS" --master_port="$PORT" \
 9 |     "$(dirname "$0")/train.py" "$CONFIG" --launcher pytorch --deterministic "${@:3}"
10 | 


--------------------------------------------------------------------------------
/segmentation/image_demo.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) OpenMMLab. All rights reserved.
 2 | from argparse import ArgumentParser
 3 | 
 4 | import mmcv
 5 | 
 6 | import mmcv_custom   # noqa: F401,F403
 7 | import mmseg_custom   # noqa: F401,F403
 8 | from mmseg.apis import inference_segmentor, init_segmentor, show_result_pyplot
 9 | from mmseg.core.evaluation import get_palette
10 | from mmcv.runner import load_checkpoint
11 | from mmseg.core import get_classes
12 | import cv2
13 | import os.path as osp
14 | 
15 | 
16 | def main():
17 |     parser = ArgumentParser()
18 |     parser.add_argument('config', help='Config file')
19 |     parser.add_argument('checkpoint', help='Checkpoint file')
20 |     parser.add_argument('img', help='Image file')
21 |     parser.add_argument('--out', type=str, default="demo", help='out dir')
22 |     parser.add_argument(
23 |         '--device', default='cuda:0', help='Device used for inference')
24 |     parser.add_argument(
25 |         '--palette',
26 |         default='cityscapes',
27 |         help='Color palette used for segmentation map')
28 |     parser.add_argument(
29 |         '--opacity',
30 |         type=float,
31 |         default=0.5,
32 |         help='Opacity of painted segmentation map. In (0, 1] range.')
33 |     args = parser.parse_args()
34 | 
35 |     # build the model from a config file and a checkpoint file
36 |     
37 |     model = init_segmentor(args.config, checkpoint=None, device=args.device)
38 |     checkpoint = load_checkpoint(model, args.checkpoint, map_location='cpu')
39 |     if 'CLASSES' in checkpoint.get('meta', {}):
40 |         model.CLASSES = checkpoint['meta']['CLASSES']
41 |     else:
42 |         model.CLASSES = get_classes(args.palette)
43 |         
44 |     # test a single image
45 |     result = inference_segmentor(model, args.img)
46 |     # show the results
47 |     if hasattr(model, 'module'):
48 |         model = model.module
49 |     img = model.show_result(args.img, result,
50 |                             palette=get_palette(args.palette),
51 |                             show=False, opacity=args.opacity)
52 |     mmcv.mkdir_or_exist(args.out)
53 |     out_path = osp.join(args.out, osp.basename(args.img))
54 |     cv2.imwrite(out_path, img)
55 |     print(f"Result is save at {out_path}")
56 | 
57 | if __name__ == '__main__':
58 |     main()


--------------------------------------------------------------------------------
/segmentation/mmcv_custom/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Shanghai AI Lab. All rights reserved.
 2 | from .checkpoint import load_checkpoint
 3 | from .customized_text import CustomizedTextLoggerHook
 4 | from .layer_decay_optimizer_constructor import LayerDecayOptimizerConstructor
 5 | from .my_checkpoint import my_load_checkpoint
 6 | 
 7 | __all__ = [
 8 |     'LayerDecayOptimizerConstructor',
 9 |     'CustomizedTextLoggerHook',
10 |     'load_checkpoint', 'my_checkpoint',
11 | ]
12 | 


--------------------------------------------------------------------------------
/segmentation/mmseg_custom/__init__.py:
--------------------------------------------------------------------------------
1 | from .core import *  # noqa: F401,F403
2 | from .datasets import *  # noqa: F401,F403
3 | from .models import *  # noqa: F401,F403
4 | 


--------------------------------------------------------------------------------
/segmentation/mmseg_custom/core/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Shanghai AI Lab. All rights reserved.
 2 | from mmseg.core.evaluation import *  # noqa: F401, F403
 3 | from mmseg.core.seg import *  # noqa: F401, F403
 4 | 
 5 | from .anchor import *  # noqa: F401,F403
 6 | from .box import *  # noqa: F401,F403
 7 | from .evaluation import *  # noqa: F401,F403
 8 | from .mask import *  # noqa: F401,F403
 9 | from .utils import *  # noqa: F401, F403
10 | 


--------------------------------------------------------------------------------
/segmentation/mmseg_custom/core/anchor/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Shanghai AI Lab. All rights reserved.
2 | from .point_generator import MlvlPointGenerator  # noqa: F401,F403
3 | 


--------------------------------------------------------------------------------
/segmentation/mmseg_custom/core/anchor/builder.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) OpenMMLab. All rights reserved.
 2 | import warnings
 3 | 
 4 | from mmcv.utils import Registry, build_from_cfg
 5 | 
 6 | PRIOR_GENERATORS = Registry('Generator for anchors and points')
 7 | 
 8 | ANCHOR_GENERATORS = PRIOR_GENERATORS
 9 | 
10 | 
11 | def build_prior_generator(cfg, default_args=None):
12 |     return build_from_cfg(cfg, PRIOR_GENERATORS, default_args)
13 | 
14 | 
15 | def build_anchor_generator(cfg, default_args=None):
16 |     warnings.warn(
17 |         '``build_anchor_generator`` would be deprecated soon, please use '
18 |         '``build_prior_generator`` ')
19 |     return build_prior_generator(cfg, default_args=default_args)
20 | 


--------------------------------------------------------------------------------
/segmentation/mmseg_custom/core/box/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Shanghai AI Lab. All rights reserved.
2 | from .builder import *  # noqa: F401,F403
3 | from .samplers import MaskPseudoSampler  # noqa: F401,F403
4 | 


--------------------------------------------------------------------------------
/segmentation/mmseg_custom/core/box/builder.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) OpenMMLab. All rights reserved.
 2 | from mmcv.utils import Registry, build_from_cfg
 3 | 
 4 | BBOX_SAMPLERS = Registry('bbox_sampler')
 5 | BBOX_CODERS = Registry('bbox_coder')
 6 | 
 7 | 
 8 | def build_sampler(cfg, **default_args):
 9 |     """Build a sampler from BBOX_SAMPLERS; keyword args become default_args."""
10 |     return build_from_cfg(cfg, BBOX_SAMPLERS, default_args)
11 | 
12 | 
13 | def build_bbox_coder(cfg, **default_args):
14 |     """Build a coder from BBOX_CODERS; keyword args become default_args."""
15 |     return build_from_cfg(cfg, BBOX_CODERS, default_args)
16 | 


--------------------------------------------------------------------------------
/segmentation/mmseg_custom/core/box/samplers/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Shanghai AI Lab. All rights reserved.
2 | from .mask_pseudo_sampler import MaskPseudoSampler  # noqa: F401,F403
3 | 


--------------------------------------------------------------------------------
/segmentation/mmseg_custom/core/box/samplers/mask_pseudo_sampler.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) OpenMMLab. All rights reserved.
 2 | """copy from
 3 | https://github.com/ZwwWayne/K-Net/blob/main/knet/det/mask_pseudo_sampler.py."""
 4 | 
 5 | import torch
 6 | 
 7 | from ..builder import BBOX_SAMPLERS
 8 | from .base_sampler import BaseSampler
 9 | from .mask_sampling_result import MaskSamplingResult
10 | 
11 | 
12 | @BBOX_SAMPLERS.register_module()
13 | class MaskPseudoSampler(BaseSampler):
14 |     """A pseudo sampler that keeps every assigned mask (no real sampling)."""
15 |     def __init__(self, **kwargs):
16 |         pass  # kwargs are accepted but ignored
17 | 
18 |     def _sample_pos(self, **kwargs):
19 |         """Sample positive samples (not implemented for this sampler)."""
20 |         raise NotImplementedError
21 | 
22 |     def _sample_neg(self, **kwargs):
23 |         """Sample negative samples (not implemented for this sampler)."""
24 |         raise NotImplementedError
25 | 
26 |     def sample(self, assign_result, masks, gt_masks, **kwargs):
27 |         """Directly returns the positive and negative indices of samples.
28 | 
29 |         Args:
30 |             assign_result (:obj:`AssignResult`): Assigned results
31 |             masks (torch.Tensor): Candidate masks (dim 0 indexes samples)
32 |             gt_masks (torch.Tensor): Ground truth masks
33 |         Returns:
34 |             :obj:`MaskSamplingResult`: sampler results
35 |         """
36 |         pos_inds = torch.nonzero(assign_result.gt_inds > 0,
37 |                                  as_tuple=False).squeeze(-1).unique()
38 |         neg_inds = torch.nonzero(assign_result.gt_inds == 0,
39 |                                  as_tuple=False).squeeze(-1).unique()
40 |         gt_flags = masks.new_zeros(masks.shape[0], dtype=torch.uint8)
41 |         sampling_result = MaskSamplingResult(pos_inds, neg_inds, masks,
42 |                                              gt_masks, assign_result, gt_flags)
43 |         return sampling_result
44 | 


--------------------------------------------------------------------------------
/segmentation/mmseg_custom/core/box/samplers/mask_sampling_result.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) OpenMMLab. All rights reserved.
 2 | """copy from
 3 | https://github.com/ZwwWayne/K-Net/blob/main/knet/det/mask_pseudo_sampler.py."""
 4 | 
 5 | import torch
 6 | 
 7 | from .sampling_result import SamplingResult
 8 | 
 9 | 
10 | class MaskSamplingResult(SamplingResult):
11 |     """Sampling result holding positive/negative masks and matched gts."""
12 |     def __init__(self, pos_inds, neg_inds, masks, gt_masks, assign_result,
13 |                  gt_flags):
14 |         self.pos_inds = pos_inds
15 |         self.neg_inds = neg_inds
16 |         self.pos_masks = masks[pos_inds]
17 |         self.neg_masks = masks[neg_inds]
18 |         self.pos_is_gt = gt_flags[pos_inds]
19 | 
20 |         self.num_gts = gt_masks.shape[0]
21 |         self.pos_assigned_gt_inds = assign_result.gt_inds[pos_inds] - 1
22 | 
23 |         if gt_masks.numel() == 0:
24 |             # empty gt_masks cannot be indexed below; use an empty tensor
25 |             assert self.pos_assigned_gt_inds.numel() == 0
26 |             self.pos_gt_masks = torch.empty_like(gt_masks)
27 |         else:
28 |             self.pos_gt_masks = gt_masks[self.pos_assigned_gt_inds, :]
29 | 
30 |         if assign_result.labels is not None:
31 |             self.pos_gt_labels = assign_result.labels[pos_inds]
32 |         else:
33 |             self.pos_gt_labels = None
34 | 
35 |     @property
36 |     def masks(self):
37 |         """torch.Tensor: concatenated positive and negative masks"""
38 |         return torch.cat([self.pos_masks, self.neg_masks])
39 | 
40 |     def __nice__(self):
41 |         data = self.info.copy()
42 |         data['pos_masks'] = data.pop('pos_masks').shape
43 |         data['neg_masks'] = data.pop('neg_masks').shape
44 |         parts = [f"'{k}': {v!r}" for k, v in sorted(data.items())]
45 |         body = '    ' + ',\n    '.join(parts)
46 |         return '{\n' + body + '\n}'
47 | 
48 |     @property
49 |     def info(self):
50 |         """Returns a dictionary of info about the object."""
51 |         return {
52 |             'pos_inds': self.pos_inds,
53 |             'neg_inds': self.neg_inds,
54 |             'pos_masks': self.pos_masks,
55 |             'neg_masks': self.neg_masks,
56 |             'pos_is_gt': self.pos_is_gt,
57 |             'num_gts': self.num_gts,
58 |             'pos_assigned_gt_inds': self.pos_assigned_gt_inds,
59 |         }
60 | 


--------------------------------------------------------------------------------
/segmentation/mmseg_custom/core/evaluation/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Shanghai AI Lab. All rights reserved.
2 | from .panoptic_utils import INSTANCE_OFFSET  # noqa: F401,F403
3 | 


--------------------------------------------------------------------------------
/segmentation/mmseg_custom/core/evaluation/panoptic_utils.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | # A custom value to distinguish instance ID and category ID; need to
3 | # be greater than the number of categories.
4 | # For a pixel in the panoptic result map:
5 | #   pan_id = ins_id * INSTANCE_OFFSET + cat_id
6 | INSTANCE_OFFSET = 1000
7 | 


--------------------------------------------------------------------------------
/segmentation/mmseg_custom/core/mask/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Shanghai AI Lab. All rights reserved.
2 | from .utils import mask2bbox  # noqa: F401,F403
3 | 


--------------------------------------------------------------------------------
/segmentation/mmseg_custom/core/utils/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) OpenMMLab. All rights reserved.
 2 | from .dist_utils import (DistOptimizerHook, all_reduce_dict, allreduce_grads,
 3 |                          reduce_mean)
 4 | from .misc import add_prefix, multi_apply
 5 | 
 6 | __all__ = [
 7 |     'add_prefix', 'multi_apply', 'DistOptimizerHook', 'allreduce_grads',
 8 |     'all_reduce_dict', 'reduce_mean'
 9 | ]
10 | 


--------------------------------------------------------------------------------
/segmentation/mmseg_custom/core/utils/misc.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) OpenMMLab. All rights reserved.
 2 | def multi_apply(func, *args, **kwargs):
 3 |     """Apply function to a list of arguments.
 4 | 
 5 |     Note:
 6 |         This function applies the ``func`` to multiple inputs and
 7 |         map the multiple outputs of the ``func`` into different
 8 |         list. Each list contains the same type of outputs corresponding
 9 |         to different inputs.
10 | 
11 |     Args:
12 |         func (Function): A function that will be applied to a list of
13 |             arguments
14 | 
15 |     Returns:
16 |         tuple(list): A tuple containing multiple list, each list contains \
17 |             a kind of returned results by the function
18 |     """
19 |     pfunc = partial(func, **kwargs) if kwargs else func
20 |     map_results = map(pfunc, *args)
21 |     return tuple(map(list, zip(*map_results)))
22 | 
23 | 
24 | def add_prefix(inputs, prefix):
25 |     """Add prefix for dict.
26 | 
27 |     Args:
28 |         inputs (dict): The input dict with str keys.
29 |         prefix (str): The prefix to add.
30 | 
31 |     Returns:
32 | 
33 |         dict: The dict with keys updated with ``prefix``.
34 |     """
35 | 
36 |     outputs = dict()
37 |     for name, value in inputs.items():
38 |         outputs[f'{prefix}.{name}'] = value
39 | 
40 |     return outputs
41 | 


--------------------------------------------------------------------------------
/segmentation/mmseg_custom/datasets/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from .mapillary import MapillaryDataset  # noqa: F401,F403
3 | from .potsdam import PotsdamDataset  # noqa: F401,F403
4 | from .pipelines import *  # noqa: F401,F403
5 | 


--------------------------------------------------------------------------------
/segmentation/mmseg_custom/datasets/pipelines/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from .formatting import DefaultFormatBundle, ToMask
3 | from .transform import MapillaryHack, PadShortSide, SETR_Resize
4 | 
5 | __all__ = [
6 |     'DefaultFormatBundle', 'ToMask', 'SETR_Resize', 'PadShortSide',
7 |     'MapillaryHack'
8 | ]
9 | 


--------------------------------------------------------------------------------
/segmentation/mmseg_custom/datasets/potsdam.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) OpenMMLab. All rights reserved.
 2 | from mmseg.datasets.builder import DATASETS
 3 | from mmseg.datasets.custom import CustomDataset
 4 | 
 5 | @DATASETS.register_module(force=True)
 6 | class PotsdamDataset(CustomDataset):
 7 |     """ISPRS Potsdam dataset.
 8 | 
 9 |     In segmentation map annotation for Potsdam dataset, 0 is the ignore index.
10 |     ``reduce_zero_label`` should be set to True. The ``img_suffix`` and
11 |     ``seg_map_suffix`` are both fixed to '.png'.
12 |     """
13 |     CLASSES = ('impervious_surface', 'building', 'low_vegetation', 'tree',
14 |                'car', 'clutter')
15 | 
16 |     PALETTE = [[255, 255, 255], [0, 0, 255], [0, 255, 255], [0, 255, 0],
17 |                [255, 255, 0], [255, 0, 0]]
18 | 
19 |     def __init__(self, **kwargs):
20 |         super(PotsdamDataset, self).__init__(
21 |             img_suffix='.png',
22 |             seg_map_suffix='.png',
23 |             reduce_zero_label=True,
24 |             **kwargs)


--------------------------------------------------------------------------------
/segmentation/mmseg_custom/models/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) OpenMMLab. All rights reserved.
 2 | from .backbones import *  # noqa: F401,F403
 3 | from .builder import (MASK_ASSIGNERS, MATCH_COST, TRANSFORMER, build_assigner,
 4 |                       build_match_cost)
 5 | from .decode_heads import *  # noqa: F401,F403
 6 | from .losses import *  # noqa: F401,F403
 7 | from .plugins import *  # noqa: F401,F403
 8 | from .segmentors import *  # noqa: F401,F403
 9 | 
10 | __all__ = [
11 |     'MASK_ASSIGNERS', 'MATCH_COST', 'TRANSFORMER', 'build_assigner',
12 |     'build_match_cost'
13 | ]
14 | 


--------------------------------------------------------------------------------
/segmentation/mmseg_custom/models/backbones/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Shanghai AI Lab. All rights reserved.
 2 | from .beit_adapter import BEiTAdapter
 3 | from .beit_baseline import BEiTBaseline
 4 | from .vit_adapter import ViTAdapter
 5 | from .vit_baseline import ViTBaseline
 6 | from .uniperceiver_adapter import UniPerceiverAdapter
 7 | 
 8 | __all__ = ['ViTBaseline', 'ViTAdapter', 'BEiTAdapter',
 9 |            'BEiTBaseline', 'UniPerceiverAdapter']
10 | 


--------------------------------------------------------------------------------
/segmentation/mmseg_custom/models/builder.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) OpenMMLab. All rights reserved.
 2 | import warnings   # noqa: F401,F403
 3 | 
 4 | from mmcv.utils import Registry
 5 | 
 6 | TRANSFORMER = Registry('Transformer')
 7 | MASK_ASSIGNERS = Registry('mask_assigner')
 8 | MATCH_COST = Registry('match_cost')
 9 | 
10 | 
11 | def build_match_cost(cfg):
12 |     """Build a match cost from ``cfg`` via the ``MATCH_COST`` registry."""
13 |     return MATCH_COST.build(cfg)
14 | 
15 | 
16 | def build_assigner(cfg):
17 |     """Build an assigner from ``cfg`` via the ``MASK_ASSIGNERS`` registry."""
18 |     return MASK_ASSIGNERS.build(cfg)
19 | 
20 | 
21 | def build_transformer(cfg):
22 |     """Build a transformer from ``cfg`` via the ``TRANSFORMER`` registry."""
23 |     return TRANSFORMER.build(cfg)
24 | 


--------------------------------------------------------------------------------
/segmentation/mmseg_custom/models/decode_heads/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from .mask2former_head import Mask2FormerHead
3 | from .maskformer_head import MaskFormerHead
4 | 
5 | __all__ = [
6 |     'MaskFormerHead',
7 |     'Mask2FormerHead',
8 | ]
9 | 


--------------------------------------------------------------------------------
/segmentation/mmseg_custom/models/losses/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) OpenMMLab. All rights reserved.
 2 | from .cross_entropy_loss import (CrossEntropyLoss, binary_cross_entropy,
 3 |                                  cross_entropy, mask_cross_entropy)
 4 | from .dice_loss import DiceLoss
 5 | from .focal_loss import FocalLoss
 6 | from .match_costs import (ClassificationCost, CrossEntropyLossCost, DiceCost,
 7 |                           MaskFocalLossCost)
 8 | 
 9 | __all__ = [
10 |     'cross_entropy', 'binary_cross_entropy', 'mask_cross_entropy',
11 |     'CrossEntropyLoss', 'DiceLoss', 'FocalLoss', 'ClassificationCost',
12 |     'MaskFocalLossCost', 'DiceCost', 'CrossEntropyLossCost'
13 | ]
14 | 


--------------------------------------------------------------------------------
/segmentation/mmseg_custom/models/plugins/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Shanghai AI Lab. All rights reserved.
2 | from .msdeformattn_pixel_decoder import MSDeformAttnPixelDecoder
3 | from .pixel_decoder import PixelDecoder, TransformerEncoderPixelDecoder
4 | 
5 | __all__ = [
6 |     'PixelDecoder', 'TransformerEncoderPixelDecoder',
7 |     'MSDeformAttnPixelDecoder'
8 | ]
9 | 


--------------------------------------------------------------------------------
/segmentation/mmseg_custom/models/segmentors/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from .encoder_decoder_mask2former import EncoderDecoderMask2Former
3 | from .encoder_decoder_mask2former_aug import EncoderDecoderMask2FormerAug
4 | 
5 | __all__ = ['EncoderDecoderMask2Former', 'EncoderDecoderMask2FormerAug']
6 | 


--------------------------------------------------------------------------------
/segmentation/mmseg_custom/models/utils/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Shanghai AI Lab. All rights reserved.
 2 | from .assigner import MaskHungarianAssigner
 3 | from .point_sample import get_uncertain_point_coords_with_randomness
 4 | from .positional_encoding import (LearnedPositionalEncoding,
 5 |                                   SinePositionalEncoding)
 6 | from .transformer import (DetrTransformerDecoder, DetrTransformerDecoderLayer,
 7 |                           DynamicConv, Transformer)
 8 | 
 9 | __all__ = [
10 |     'DetrTransformerDecoderLayer', 'DetrTransformerDecoder', 'DynamicConv',
11 |     'Transformer', 'LearnedPositionalEncoding', 'SinePositionalEncoding',
12 |     'MaskHungarianAssigner', 'get_uncertain_point_coords_with_randomness'
13 | ]
14 | 


--------------------------------------------------------------------------------
/segmentation/slurm_test.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | set -x
 4 | 
 5 | PARTITION=$1  # Slurm partition, passed to srun -p
 6 | JOB_NAME=$2  # value for --job-name
 7 | CONFIG=$3  # config file forwarded to test.py
 8 | CHECKPOINT=$4  # checkpoint file forwarded to test.py
 9 | GPUS=${GPUS:-8}  # total task count (--ntasks); defaults to 8
10 | GPUS_PER_NODE=${GPUS_PER_NODE:-8}  # used for --gres=gpu: and --ntasks-per-node
11 | CPUS_PER_TASK=${CPUS_PER_TASK:-5}  # value for --cpus-per-task; defaults to 5
12 | PY_ARGS=${@:5}  # remaining arguments, appended to the test.py command
13 | SRUN_ARGS=${SRUN_ARGS:-""}  # extra srun options; empty unless set in the env
14 | 
15 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \
16 | srun -p ${PARTITION} \
17 |     --job-name=${JOB_NAME} \
18 |     --gres=gpu:${GPUS_PER_NODE} \
19 |     --ntasks=${GPUS} \
20 |     --ntasks-per-node=${GPUS_PER_NODE} \
21 |     --cpus-per-task=${CPUS_PER_TASK} \
22 |     --kill-on-bad-exit=1 \
23 |     ${SRUN_ARGS} \
24 |     python -u test.py ${CONFIG} ${CHECKPOINT} --launcher="slurm" ${PY_ARGS}
25 | 


--------------------------------------------------------------------------------
/segmentation/slurm_train.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | set -x
 4 | 
 5 | PARTITION=$1  # Slurm partition, passed to srun -p
 6 | JOB_NAME=$2  # value for --job-name
 7 | CONFIG=$3  # config file forwarded to train.py
 8 | GPUS=${GPUS:-8}  # total task count (--ntasks); defaults to 8
 9 | GPUS_PER_NODE=${GPUS_PER_NODE:-8}  # used for --gres=gpu: and --ntasks-per-node
10 | CPUS_PER_TASK=${CPUS_PER_TASK:-5}  # value for --cpus-per-task; defaults to 5
11 | SRUN_ARGS=${SRUN_ARGS:-""}  # extra srun options; empty unless set in the env
12 | PY_ARGS=${@:4}  # remaining arguments, appended to the train.py command
13 | 
14 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \
15 | srun -p ${PARTITION} \
16 |     --job-name=${JOB_NAME} \
17 |     --gres=gpu:${GPUS_PER_NODE} \
18 |     --ntasks=${GPUS} \
19 |     --ntasks-per-node=${GPUS_PER_NODE} \
20 |     --cpus-per-task=${CPUS_PER_TASK} \
21 |     --kill-on-bad-exit=1 \
22 |     ${SRUN_ARGS} \
23 |     python -u train.py ${CONFIG} --launcher="slurm" ${PY_ARGS}
24 | 


--------------------------------------------------------------------------------
/wsdm2023/configs/_base_/datasets/cityscapes_detection.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) OpenMMLab. All rights reserved.
 2 | # dataset settings
 3 | dataset_type = 'CityscapesDataset'
 4 | data_root = 'data/cityscapes/'
 5 | img_norm_cfg = dict(
 6 |     mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
 7 | train_pipeline = [
 8 |     dict(type='LoadImageFromFile'),
 9 |     dict(type='LoadAnnotations', with_bbox=True),
10 |     dict(type='Resize', img_scale=[(2048, 800), (2048, 1024)],
11 |          keep_ratio=True),
12 |     dict(type='RandomFlip', flip_ratio=0.5),
13 |     dict(type='Normalize', **img_norm_cfg),
14 |     dict(type='Pad', size_divisor=32),
15 |     dict(type='DefaultFormatBundle'),
16 |     dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
17 | ]
18 | test_pipeline = [
19 |     dict(type='LoadImageFromFile'),
20 |     dict(type='MultiScaleFlipAug',
21 |          img_scale=(2048, 1024),
22 |          flip=False,
23 |          transforms=[
24 |              dict(type='Resize', keep_ratio=True),
25 |              dict(type='RandomFlip'),
26 |              dict(type='Normalize', **img_norm_cfg),
27 |              dict(type='Pad', size_divisor=32),
28 |              dict(type='ImageToTensor', keys=['img']),
29 |              dict(type='Collect', keys=['img']),
30 |          ])
31 | ]
32 | data = dict(
33 |     samples_per_gpu=1,
34 |     workers_per_gpu=2,
35 |     train=dict(
36 |         type='RepeatDataset',
37 |         times=8,
38 |         dataset=dict(type=dataset_type,
39 |                      ann_file=data_root +
40 |                      'annotations/instancesonly_filtered_gtFine_train.json',
41 |                      img_prefix=data_root + 'leftImg8bit/train/',
42 |                      pipeline=train_pipeline)),
43 |     val=dict(type=dataset_type,
44 |              ann_file=data_root +
45 |              'annotations/instancesonly_filtered_gtFine_val.json',
46 |              img_prefix=data_root + 'leftImg8bit/val/',
47 |              pipeline=test_pipeline),
48 |     test=dict(type=dataset_type,
49 |               ann_file=data_root +
50 |               'annotations/instancesonly_filtered_gtFine_test.json',
51 |               img_prefix=data_root + 'leftImg8bit/test/',
52 |               pipeline=test_pipeline))
53 | evaluation = dict(interval=1, metric='bbox')
54 | 


--------------------------------------------------------------------------------
/wsdm2023/configs/_base_/datasets/cityscapes_instance.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) OpenMMLab. All rights reserved.
 2 | # dataset settings: Cityscapes with both box and mask annotations loaded
 3 | dataset_type = 'CityscapesDataset'
 4 | data_root = 'data/cityscapes/'
 5 | img_norm_cfg = dict(
 6 |     mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
 7 | train_pipeline = [
 8 |     dict(type='LoadImageFromFile'),
 9 |     dict(type='LoadAnnotations', with_bbox=True, with_mask=True),
10 |     dict(type='Resize', img_scale=[(2048, 800), (2048, 1024)],  # two scales listed; selection mode is Resize's default
11 |          keep_ratio=True),
12 |     dict(type='RandomFlip', flip_ratio=0.5),
13 |     dict(type='Normalize', **img_norm_cfg),
14 |     dict(type='Pad', size_divisor=32),
15 |     dict(type='DefaultFormatBundle'),
16 |     dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']),
17 | ]
18 | test_pipeline = [
19 |     dict(type='LoadImageFromFile'),
20 |     dict(type='MultiScaleFlipAug',
21 |          img_scale=(2048, 1024),  # single test scale
22 |          flip=False,
23 |          transforms=[
24 |              dict(type='Resize', keep_ratio=True),
25 |              dict(type='RandomFlip'),
26 |              dict(type='Normalize', **img_norm_cfg),
27 |              dict(type='Pad', size_divisor=32),
28 |              dict(type='ImageToTensor', keys=['img']),
29 |              dict(type='Collect', keys=['img']),  # only the image is collected at test time
30 |          ])
31 | ]
32 | data = dict(
33 |     samples_per_gpu=1,
34 |     workers_per_gpu=2,
35 |     train=dict(
36 |         type='RepeatDataset',  # the train split is wrapped and repeated `times` times
37 |         times=8,
38 |         dataset=dict(type=dataset_type,
39 |                      ann_file=data_root +
40 |                      'annotations/instancesonly_filtered_gtFine_train.json',
41 |                      img_prefix=data_root + 'leftImg8bit/train/',
42 |                      pipeline=train_pipeline)),
43 |     val=dict(type=dataset_type,
44 |              ann_file=data_root +
45 |              'annotations/instancesonly_filtered_gtFine_val.json',
46 |              img_prefix=data_root + 'leftImg8bit/val/',
47 |              pipeline=test_pipeline),
48 |     test=dict(type=dataset_type,
49 |               ann_file=data_root +
50 |               'annotations/instancesonly_filtered_gtFine_test.json',
51 |               img_prefix=data_root + 'leftImg8bit/test/',
52 |               pipeline=test_pipeline))
53 | evaluation = dict(metric=['bbox', 'segm'])  # no `interval` key is set here
54 | 


--------------------------------------------------------------------------------
/wsdm2023/configs/_base_/datasets/coco_detection.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) OpenMMLab. All rights reserved.
 2 | # dataset settings: COCO 2017 with box annotations only (no masks loaded)
 3 | dataset_type = 'CocoDataset'
 4 | data_root = 'data/coco/'
 5 | img_norm_cfg = dict(
 6 |     mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
 7 | train_pipeline = [
 8 |     dict(type='LoadImageFromFile'),
 9 |     dict(type='LoadAnnotations', with_bbox=True),
10 |     dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),
11 |     dict(type='RandomFlip', flip_ratio=0.5),
12 |     dict(type='Normalize', **img_norm_cfg),
13 |     dict(type='Pad', size_divisor=32),
14 |     dict(type='DefaultFormatBundle'),
15 |     dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
16 | ]
17 | test_pipeline = [
18 |     dict(type='LoadImageFromFile'),
19 |     dict(type='MultiScaleFlipAug',
20 |          img_scale=(1333, 800),  # single test scale, same as training
21 |          flip=False,
22 |          transforms=[
23 |              dict(type='Resize', keep_ratio=True),
24 |              dict(type='RandomFlip'),
25 |              dict(type='Normalize', **img_norm_cfg),
26 |              dict(type='Pad', size_divisor=32),
27 |              dict(type='ImageToTensor', keys=['img']),
28 |              dict(type='Collect', keys=['img']),  # only the image is collected at test time
29 |          ])
30 | ]
31 | data = dict(
32 |     samples_per_gpu=2,
33 |     workers_per_gpu=2,
34 |     train=dict(type=dataset_type,
35 |                ann_file=data_root + 'annotations/instances_train2017.json',
36 |                img_prefix=data_root + 'train2017/',
37 |                pipeline=train_pipeline),
38 |     val=dict(type=dataset_type,
39 |              ann_file=data_root + 'annotations/instances_val2017.json',
40 |              img_prefix=data_root + 'val2017/',
41 |              pipeline=test_pipeline),
42 |     test=dict(type=dataset_type,  # test points at the same val2017 files as `val`
43 |               ann_file=data_root + 'annotations/instances_val2017.json',
44 |               img_prefix=data_root + 'val2017/',
45 |               pipeline=test_pipeline))
46 | evaluation = dict(interval=1, metric='bbox')
47 | 


--------------------------------------------------------------------------------
/wsdm2023/configs/_base_/datasets/coco_instance.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) OpenMMLab. All rights reserved.
 2 | # dataset settings: COCO 2017 with both box and mask annotations loaded
 3 | dataset_type = 'CocoDataset'
 4 | data_root = 'data/coco/'
 5 | img_norm_cfg = dict(
 6 |     mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
 7 | train_pipeline = [
 8 |     dict(type='LoadImageFromFile'),
 9 |     dict(type='LoadAnnotations', with_bbox=True, with_mask=True),
10 |     dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),
11 |     dict(type='RandomFlip', flip_ratio=0.5),
12 |     dict(type='Normalize', **img_norm_cfg),
13 |     dict(type='Pad', size_divisor=32),
14 |     dict(type='DefaultFormatBundle'),
15 |     dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']),
16 | ]
17 | test_pipeline = [
18 |     dict(type='LoadImageFromFile'),
19 |     dict(type='MultiScaleFlipAug',
20 |          img_scale=(1333, 800),  # single test scale, same as training
21 |          flip=False,
22 |          transforms=[
23 |              dict(type='Resize', keep_ratio=True),
24 |              dict(type='RandomFlip'),
25 |              dict(type='Normalize', **img_norm_cfg),
26 |              dict(type='Pad', size_divisor=32),
27 |              dict(type='ImageToTensor', keys=['img']),
28 |              dict(type='Collect', keys=['img']),  # only the image is collected at test time
29 |          ])
30 | ]
31 | data = dict(
32 |     samples_per_gpu=2,
33 |     workers_per_gpu=2,
34 |     train=dict(type=dataset_type,
35 |                ann_file=data_root + 'annotations/instances_train2017.json',
36 |                img_prefix=data_root + 'train2017/',
37 |                pipeline=train_pipeline),
38 |     val=dict(type=dataset_type,
39 |              ann_file=data_root + 'annotations/instances_val2017.json',
40 |              img_prefix=data_root + 'val2017/',
41 |              pipeline=test_pipeline),
42 |     test=dict(type=dataset_type,  # test points at the same val2017 files as `val`
43 |               ann_file=data_root + 'annotations/instances_val2017.json',
44 |               img_prefix=data_root + 'val2017/',
45 |               pipeline=test_pipeline))
46 | evaluation = dict(metric=['bbox', 'segm'])  # no `interval` key is set here
47 | 


--------------------------------------------------------------------------------
/wsdm2023/configs/_base_/datasets/grounding_gqa.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) OpenMMLab. All rights reserved.
 2 | # dataset settings: grounding on GQA; each sample carries an image plus a 'refer' text field
 3 | dataset_type = 'VGDataset'
 4 | data_root = 'data/grounding_gqa/'
 5 | img_norm_cfg = dict(
 6 |     mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
 7 | train_pipeline = [
 8 |     dict(type='LoadImageFromFile'),
 9 |     dict(type='LoadAnnotations', with_bbox=True),
10 |     dict(type='LoadRefer'),  # custom transform exported by mmdet_custom.apis
11 |     dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),
12 |     dict(type='RandomFlipWithRefer', flip_ratio=0.5),  # custom transform exported by mmdet_custom.apis
13 |     dict(type='Normalize', **img_norm_cfg),
14 |     dict(type='Pad', size_divisor=32),
15 |     dict(type='TokenizeRefer', max_sent_len=64),  # placed after the flip step and before bundling
16 |     dict(type='DefaultFormatBundle'),
17 |     dict(type='Collect', keys=['img', 'refer',
18 |          'r_mask', 'gt_bboxes', 'gt_labels']),
19 | ]
20 | test_pipeline = [
21 |     dict(type='LoadImageFromFile'),
22 |     dict(type='LoadRefer'),
23 |     dict(type='MultiScaleFlipAug',
24 |          img_scale=(1333, 800),
25 |          flip=False,
26 |          transforms=[
27 |              dict(type='Resize', keep_ratio=True),
28 |              dict(type='RandomFlipWithRefer'),
29 |              dict(type='Normalize', **img_norm_cfg),
30 |              dict(type='Pad', size_divisor=32),
31 |              dict(type='ImageToTensor', keys=['img']),
32 |              dict(type='TokenizeRefer', max_sent_len=64),  # same max_sent_len as training
33 |              dict(type='Collect', keys=['img', 'refer', 'r_mask']),
34 |          ])
35 | ]
36 | data = dict(
37 |     samples_per_gpu=2,
38 |     workers_per_gpu=2,
39 |     train=dict(type=dataset_type,
40 |                ann_file=data_root + 'annotations/train.json',
41 |                img_prefix=data_root + 'images',
42 |                pipeline=train_pipeline),
43 |     val=dict(type=dataset_type,
44 |              ann_file=data_root + 'annotations/val.json',
45 |              img_prefix=data_root + 'images',
46 |              pipeline=test_pipeline),
47 |     test=dict(type=dataset_type,  # test points at the same val.json as `val`
48 |               ann_file=data_root + 'annotations/val.json',
49 |               img_prefix=data_root + 'images',
50 |               pipeline=test_pipeline))
51 | evaluation = dict(interval=1, metric=['IoU', 'Acc'])


--------------------------------------------------------------------------------
/wsdm2023/configs/_base_/datasets/lvis_v0.5_instance.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) OpenMMLab. All rights reserved.
 2 | # dataset settings: LVIS v0.5, layered on top of the COCO instance config
 3 | _base_ = 'coco_instance.py'
 4 | dataset_type = 'LVISV05Dataset'
 5 | data_root = 'data/lvis_v0.5/'
 6 | data = dict(samples_per_gpu=2,
 7 |             workers_per_gpu=2,
 8 |             train=dict(_delete_=True,  # mmcv Config marker: replace the inherited `train` dict instead of merging
 9 |                        type='ClassBalancedDataset',
10 |                        oversample_thr=1e-3,
11 |                        dataset=dict(type=dataset_type,  # NOTE(review): no `pipeline` key here - confirm the inheriting config supplies it
12 |                                     ann_file=data_root +
13 |                                     'annotations/lvis_v0.5_train.json',
14 |                                     img_prefix=data_root + 'train2017/')),
15 |             val=dict(type=dataset_type,  # no `pipeline` key; presumably merged in from the base config
16 |                      ann_file=data_root + 'annotations/lvis_v0.5_val.json',
17 |                      img_prefix=data_root + 'val2017/'),
18 |             test=dict(type=dataset_type,  # test points at the same val annotations as `val`
19 |                       ann_file=data_root + 'annotations/lvis_v0.5_val.json',
20 |                       img_prefix=data_root + 'val2017/'))
21 | evaluation = dict(metric=['bbox', 'segm'])
22 | 


--------------------------------------------------------------------------------
/wsdm2023/configs/_base_/datasets/lvis_v1_instance.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) OpenMMLab. All rights reserved.
 2 | # dataset settings: LVIS v1, layered on top of the COCO instance config
 3 | _base_ = 'coco_instance.py'
 4 | dataset_type = 'LVISV1Dataset'
 5 | data_root = 'data/lvis_v1/'
 6 | data = dict(samples_per_gpu=2,
 7 |             workers_per_gpu=2,
 8 |             train=dict(_delete_=True,  # mmcv Config marker: replace the inherited `train` dict instead of merging
 9 |                        type='ClassBalancedDataset',
10 |                        oversample_thr=1e-3,
11 |                        dataset=dict(type=dataset_type,  # NOTE(review): no `pipeline` key here - confirm the inheriting config supplies it
12 |                                     ann_file=data_root +
13 |                                     'annotations/lvis_v1_train.json',
14 |                                     img_prefix=data_root)),  # prefix is the bare data_root, no split sub-directory
15 |             val=dict(type=dataset_type,  # no `pipeline` key; presumably merged in from the base config
16 |                      ann_file=data_root + 'annotations/lvis_v1_val.json',
17 |                      img_prefix=data_root),
18 |             test=dict(type=dataset_type,  # test points at the same val annotations as `val`
19 |                       ann_file=data_root + 'annotations/lvis_v1_val.json',
20 |                       img_prefix=data_root))
21 | evaluation = dict(metric=['bbox', 'segm'])
22 | 


--------------------------------------------------------------------------------
/wsdm2023/configs/_base_/datasets/refcoco.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) OpenMMLab. All rights reserved.
 2 | # dataset settings: RefCOCO grounding; each sample carries an image plus a 'refer' text field
 3 | dataset_type = 'VGDataset'
 4 | data_root = 'data/refcoco/'
 5 | img_norm_cfg = dict(
 6 |     mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
 7 | train_pipeline = [
 8 |     dict(type='LoadImageFromFile'),
 9 |     dict(type='LoadAnnotations', with_bbox=True),
10 |     dict(type='LoadRefer'),  # custom transform exported by mmdet_custom.apis
11 |     dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),
12 |     dict(type='RandomFlipWithRefer', flip_ratio=0.5),  # custom transform exported by mmdet_custom.apis
13 |     dict(type='Normalize', **img_norm_cfg),
14 |     dict(type='Pad', size_divisor=32),
15 |     dict(type='TokenizeRefer', max_sent_len=128),  # placed after the flip step and before bundling
16 |     dict(type='DefaultFormatBundle'),
17 |     dict(type='Collect', keys=['img', 'refer',
18 |          'r_mask', 'gt_bboxes', 'gt_labels']),
19 | ]
20 | test_pipeline = [
21 |     dict(type='LoadImageFromFile'),
22 |     dict(type='LoadRefer'),
23 |     dict(type='TokenizeRefer', max_sent_len=128),
24 |     dict(type='MultiScaleFlipAug',
25 |          img_scale=(1333, 800),
26 |          flip=False,
27 |          transforms=[
28 |              dict(type='Resize', keep_ratio=True),
29 |              dict(type='RandomFlipWithRefer'),
30 |              dict(type='Normalize', **img_norm_cfg),
31 |              dict(type='Pad', size_divisor=32),
32 |              dict(type='ImageToTensor', keys=['img']),
33 |              dict(type='TokenizeRefer', max_sent_len=128),
34 |              dict(type='Collect', keys=['img', 'refer', 'r_mask']),
35 |          ])
36 | ]
37 | data = dict(
38 |     samples_per_gpu=2,
39 |     workers_per_gpu=2,
40 |     train=dict(type=dataset_type,
41 |                ann_file=data_root + 'refcoco/refcoco_train.json',  # resolves to data/refcoco/refcoco/... - NOTE(review): confirm the nested folder is intended
42 |                img_prefix=data_root + 'images',
43 |                pipeline=train_pipeline),
44 |     val=dict(type=dataset_type,
45 |              ann_file=data_root + 'refcoco/refcoco_val.json',
46 |              img_prefix=data_root + 'images',
47 |              pipeline=test_pipeline),
48 |     test=dict(type=dataset_type,
49 |               ann_file=data_root + 'refcoco/refcoco_testA.json',  # only the testA split is wired up here
50 |               img_prefix=data_root + 'images',
51 |               pipeline=test_pipeline))
52 | evaluation = dict(interval=1, metric=['IoU', 'Acc'])
53 | 


--------------------------------------------------------------------------------
/wsdm2023/configs/_base_/datasets/voc0712.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) OpenMMLab. All rights reserved.
 2 | # dataset settings: VOC2007 + VOC2012 trainval for training, VOC2007 test for val/test
 3 | dataset_type = 'VOCDataset'
 4 | data_root = 'data/VOCdevkit/'
 5 | img_norm_cfg = dict(
 6 |     mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
 7 | train_pipeline = [
 8 |     dict(type='LoadImageFromFile'),
 9 |     dict(type='LoadAnnotations', with_bbox=True),
10 |     dict(type='Resize', img_scale=(1000, 600), keep_ratio=True),
11 |     dict(type='RandomFlip', flip_ratio=0.5),
12 |     dict(type='Normalize', **img_norm_cfg),
13 |     dict(type='Pad', size_divisor=32),
14 |     dict(type='DefaultFormatBundle'),
15 |     dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
16 | ]
17 | test_pipeline = [
18 |     dict(type='LoadImageFromFile'),
19 |     dict(type='MultiScaleFlipAug',
20 |          img_scale=(1000, 600),  # single test scale, same as training
21 |          flip=False,
22 |          transforms=[
23 |              dict(type='Resize', keep_ratio=True),
24 |              dict(type='RandomFlip'),
25 |              dict(type='Normalize', **img_norm_cfg),
26 |              dict(type='Pad', size_divisor=32),
27 |              dict(type='ImageToTensor', keys=['img']),
28 |              dict(type='Collect', keys=['img']),  # only the image is collected at test time
29 |          ])
30 | ]
31 | data = dict(
32 |     samples_per_gpu=2,
33 |     workers_per_gpu=2,
34 |     train=dict(type='RepeatDataset',  # the train split is wrapped and repeated `times` times
35 |                times=3,
36 |                dataset=dict(
37 |                    type=dataset_type,
38 |                    ann_file=[  # two annotation lists, paired by position with the two img_prefix entries
39 |                        data_root + 'VOC2007/ImageSets/Main/trainval.txt',
40 |                        data_root + 'VOC2012/ImageSets/Main/trainval.txt'
41 |                    ],
42 |                    img_prefix=[data_root + 'VOC2007/', data_root + 'VOC2012/'],
43 |                    pipeline=train_pipeline)),
44 |     val=dict(type=dataset_type,
45 |              ann_file=data_root + 'VOC2007/ImageSets/Main/test.txt',
46 |              img_prefix=data_root + 'VOC2007/',
47 |              pipeline=test_pipeline),
48 |     test=dict(type=dataset_type,  # test points at the same VOC2007 test list as `val`
49 |               ann_file=data_root + 'VOC2007/ImageSets/Main/test.txt',
50 |               img_prefix=data_root + 'VOC2007/',
51 |               pipeline=test_pipeline))
52 | evaluation = dict(interval=1, metric='mAP')
53 | 


--------------------------------------------------------------------------------
/wsdm2023/configs/_base_/default_runtime.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) OpenMMLab. All rights reserved.
 2 | checkpoint_config = dict(interval=1)  # checkpoint-saving interval
 3 | # yapf:disable
 4 | log_config = dict(
 5 |     interval=50,  # logging interval
 6 |     hooks=[
 7 |         dict(type='TextLoggerHook'),  # the only logger hook enabled
 8 |         # dict(type='TensorboardLoggerHook')
 9 |     ])
10 | # yapf:enable
11 | custom_hooks = [dict(type='NumClassCheckHook')]
12 | 
13 | dist_params = dict(backend='nccl')  # distributed backend
14 | log_level = 'INFO'
15 | load_from = None  # no checkpoint is loaded by default
16 | resume_from = None  # no run is resumed by default
17 | workflow = [('train', 1)]  # a single 'train' phase; no 'val' phase is listed
18 | 


--------------------------------------------------------------------------------
/wsdm2023/configs/_base_/models/retinanet_r50_fpn.py:
--------------------------------------------------------------------------------
 1 | # model settings: RetinaNet with a ResNet-50 backbone, FPN neck and RetinaHead
 2 | model = dict(
 3 |     type='RetinaNet',
 4 |     backbone=dict(
 5 |         type='ResNet',
 6 |         depth=50,
 7 |         num_stages=4,
 8 |         out_indices=(0, 1, 2, 3),  # all four stages are exposed to the neck
 9 |         frozen_stages=1,
10 |         norm_cfg=dict(type='BN', requires_grad=True),
11 |         norm_eval=True,
12 |         style='pytorch',
13 |         init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
14 |     neck=dict(
15 |         type='FPN',
16 |         in_channels=[256, 512, 1024, 2048],  # one entry per backbone output
17 |         out_channels=256,
18 |         start_level=1,  # index 0 of in_channels is not used as a starting level
19 |         add_extra_convs='on_input',
20 |         num_outs=5),
21 |     bbox_head=dict(
22 |         type='RetinaHead',
23 |         num_classes=80,
24 |         in_channels=256,  # equals the neck's out_channels
25 |         stacked_convs=4,
26 |         feat_channels=256,
27 |         anchor_generator=dict(
28 |             type='AnchorGenerator',
29 |             octave_base_scale=4,
30 |             scales_per_octave=3,
31 |             ratios=[0.5, 1.0, 2.0],
32 |             strides=[8, 16, 32, 64, 128]),  # five strides, matching num_outs=5
33 |         bbox_coder=dict(
34 |             type='DeltaXYWHBBoxCoder',
35 |             target_means=[.0, .0, .0, .0],
36 |             target_stds=[1.0, 1.0, 1.0, 1.0]),
37 |         loss_cls=dict(
38 |             type='FocalLoss',
39 |             use_sigmoid=True,
40 |             gamma=2.0,
41 |             alpha=0.25,
42 |             loss_weight=1.0),
43 |         loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
44 |     # model training and testing settings
45 |     train_cfg=dict(
46 |         assigner=dict(
47 |             type='MaxIoUAssigner',
48 |             pos_iou_thr=0.5,
49 |             neg_iou_thr=0.4,  # lower than pos_iou_thr, leaving a gap between the two thresholds
50 |             min_pos_iou=0,
51 |             ignore_iof_thr=-1),
52 |         allowed_border=-1,
53 |         pos_weight=-1,
54 |         debug=False),  # no `sampler` entry is configured here
55 |     test_cfg=dict(
56 |         nms_pre=1000,
57 |         min_bbox_size=0,
58 |         score_thr=0.05,
59 |         nms=dict(type='nms', iou_threshold=0.5),
60 |         max_per_img=100))
61 | 


--------------------------------------------------------------------------------
/wsdm2023/configs/_base_/models/rpn_r50_caffe_c4.py:
--------------------------------------------------------------------------------
 1 | # model settings: RPN on a caffe-style ResNet-50 with a single output stage and no neck
 2 | model = dict(
 3 |     type='RPN',
 4 |     backbone=dict(
 5 |         type='ResNet',
 6 |         depth=50,
 7 |         num_stages=3,
 8 |         strides=(1, 2, 2),
 9 |         dilations=(1, 1, 1),
10 |         out_indices=(2, ),  # only the last of the three stages is output
11 |         frozen_stages=1,
12 |         norm_cfg=dict(type='BN', requires_grad=False),
13 |         norm_eval=True,
14 |         style='caffe',
15 |         init_cfg=dict(
16 |             type='Pretrained',
17 |             checkpoint='open-mmlab://detectron2/resnet50_caffe')),
18 |     neck=None,  # the head consumes the backbone output directly
19 |     rpn_head=dict(
20 |         type='RPNHead',
21 |         in_channels=1024,
22 |         feat_channels=1024,
23 |         anchor_generator=dict(
24 |             type='AnchorGenerator',
25 |             scales=[2, 4, 8, 16, 32],
26 |             ratios=[0.5, 1.0, 2.0],
27 |             strides=[16]),  # one stride for the single feature level
28 |         bbox_coder=dict(
29 |             type='DeltaXYWHBBoxCoder',
30 |             target_means=[.0, .0, .0, .0],
31 |             target_stds=[1.0, 1.0, 1.0, 1.0]),
32 |         loss_cls=dict(
33 |             type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
34 |         loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
35 |     # model training and testing settings
36 |     train_cfg=dict(
37 |         rpn=dict(
38 |             assigner=dict(
39 |                 type='MaxIoUAssigner',
40 |                 pos_iou_thr=0.7,
41 |                 neg_iou_thr=0.3,
42 |                 min_pos_iou=0.3,
43 |                 ignore_iof_thr=-1),
44 |             sampler=dict(
45 |                 type='RandomSampler',
46 |                 num=256,
47 |                 pos_fraction=0.5,
48 |                 neg_pos_ub=-1,
49 |                 add_gt_as_proposals=False),
50 |             allowed_border=0,
51 |             pos_weight=-1,
52 |             debug=False)),
53 |     test_cfg=dict(
54 |         rpn=dict(
55 |             nms_pre=12000,
56 |             max_per_img=2000,
57 |             nms=dict(type='nms', iou_threshold=0.7),
58 |             min_bbox_size=0)))
59 | 


--------------------------------------------------------------------------------
/wsdm2023/configs/_base_/models/rpn_r50_fpn.py:
--------------------------------------------------------------------------------
 1 | # model settings: RPN on a ResNet-50 backbone with an FPN neck
 2 | model = dict(
 3 |     type='RPN',
 4 |     backbone=dict(
 5 |         type='ResNet',
 6 |         depth=50,
 7 |         num_stages=4,
 8 |         out_indices=(0, 1, 2, 3),  # all four stages are exposed to the neck
 9 |         frozen_stages=1,
10 |         norm_cfg=dict(type='BN', requires_grad=True),
11 |         norm_eval=True,
12 |         style='pytorch',
13 |         init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
14 |     neck=dict(
15 |         type='FPN',
16 |         in_channels=[256, 512, 1024, 2048],  # one entry per backbone output
17 |         out_channels=256,
18 |         num_outs=5),
19 |     rpn_head=dict(
20 |         type='RPNHead',
21 |         in_channels=256,  # equals the neck's out_channels
22 |         feat_channels=256,
23 |         anchor_generator=dict(
24 |             type='AnchorGenerator',
25 |             scales=[8],
26 |             ratios=[0.5, 1.0, 2.0],
27 |             strides=[4, 8, 16, 32, 64]),  # five strides, matching num_outs=5
28 |         bbox_coder=dict(
29 |             type='DeltaXYWHBBoxCoder',
30 |             target_means=[.0, .0, .0, .0],
31 |             target_stds=[1.0, 1.0, 1.0, 1.0]),
32 |         loss_cls=dict(
33 |             type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
34 |         loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
35 |     # model training and testing settings
36 |     train_cfg=dict(
37 |         rpn=dict(
38 |             assigner=dict(
39 |                 type='MaxIoUAssigner',
40 |                 pos_iou_thr=0.7,
41 |                 neg_iou_thr=0.3,
42 |                 min_pos_iou=0.3,
43 |                 ignore_iof_thr=-1),
44 |             sampler=dict(
45 |                 type='RandomSampler',
46 |                 num=256,
47 |                 pos_fraction=0.5,
48 |                 neg_pos_ub=-1,
49 |                 add_gt_as_proposals=False),
50 |             allowed_border=0,
51 |             pos_weight=-1,
52 |             debug=False)),
53 |     test_cfg=dict(
54 |         rpn=dict(
55 |             nms_pre=2000,
56 |             max_per_img=1000,
57 |             nms=dict(type='nms', iou_threshold=0.7),
58 |             min_bbox_size=0)))
59 | 


--------------------------------------------------------------------------------
/wsdm2023/configs/_base_/models/ssd300.py:
--------------------------------------------------------------------------------
 1 | # model settings: SSD with an SSDVGG backbone, SSDNeck and SSDHead at input size 300
 2 | input_size = 300  # reused by the anchor generator below
 3 | model = dict(
 4 |     type='SingleStageDetector',
 5 |     backbone=dict(
 6 |         type='SSDVGG',
 7 |         depth=16,
 8 |         with_last_pool=False,
 9 |         ceil_mode=True,
10 |         out_indices=(3, 4),
11 |         out_feature_indices=(22, 34),
12 |         init_cfg=dict(
13 |             type='Pretrained', checkpoint='open-mmlab://vgg16_caffe')),
14 |     neck=dict(
15 |         type='SSDNeck',
16 |         in_channels=(512, 1024),
17 |         out_channels=(512, 1024, 512, 256, 256, 256),  # six output levels from two inputs
18 |         level_strides=(2, 2, 1, 1),  # four entries, one per level added beyond the two inputs
19 |         level_paddings=(1, 1, 0, 0),
20 |         l2_norm_scale=20),
21 |     bbox_head=dict(
22 |         type='SSDHead',
23 |         in_channels=(512, 1024, 512, 256, 256, 256),  # equals the neck's out_channels
24 |         num_classes=80,
25 |         anchor_generator=dict(
26 |             type='SSDAnchorGenerator',
27 |             scale_major=False,
28 |             input_size=input_size,
29 |             basesize_ratio_range=(0.15, 0.9),
30 |             strides=[8, 16, 32, 64, 100, 300],  # six strides, one per head input level
31 |             ratios=[[2], [2, 3], [2, 3], [2, 3], [2], [2]]),  # six ratio lists, one per level
32 |         bbox_coder=dict(
33 |             type='DeltaXYWHBBoxCoder',
34 |             target_means=[.0, .0, .0, .0],
35 |             target_stds=[0.1, 0.1, 0.2, 0.2])),
36 |     # model training and testing settings
37 |     train_cfg=dict(
38 |         assigner=dict(
39 |             type='MaxIoUAssigner',
40 |             pos_iou_thr=0.5,
41 |             neg_iou_thr=0.5,  # same value as pos_iou_thr
42 |             min_pos_iou=0.,
43 |             ignore_iof_thr=-1,
44 |             gt_max_assign_all=False),
45 |         smoothl1_beta=1.,
46 |         allowed_border=-1,
47 |         pos_weight=-1,
48 |         neg_pos_ratio=3,
49 |         debug=False),
50 |     test_cfg=dict(
51 |         nms_pre=1000,
52 |         nms=dict(type='nms', iou_threshold=0.45),
53 |         min_bbox_size=0,
54 |         score_thr=0.02,
55 |         max_per_img=200))
56 | cudnn_benchmark = True  # top-level flag, set outside the model dict
57 | 


--------------------------------------------------------------------------------
/wsdm2023/configs/_base_/schedules/schedule_1x.py:
--------------------------------------------------------------------------------
 1 | # optimizer: SGD with momentum and weight decay
 2 | optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
 3 | optimizer_config = dict(grad_clip=None)  # no gradient clipping configured
 4 | # learning policy: step schedule with linear warmup
 5 | lr_config = dict(
 6 |     policy='step',
 7 |     warmup='linear',
 8 |     warmup_iters=500,
 9 |     warmup_ratio=0.001,
10 |     step=[8, 11])  # presumably epoch milestones within the 12-epoch run below - confirm
11 | runner = dict(type='EpochBasedRunner', max_epochs=12)
12 | 


--------------------------------------------------------------------------------
/wsdm2023/configs/_base_/schedules/schedule_20e.py:
--------------------------------------------------------------------------------
 1 | # optimizer: SGD with momentum and weight decay
 2 | optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
 3 | optimizer_config = dict(grad_clip=None)  # no gradient clipping configured
 4 | # learning policy: step schedule with linear warmup
 5 | lr_config = dict(
 6 |     policy='step',
 7 |     warmup='linear',
 8 |     warmup_iters=500,
 9 |     warmup_ratio=0.001,
10 |     step=[16, 19])  # presumably epoch milestones within the 20-epoch run below - confirm
11 | runner = dict(type='EpochBasedRunner', max_epochs=20)
12 | 


--------------------------------------------------------------------------------
/wsdm2023/configs/_base_/schedules/schedule_2x.py:
--------------------------------------------------------------------------------
 1 | # optimizer: SGD with momentum and weight decay
 2 | optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
 3 | optimizer_config = dict(grad_clip=None)  # no gradient clipping configured
 4 | # learning policy: step schedule with linear warmup
 5 | lr_config = dict(
 6 |     policy='step',
 7 |     warmup='linear',
 8 |     warmup_iters=500,
 9 |     warmup_ratio=0.001,
10 |     step=[16, 22])  # presumably epoch milestones within the 24-epoch run below - confirm
11 | runner = dict(type='EpochBasedRunner', max_epochs=24)
12 | 


--------------------------------------------------------------------------------
/wsdm2023/configs/_base_/schedules/schedule_3x.py:
--------------------------------------------------------------------------------
 1 | # optimizer: SGD with momentum and weight decay
 2 | optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
 3 | optimizer_config = dict(grad_clip=None)  # no gradient clipping configured
 4 | # learning policy: step schedule with linear warmup
 5 | lr_config = dict(
 6 |     policy='step',
 7 |     warmup='linear',
 8 |     warmup_iters=500,
 9 |     warmup_ratio=0.001,
10 |     step=[27, 33])  # presumably epoch milestones within the 36-epoch run below - confirm
11 | runner = dict(type='EpochBasedRunner', max_epochs=36)
12 | 


--------------------------------------------------------------------------------
/wsdm2023/configs/_base_/schedules/schedule_6x.py:
--------------------------------------------------------------------------------
 1 | # optimizer: SGD with momentum and weight decay
 2 | optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
 3 | optimizer_config = dict(grad_clip=None)  # no gradient clipping configured
 4 | # learning policy: step schedule with linear warmup
 5 | lr_config = dict(
 6 |     policy='step',
 7 |     warmup='linear',
 8 |     warmup_iters=2000,  # longer warmup than the 500 iterations used by the shorter schedules
 9 |     warmup_ratio=0.001,
10 |     step=[62, 68])  # presumably epoch milestones within the 72-epoch run below - confirm
11 | runner = dict(type='EpochBasedRunner', max_epochs=72)
12 | 


--------------------------------------------------------------------------------
/wsdm2023/dist_test.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | # Usage: [PORT=<port>] dist_test.sh CONFIG CHECKPOINT GPUS [extra test.py args...]
 3 | CONFIG=$1
 4 | CHECKPOINT=$2
 5 | GPUS=$3
 6 | PORT=${PORT:-29600}  # master port; override through the PORT environment variable
 7 | 
 8 | PYTHONPATH="$(dirname "$0")/..":$PYTHONPATH \
 9 | python -m torch.distributed.launch --nproc_per_node="$GPUS" --master_port="$PORT" \
10 |     "$(dirname "$0")/test.py" "$CONFIG" "$CHECKPOINT" --launcher pytorch "${@:4}"
11 | 


--------------------------------------------------------------------------------
/wsdm2023/dist_train.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | # Usage: [PORT=<port>] dist_train.sh CONFIG GPUS [extra train.py args...]
 3 | CONFIG=$1
 4 | GPUS=$2
 5 | PORT=${PORT:-63667}  # default is the port that used to be hard-coded; PORT now actually overrides it
 6 | 
 7 | PYTHONPATH="$(dirname "$0")/..":$PYTHONPATH \
 8 | python -m torch.distributed.launch --nproc_per_node="$GPUS" --master_port="$PORT" \
 9 |     "$(dirname "$0")/train.py" "$CONFIG" --launcher pytorch "${@:3}"
10 | 


--------------------------------------------------------------------------------
/wsdm2023/mmcv_custom/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Shanghai AI Lab. All rights reserved.
 2 | from .checkpoint import load_checkpoint
 3 | from .customized_text import CustomizedTextLoggerHook
 4 | from .layer_decay_optimizer_constructor import LayerDecayOptimizerConstructor
 5 | 
 6 | __all__ = [
 7 |     'LayerDecayOptimizerConstructor', 'CustomizedTextLoggerHook',
 8 |     'load_checkpoint'
 9 | ]
10 | 


--------------------------------------------------------------------------------
/wsdm2023/mmdet_custom/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Shanghai AI Lab. All rights reserved.
2 | from .models import *  # noqa: F401,F403
3 | from .datasets import *
4 | from .apis import *
5 | 


--------------------------------------------------------------------------------
/wsdm2023/mmdet_custom/apis/__init__.py:
--------------------------------------------------------------------------------
1 | from .pipeline import LoadRefer, TokenizeRefer, RandomParaPhrase, RandomFlipWithRefer
2 | 
3 | __all__ = ['LoadRefer', 'TokenizeRefer',
4 |            'RandomParaPhrase', 'RandomFlipWithRefer']
5 | 


--------------------------------------------------------------------------------
/wsdm2023/mmdet_custom/datasets/__init__.py:
--------------------------------------------------------------------------------
1 | from .wsdm2023_coco import WSDMCocoDataset
2 | from .vg_dataset import VGDataset
3 | 
4 | __all__ = ['WSDMCocoDataset','VGDataset']
5 | 


--------------------------------------------------------------------------------
/wsdm2023/mmdet_custom/models/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Shanghai AI Lab. All rights reserved.
2 | from .backbones import *  # noqa: F401,F403
3 | from .detectors import *  # noqa: F401,F403
4 | from .dense_heads import *  # noqa: F401,F403
5 | from .utils import *  # noqa: F401,F403
6 | 


--------------------------------------------------------------------------------
/wsdm2023/mmdet_custom/models/backbones/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Shanghai AI Lab. All rights reserved.
2 | from .uniperceiver_adapter import UniPerceiverAdapter
3 | 
4 | 
5 | __all__ = ['UniPerceiverAdapter']
6 | 


--------------------------------------------------------------------------------
/wsdm2023/mmdet_custom/models/dense_heads/__init__.py:
--------------------------------------------------------------------------------
1 | from .deformable_detr_head import DeformableDETRHead
2 | from .detr_head import DETRHead
3 | from .dino_head import DINOHead
4 | 
5 | __all__ = ['DeformableDETRHead', 'DETRHead', 'DINOHead']
6 | 


--------------------------------------------------------------------------------
/wsdm2023/mmdet_custom/models/detectors/__init__.py:
--------------------------------------------------------------------------------
1 | from .grounding_dino import GroundingDINO
2 | 
3 | 
4 | __all__ = ['GroundingDINO']
5 | 


--------------------------------------------------------------------------------
/wsdm2023/mmdet_custom/models/utils/__init__.py:
--------------------------------------------------------------------------------
1 | from .query_denoising import build_dn_generator
2 | from .transformer import DinoTransformer, DinoTransformerDecoder
3 | from .point_sample import get_uncertainty, get_uncertain_point_coords_with_randomness
4 | 
5 | __all__ = ['build_dn_generator', 'DinoTransformer', 'DinoTransformerDecoder',
6 |            'get_uncertainty', 'get_uncertain_point_coords_with_randomness']


--------------------------------------------------------------------------------
/wsdm2023/mmdet_custom/models/utils/tokenization/__init__.py:
--------------------------------------------------------------------------------
1 | from .builder import build_tokenizer
2 | from .tokenization_clip import ClipTokenizer
3 | 


--------------------------------------------------------------------------------
/wsdm2023/mmdet_custom/models/utils/tokenization/bpe_simple_vocab_16e6.txt.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/czczup/ViT-Adapter/94ffa6b6134b99d838312e2e042d6ac3a52a7ef8/wsdm2023/mmdet_custom/models/utils/tokenization/bpe_simple_vocab_16e6.txt.gz


--------------------------------------------------------------------------------
/wsdm2023/mmdet_custom/models/utils/tokenization/builder.py:
--------------------------------------------------------------------------------
1 | from .tokenization_clip import MaskClipTokenizer
2 | 
3 | 
4 | def build_tokenizer(tokenizer):
5 |     if tokenizer['name']=='clip_tokenizer':
6 |         return MaskClipTokenizer(tokenizer['max_sent_len'])


--------------------------------------------------------------------------------
/wsdm2023/release.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | import argparse
 3 | 
 4 | parser = argparse.ArgumentParser(description='Hyperparams')
 5 | parser.add_argument('filename', nargs='?', type=str, default=None)
 6 | 
 7 | args = parser.parse_args()
 8 | 
 9 | model = torch.load(args.filename, map_location=torch.device('cpu'))
10 | print(model.keys())
11 | 
12 | state_dict = model['state_dict']
13 | new_state_dict = {}
14 | for k, v in state_dict.items():
15 |     if "ema_" in k:
16 |         pass
17 |     else:
18 |         print(k)
19 |         new_state_dict[k] = v
20 | new_dict = {'state_dict': new_state_dict}
21 | torch.save(new_dict, args.filename.replace(".pth", "_release.pth"))


--------------------------------------------------------------------------------
/wsdm2023/slurm_test.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | # Run test.py through srun. Usage: slurm_test.sh PARTITION JOB_NAME CONFIG CHECKPOINT [PY_ARGS...]
 3 | set -x  # trace: print each command before it runs
 4 | 
 5 | PARTITION=$1  # arg 1: passed to `srun -p`
 6 | JOB_NAME=$2  # arg 2: passed to `srun --job-name`
 7 | CONFIG=$3  # arg 3: first argument of test.py
 8 | CHECKPOINT=$4  # arg 4: second argument of test.py
 9 | GPUS=${GPUS:-8}  # env override, default 8; used for `--ntasks`
10 | GPUS_PER_NODE=${GPUS_PER_NODE:-8}  # env override, default 8; `--gres=gpu:N` and `--ntasks-per-node`
11 | CPUS_PER_TASK=${CPUS_PER_TASK:-5}  # env override, default 5; `--cpus-per-task`
12 | PY_ARGS=${@:5}  # every argument after the fourth is forwarded to test.py
13 | SRUN_ARGS=${SRUN_ARGS:-""}  # env override; extra srun options, expanded unquoted below
14 | # PYTHONPATH is extended with the parent of this script's directory for the srun command only.
15 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \
16 | srun -p ${PARTITION} \
17 |     --job-name=${JOB_NAME} \
18 |     --gres=gpu:${GPUS_PER_NODE} \
19 |     --ntasks=${GPUS} \
20 |     --ntasks-per-node=${GPUS_PER_NODE} \
21 |     --cpus-per-task=${CPUS_PER_TASK} \
22 |     --kill-on-bad-exit=1 \
23 |     --quotatype auto \
24 |     ${SRUN_ARGS} \
25 |     python -u test.py ${CONFIG} ${CHECKPOINT} --launcher="slurm" ${PY_ARGS}
26 | 


--------------------------------------------------------------------------------
/wsdm2023/slurm_train.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | # Run train.py through srun. Usage: slurm_train.sh PARTITION JOB_NAME CONFIG WORK_DIR [PY_ARGS...]
 3 | set -x  # trace: print each command before it runs
 4 | 
 5 | PARTITION=$1  # arg 1: passed to `srun -p`
 6 | JOB_NAME=$2  # arg 2: passed to `srun --job-name`
 7 | CONFIG=$3  # arg 3: first argument of train.py
 8 | WORK_DIR=$4  # arg 4: passed to train.py as `--work-dir`
 9 | GPUS=${GPUS:-8}  # env override, default 8; used for `--ntasks`
10 | GPUS_PER_NODE=${GPUS_PER_NODE:-8}  # env override, default 8; `--gres=gpu:N` and `--ntasks-per-node`
11 | CPUS_PER_TASK=${CPUS_PER_TASK:-5}  # env override, default 5; `--cpus-per-task`
12 | SRUN_ARGS=${SRUN_ARGS:-""}  # env override; extra srun options, expanded unquoted below
13 | PY_ARGS=${@:5}  # every argument after the fourth is forwarded to train.py
14 | # PYTHONPATH is extended with the parent of this script's directory for the srun command only.
15 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \
16 | srun -p ${PARTITION} \
17 |     --job-name=${JOB_NAME} \
18 |     --gres=gpu:${GPUS_PER_NODE} \
19 |     --ntasks=${GPUS} \
20 |     --ntasks-per-node=${GPUS_PER_NODE} \
21 |     --cpus-per-task=${CPUS_PER_TASK} \
22 |     --kill-on-bad-exit=1 \
23 |     --quotatype=spot \
24 |     ${SRUN_ARGS} \
25 |     python -u train.py ${CONFIG} --work-dir=${WORK_DIR} --launcher="slurm" ${PY_ARGS}
26 | 


--------------------------------------------------------------------------------
/wsdm2023/tools/README.md:
--------------------------------------------------------------------------------
1 | # Use Parrot for paraphrasing
2 | 
3 | - `pip install git+https://github.com/PrithivirajDamodaran/Parrot.git`
4 | - Go to [Huggingface](https://huggingface.co/settings/tokens) to register an account and create an access token
5 | - Run `huggingface-cli login` in a terminal and enter your token when prompted
6 | 


--------------------------------------------------------------------------------
/wsdm2023/tools/convertor.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | from mmdet_custom.models.backbones.base.uniperceiver import UnifiedBertEncoder
 3 | 
 4 | checkpoint = torch.load("pretrained/uni-perceiver-large-L24-H1024-224size-pretrained.pth", map_location=torch.device('cpu'))
 5 | checkpoint = checkpoint['model']
 6 | new_checkpoint = {}
 7 | for k, v in checkpoint.items():
 8 |     new_k = k.replace("fused_encoder.", "")
 9 |     new_k = new_k.replace("in_proj_", "in_proj.")
10 |     new_k = new_k.replace("video_embed.", "visual_embed.")
11 |     new_k = new_k.replace("visual_embed.embeddings.weight", "visual_embed.patch_embed.proj.weight")
12 |     new_k = new_k.replace("visual_embed.embeddings.bias", "visual_embed.patch_embed.proj.bias")
13 |     new_k = new_k.replace("visual_embed.embeddings_st_pos.spatial_pos_embed.weight", "visual_embed.patch_embed.spatial_pos_embed.weight")
14 |     new_k = new_k.replace("visual_embed.embeddings_st_pos.temporal_pos_embed.weight", "visual_embed.patch_embed.temporal_pos_embed.weight")
15 | 
16 |     if "loss_prepare" in new_k:
17 |         pass
18 |     # elif "token_embed" in new_k:
19 |     #     pass
20 |     else:
21 |         new_checkpoint[new_k] = v
22 |         
23 | for k, v in new_checkpoint.items():
24 |     print(k, v.shape)
25 | 
26 | model = UnifiedBertEncoder(embed_dim=1024, depth=24, num_heads=16)
27 | msg = model.load_state_dict(new_checkpoint, strict=False)
28 | torch.save(new_checkpoint, "pretrained/uni-perceiver-large-L24-H1024-224size-pretrained_converted.pth")
29 | print(msg)
30 | 
31 | 
32 | 


--------------------------------------------------------------------------------
/wsdm2023/tools/paraphrase.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | from parrot import Parrot
 3 | import json
 4 | import pandas
 5 | import argparse
 6 | import warnings
 7 | warnings.filterwarnings("ignore")
 8 | 
 9 | 
10 | def parse_args():
11 |     parser = argparse.ArgumentParser()
12 |     parser.add_argument('csv', type=str, help='csv file path')
13 |     parser.add_argument('out', type=str, help='output json file path')
14 |     parser.add_argument('--topn', type=int, default=3,
15 |                         help='use top n paraphrase for augment')
16 | 
17 |     return parser.parse_args()
18 | 
19 | 
20 | def main(args):
21 |     parrot = Parrot(
22 |         model_tag="prithivida/parrot_paraphraser_on_T5")
23 |     parrot.model = parrot.model.to('cuda:0')
24 |     print('Successfully load model.')
25 |     res = dict()
26 | 
27 |     df = pandas.read_csv(args.csv)
28 |     total = len(df)
29 |     for idx, data in df.iterrows():
30 |         name = data['image'].split('/')[-1]
31 |         phrase = data['question'].replace(
32 |             '\"', '').replace('?', '').strip(' ').lower()
33 |         paras = parrot.augment(input_phrase=phrase, use_gpu=True)
34 |         print('-'*100)
35 |         print(phrase)
36 |         print('-'*100)
37 |         print(paras)
38 |         if paras is None:
39 |             res[name] = [phrase]
40 |         else:
41 |             selected = []
42 |             for i, p in enumerate(paras):
43 |                 selected.append(p[0])
44 |                 if i >= args.topn:
45 |                     break
46 |             res[name] = selected
47 | 
48 |         print(f'Finished [{idx+1}/{total}]\n')
49 | 
50 |     with open(args.out, 'w') as f:
51 |         res = json.dumps(res)
52 |         f.write(res)
53 | 
54 | 
55 | if __name__ == '__main__':  # run only when executed as a script, not on import
56 |     main(parse_args())
57 | 


--------------------------------------------------------------------------------