├── .clang-format ├── .flake8 ├── .gitignore ├── .gitmodules ├── .readthedocs.yaml ├── CITATION.cff ├── LICENSE ├── README.md ├── assets ├── detr_arch.png ├── detrex_logo.png └── logo_2.png ├── changlog.md ├── configs ├── common │ ├── coco_schedule.py │ ├── common_schedule.py │ ├── data │ │ ├── coco.py │ │ ├── coco_detr.py │ │ ├── constants.py │ │ └── custom.py │ ├── optim.py │ └── train.py └── hydra │ ├── slurm │ └── research.yaml │ └── train_args.yaml ├── demo ├── README.md ├── __init__.py ├── demo.py ├── mot_demo.py ├── mot_predictors.py └── predictors.py ├── detrex ├── __init__.py ├── checkpoint │ ├── __init__.py │ ├── c2_model_loading.py │ └── detection_checkpoint.py ├── config │ ├── __init__.py │ └── config.py ├── data │ ├── __init__.py │ ├── dataset_mappers │ │ ├── __init__.py │ │ ├── coco_instance_new_baseline_dataset_mapper.py │ │ ├── coco_panoptic_new_baseline_dataset_mapper.py │ │ ├── mask_former_instance_dataset_mapper.py │ │ ├── mask_former_panoptic_dataset_mapper.py │ │ └── mask_former_semantic_dataset_mapper.py │ ├── datasets │ │ ├── __init__.py │ │ ├── register_ade20k_full.py │ │ ├── register_ade20k_instance.py │ │ ├── register_ade20k_panoptic.py │ │ ├── register_coco_panoptic_annos_semseg.py │ │ ├── register_coco_stuff_10k.py │ │ ├── register_mapillary_vistas.py │ │ └── register_mapillary_vistas_panoptic.py │ ├── detr_dataset_mapper.py │ └── transforms │ │ ├── __init__.py │ │ └── color_augmentation.py ├── layers │ ├── __init__.py │ ├── attention.py │ ├── box_ops.py │ ├── conv.py │ ├── csrc │ │ ├── DCNv3 │ │ │ ├── dcnv3.h │ │ │ ├── dcnv3_cpu.cpp │ │ │ ├── dcnv3_cpu.h │ │ │ ├── dcnv3_cuda.cu │ │ │ ├── dcnv3_cuda.h │ │ │ └── dcnv3_im2col_cuda.cuh │ │ ├── MsDeformAttn │ │ │ ├── ms_deform_attn.h │ │ │ ├── ms_deform_attn_cpu.cpp │ │ │ ├── ms_deform_attn_cpu.h │ │ │ ├── ms_deform_attn_cuda.cu │ │ │ ├── ms_deform_attn_cuda.h │ │ │ └── ms_deform_im2col_cuda.cuh │ │ ├── cuda_version.cu │ │ └── vision.cpp │ ├── dcn_v3.py │ ├── denoising.py │ ├── layer_norm.py 
│ ├── mlp.py │ ├── multi_scale_deform_attn.py │ ├── position_embedding.py │ ├── shape_spec.py │ └── transformer.py ├── modeling │ ├── __init__.py │ ├── backbone │ │ ├── __init__.py │ │ ├── convnext.py │ │ ├── eva.py │ │ ├── eva_02.py │ │ ├── eva_02_utils.py │ │ ├── focalnet.py │ │ ├── internimage.py │ │ ├── resnet.py │ │ ├── timm_backbone.py │ │ ├── torchvision_backbone.py │ │ ├── torchvision_resnet.py │ │ └── utils.py │ ├── criterion │ │ ├── __init__.py │ │ ├── base_criterion.py │ │ └── criterion.py │ ├── ema.py │ ├── losses │ │ ├── __init__.py │ │ ├── cross_entropy_loss.py │ │ ├── dice_loss.py │ │ ├── focal_loss.py │ │ ├── giou_loss.py │ │ ├── smooth_l1_loss.py │ │ └── utils.py │ ├── matcher │ │ ├── __init__.py │ │ ├── match_cost.py │ │ ├── matcher.py │ │ └── modified_matcher.py │ └── neck │ │ ├── __init__.py │ │ └── channel_mapper.py └── utils │ ├── __init__.py │ ├── dist.py │ ├── events.py │ └── misc.py ├── dev ├── linter.sh └── run_unittest.sh ├── docs ├── Makefile ├── README.md ├── requirements.txt └── source │ ├── _static │ └── css │ │ └── line_space.css │ ├── _templates │ ├── .gitkeep │ └── line_space.html │ ├── changelog.md │ ├── conf.py │ ├── index.rst │ ├── modules │ ├── detrex.config.rst │ ├── detrex.data.rst │ ├── detrex.layers.rst │ ├── detrex.modeling.rst │ ├── detrex.utils.rst │ └── index.rst │ └── tutorials │ ├── Config_System.md │ ├── Converters.md │ ├── Customize_Training.md │ ├── Download_Pretrained_Weights.md │ ├── FAQs.md │ ├── Getting_Started.md │ ├── Installation.md │ ├── Model_Zoo.md │ ├── Tools.md │ ├── Using_Pretrained_Backbone.md │ ├── assets │ ├── annotation_demo.jpg │ ├── cosine_lr_scheduler.png │ ├── demo_output.jpg │ ├── dino_prediction_demo.jpg │ ├── exponential_lr_scheduler.png │ ├── linear_lr_scheduler.png │ ├── multi_step_example.png │ ├── multi_step_lr_scheduler.png │ ├── step_lr_scheduler.png │ └── step_lr_with_fixed_gamma.png │ └── index.rst ├── projects ├── README.md ├── align_detr │ ├── README.md │ ├── configs │ │ ├── 
aligndetr_k=2_r50_4scale_12ep.py │ │ ├── aligndetr_k=2_r50_4scale_24ep.py │ │ ├── aligndetr_k=2_r50_4scale_36ep.py │ │ └── models │ │ │ └── aligndetr_r50.py │ └── modeling │ │ ├── __init__.py │ │ ├── aligndetr.py │ │ ├── criterions │ │ ├── __init__.py │ │ ├── aligndetr_dn_criterion.py │ │ ├── base_criterion.py │ │ ├── many_to_one_criterion.py │ │ └── two_stage_criterion.py │ │ ├── losses │ │ ├── __init__.py │ │ └── losses.py │ │ ├── matchers │ │ ├── __init__.py │ │ └── mixed_matcher.py │ │ └── transformer.py ├── anchor_detr │ ├── README.md │ ├── assets │ │ └── anchor_detr_arch.png │ ├── configs │ │ ├── anchor_detr_r101_50ep.py │ │ ├── anchor_detr_r101_dc5_50ep.py │ │ ├── anchor_detr_r50_50ep.py │ │ ├── anchor_detr_r50_dc5_50ep.py │ │ └── models │ │ │ └── anchor_detr_r50.py │ └── modeling │ │ ├── __init__.py │ │ ├── anchor_detr.py │ │ ├── anchor_detr_transformer.py │ │ ├── row_column_decoupled_attention.py │ │ └── utils.py ├── co_mot │ ├── README.md │ ├── configs │ │ ├── common │ │ │ ├── dancetrack_schedule.py │ │ │ └── data │ │ │ │ └── dancetrack_mot.py │ │ ├── mot_r50.py │ │ └── mot_r50_4scale_10ep.py │ ├── data │ │ ├── __init__.py │ │ ├── datasets │ │ │ ├── __init__.py │ │ │ └── register_dancetrack_mot.py │ │ ├── mot_build.py │ │ ├── mot_dataset_mapper.py │ │ └── transforms │ │ │ ├── __init__.py │ │ │ └── mot_transforms.py │ ├── evaluation │ │ ├── __init__.py │ │ └── dancetrack_evaluation.py │ ├── modeling │ │ ├── __init__.py │ │ ├── matcher.py │ │ ├── mot.py │ │ ├── mot_transformer.py │ │ └── qim.py │ ├── train_net.py │ └── util │ │ ├── __init__.py │ │ ├── checkpoint.py │ │ └── misc.py ├── conditional_detr │ ├── README.md │ ├── assets │ │ └── attention-maps.png │ ├── configs │ │ ├── conditional_detr_r101_50ep.py │ │ ├── conditional_detr_r101_dc5_50ep.py │ │ ├── conditional_detr_r50_50ep.py │ │ ├── conditional_detr_r50_dc5_50ep.py │ │ └── models │ │ │ ├── conditional_detr_r50.py │ │ │ └── conditional_detr_r50_dc5.py │ ├── converter.py │ └── modeling │ │ ├── 
__init__.py │ │ ├── conditional_detr.py │ │ └── conditional_transformer.py ├── dab_deformable_detr │ ├── README.md │ ├── assets │ │ └── dab_detr_overall.png │ ├── configs │ │ ├── dab_deformable_detr_r50_50ep.py │ │ ├── dab_deformable_detr_r50_two_stage_50ep.py │ │ └── models │ │ │ └── dab_deformable_detr_r50.py │ └── modeling │ │ ├── __init__.py │ │ ├── dab_deformable_detr.py │ │ ├── dab_deformable_transformer.py │ │ └── two_stage_criterion.py ├── dab_detr │ ├── README.md │ ├── assets │ │ ├── dab_detr_details.png │ │ └── dab_detr_overall.png │ ├── configs │ │ ├── dab_detr_r101_50ep.py │ │ ├── dab_detr_r101_dc5_50ep.py │ │ ├── dab_detr_r50_3patterns_50ep.py │ │ ├── dab_detr_r50_50ep.py │ │ ├── dab_detr_r50_dc5_3patterns_50ep.py │ │ ├── dab_detr_r50_dc5_50ep.py │ │ ├── dab_detr_swin_b_in21k_50ep.py │ │ ├── dab_detr_swin_t_in1k_50ep.py │ │ └── models │ │ │ ├── dab_detr_r50.py │ │ │ ├── dab_detr_r50_3patterns.py │ │ │ ├── dab_detr_r50_dc5.py │ │ │ ├── dab_detr_swin_base.py │ │ │ └── dab_detr_swin_tiny.py │ └── modeling │ │ ├── __init__.py │ │ ├── dab_detr.py │ │ └── dab_transformer.py ├── deformable_detr │ ├── README.md │ ├── assets │ │ └── deformable_detr.png │ ├── configs │ │ ├── deformable_detr_r50_50ep.py │ │ ├── deformable_detr_r50_two_stage_50ep.py │ │ ├── deformable_detr_r50_with_box_refinement_50ep.py │ │ └── models │ │ │ └── deformable_detr_r50.py │ ├── convert_two_stage.py │ ├── converter.py │ ├── modeling │ │ ├── __init__.py │ │ ├── deformable_criterion.py │ │ ├── deformable_detr.py │ │ └── deformable_transformer.py │ └── train_net.py ├── deta │ ├── README.md │ ├── assets │ │ └── deta.png │ ├── configs │ │ ├── data │ │ │ └── coco_detr_larger.py │ │ ├── deta_r50_5scale_12ep.py │ │ ├── deta_r50_5scale_12ep_bs8.py │ │ ├── deta_r50_5scale_no_frozen_backbone.py │ │ ├── deta_swin_large_finetune_24ep.py │ │ ├── improved_deformable_detr_baseline_50ep.py │ │ ├── models │ │ │ ├── deta_r50.py │ │ │ └── deta_swin.py │ │ └── scheduler │ │ │ └── coco_scheduler.py │ ├── 
modeling │ │ ├── __init__.py │ │ ├── assigner.py │ │ ├── deformable_detr.py │ │ ├── deformable_transformer.py │ │ └── deta_criterion.py │ └── train_net.py ├── detr │ ├── README.md │ ├── assets │ │ └── DETR.png │ ├── configs │ │ ├── detr_r101_300ep.py │ │ ├── detr_r101_dc5_300ep.py │ │ ├── detr_r50_300ep.py │ │ ├── detr_r50_dc5_300ep.py │ │ └── models │ │ │ ├── detr_r50.py │ │ │ └── detr_r50_dc5.py │ ├── converter.py │ └── modeling │ │ ├── __init__.py │ │ ├── detr.py │ │ └── transformer.py ├── dino │ ├── README.md │ ├── assets │ │ └── dino_arch.png │ ├── configs │ │ ├── dino-convnext │ │ │ ├── dino_convnext_base_384_4scale_12ep.py │ │ │ ├── dino_convnext_large_384_4scale_12ep.py │ │ │ ├── dino_convnext_small_384_4scale_12ep.py │ │ │ └── dino_convnext_tiny_384_4scale_12ep.py │ │ ├── dino-eva-01 │ │ │ ├── dino_eva_01_1536_4scale_12ep.py │ │ │ └── dino_eva_01_4scale_12ep.py │ │ ├── dino-focal │ │ │ ├── dino_focal_base_lrf_fl3_4scale_12ep.py │ │ │ ├── dino_focal_small_lrf_fl3_4scale_12ep.py │ │ │ ├── dino_focal_tiny_lrf_fl3_4scale_12ep.py │ │ │ ├── dino_focalnet_large_lrf_384_4scale_12ep.py │ │ │ ├── dino_focalnet_large_lrf_384_4scale_36ep.py │ │ │ ├── dino_focalnet_large_lrf_384_fl4_4scale_12ep.py │ │ │ ├── dino_focalnet_large_lrf_384_fl4_5scale_12ep.py │ │ │ ├── dino_focalnet_large_lrf_384_fl4_5scale_36ep.py │ │ │ └── focalnet.py │ │ ├── dino-internimage │ │ │ ├── dino_internimage_base_4scale_12ep.py │ │ │ ├── dino_internimage_large_4scale_12ep.py │ │ │ ├── dino_internimage_small_4scale_12ep.py │ │ │ └── dino_internimage_tiny_4scale_12ep.py │ │ ├── dino-resnet │ │ │ ├── dino_r101_4scale_12ep.py │ │ │ ├── dino_r50_4scale_12ep.py │ │ │ ├── dino_r50_4scale_12ep_300dn.py │ │ │ ├── dino_r50_4scale_12ep_better_hyper.py │ │ │ ├── dino_r50_4scale_12ep_no_frozen.py │ │ │ ├── dino_r50_4scale_24ep.py │ │ │ └── dino_r50_5scale_12ep.py │ │ ├── dino-swin │ │ │ ├── dino_swin_base_384_4scale_12ep.py │ │ │ ├── dino_swin_large_224_4scale_12ep.py │ │ │ ├── 
dino_swin_large_384_4scale_12ep.py │ │ │ ├── dino_swin_large_384_4scale_36ep.py │ │ │ ├── dino_swin_large_384_5scale_12ep.py │ │ │ ├── dino_swin_large_384_5scale_36ep.py │ │ │ ├── dino_swin_small_224_4scale_12ep.py │ │ │ └── dino_swin_tiny_224_4scale_12ep.py │ │ ├── dino-vitdet │ │ │ ├── dino_vitdet_base_4scale_12ep.py │ │ │ ├── dino_vitdet_base_4scale_50ep.py │ │ │ ├── dino_vitdet_large_4scale_12ep.py │ │ │ └── dino_vitdet_large_4scale_50ep.py │ │ ├── models │ │ │ ├── dino_convnext.py │ │ │ ├── dino_eva_01.py │ │ │ ├── dino_focalnet.py │ │ │ ├── dino_internimage.py │ │ │ ├── dino_r50.py │ │ │ ├── dino_swin_base_384.py │ │ │ ├── dino_swin_large_224.py │ │ │ ├── dino_swin_large_384.py │ │ │ ├── dino_swin_small_224.py │ │ │ ├── dino_swin_tiny_224.py │ │ │ └── dino_vitdet.py │ │ ├── timm_example.py │ │ └── torchvision_example.py │ ├── modeling │ │ ├── __init__.py │ │ ├── dino.py │ │ ├── dino_transformer.py │ │ ├── dn_criterion.py │ │ └── two_stage_criterion.py │ └── train_net.py ├── dino_eva │ ├── README.md │ ├── assets │ │ └── dino_arch.png │ ├── configs │ │ ├── common │ │ │ ├── coco_loader_lsj.py │ │ │ ├── coco_loader_lsj_1024.py │ │ │ ├── coco_loader_lsj_1280.py │ │ │ └── coco_loader_lsj_1536.py │ │ ├── dino-eva-01 │ │ │ ├── dino_eva_01_1280_4scale_12ep.py │ │ │ └── dino_eva_01_1536_4scale_12ep.py │ │ ├── dino-eva-02 │ │ │ ├── dino_eva_02_vitdet_b_4attn_1024_lrd0p7_4scale_12ep.py │ │ │ ├── dino_eva_02_vitdet_b_6attn_win32_1536_lrd0p7_4scale_12ep.py │ │ │ ├── dino_eva_02_vitdet_l_4attn_1024_lrd0p8_4scale_12ep.py │ │ │ ├── dino_eva_02_vitdet_l_4attn_1280_lrd0p8_4scale_12ep.py │ │ │ ├── dino_eva_02_vitdet_l_8attn_1536_lrd0p8_4scale_12ep.py │ │ │ └── dino_eva_02_vitdet_l_8attn_win32_1536_lrd0p8_4scale_12ep.py │ │ └── models │ │ │ ├── dino_eva_01.py │ │ │ ├── dino_eva_02.py │ │ │ └── dino_r50.py │ ├── modeling │ │ ├── __init__.py │ │ ├── dino.py │ │ ├── dino_transformer.py │ │ ├── dn_criterion.py │ │ └── two_stage_criterion.py │ └── train_net.py ├── dn_deformable_detr │ 
├── README.md │ ├── assets │ │ └── dn_detr_arch.png │ ├── configs │ │ ├── dn_deformable_detr_r50_12ep.py │ │ ├── dn_deformable_detr_r50_50ep.py │ │ └── models │ │ │ └── dn_deformable_detr_r50.py │ ├── converter.py │ └── modeling │ │ ├── __init__.py │ │ ├── dn_criterion.py │ │ ├── dn_deformable_detr.py │ │ └── dn_deformable_transformer.py ├── dn_detr │ ├── README.md │ ├── assets │ │ └── dn_detr_arch.png │ ├── configs │ │ ├── dn_detr_r50_50ep.py │ │ ├── dn_detr_r50_dc5_50ep.py │ │ └── models │ │ │ ├── dn_detr_r50.py │ │ │ └── dn_detr_r50_dc5.py │ └── modeling │ │ ├── __init__.py │ │ ├── dn_criterion.py │ │ ├── dn_detr.py │ │ └── dn_transformers.py ├── focus_detr │ ├── README.md │ ├── configs │ │ ├── focus_detr_resnet │ │ │ ├── focus_detr_r101_4scale_12ep.py │ │ │ ├── focus_detr_r101_4scale_24ep.py │ │ │ ├── focus_detr_r101_4scale_36ep.py │ │ │ ├── focus_detr_r50_4scale_12ep.py │ │ │ ├── focus_detr_r50_4scale_24ep.py │ │ │ └── focus_detr_r50_4scale_36ep.py │ │ ├── focus_detr_swin │ │ │ ├── focus_detr_swin_base_224_4scale_36ep.py │ │ │ ├── focus_detr_swin_base_384_4scale_36ep.py │ │ │ ├── focus_detr_swin_large_384_4scale_36ep.py │ │ │ ├── focus_detr_swin_tiny_224_4scale_12ep.py │ │ │ ├── focus_detr_swin_tiny_224_4scale_22k_12ep.py │ │ │ ├── focus_detr_swin_tiny_224_4scale_22k_36ep.py │ │ │ ├── focus_detr_swin_tiny_224_4scale_24ep.py │ │ │ └── focus_detr_swin_tiny_224_4scale_36ep.py │ │ └── models │ │ │ ├── focus_detr_r50.py │ │ │ ├── focus_detr_swin_base_384.py │ │ │ ├── focus_detr_swin_large_224.py │ │ │ ├── focus_detr_swin_large_384.py │ │ │ ├── focus_detr_swin_small_224.py │ │ │ └── focus_detr_swin_tiny_224.py │ └── modeling │ │ ├── __init__.py │ │ ├── dn_criterion.py │ │ ├── focus_detr.py │ │ ├── focus_detr_transformer.py │ │ ├── foreground_supervision.py │ │ ├── transformer_layer.py │ │ └── two_stage_criterion.py ├── group_detr │ ├── README.md │ ├── assets │ │ └── group_detr_arch.png │ ├── configs │ │ ├── group_detr_r50_50ep.py │ │ └── models │ │ │ └── 
group_detr_r50.py │ └── modeling │ │ ├── __init__.py │ │ ├── attention.py │ │ ├── group_criterion.py │ │ ├── group_detr.py │ │ ├── group_detr_transformer.py │ │ └── group_matcher.py ├── h_deformable_detr │ ├── README.md │ ├── assets │ │ └── h_detr_arch.png │ ├── configs │ │ ├── h_deformable_detr_r50_50ep.py │ │ ├── h_deformable_detr_r50_two_stage_12ep.py │ │ ├── h_deformable_detr_r50_two_stage_36ep.py │ │ ├── h_deformable_detr_swin_large_two_stage_12ep.py │ │ ├── h_deformable_detr_swin_large_two_stage_12ep_900queries.py │ │ ├── h_deformable_detr_swin_large_two_stage_36ep.py │ │ ├── h_deformable_detr_swin_large_two_stage_36ep_900queries.py │ │ ├── h_deformable_detr_swin_tiny_two_stage_12ep.py │ │ ├── h_deformable_detr_swin_tiny_two_stage_36ep.py │ │ └── models │ │ │ └── h_deformable_detr_r50.py │ ├── modeling │ │ ├── __init__.py │ │ ├── deformable_criterion.py │ │ ├── h_deformable_detr.py │ │ └── h_deformable_transformer.py │ └── train_net.py ├── maskdino │ ├── README.md │ ├── assets │ │ ├── dinosaur.png │ │ ├── framework.jpg │ │ ├── instance.png │ │ ├── panoptic.png │ │ ├── semantic.png │ │ └── sota.png │ ├── configs │ │ ├── data │ │ │ ├── ade20k_semantic_seg.py │ │ │ ├── coco_instance_seg.py │ │ │ └── coco_panoptic_seg.py │ │ ├── maskdino_r50_ade20k_semantic_seg_160k.py │ │ ├── maskdino_r50_coco_instance_seg_50ep.py │ │ ├── maskdino_r50_coco_panoptic_seg_50ep.py │ │ ├── maskdino_r50_instance_seg_50ep.py │ │ └── models │ │ │ └── maskdino_r50.py │ ├── data │ │ ├── __init__.py │ │ └── dataset_mappers │ │ │ ├── __init__.py │ │ │ └── coco_instance_lsj_aug_dataset_mapper.py │ ├── evaluation │ │ ├── __init__.py │ │ └── instance_evaluation.py │ ├── maskdino.py │ ├── modeling │ │ ├── __init__.py │ │ ├── backbone │ │ │ ├── __init__.py │ │ │ ├── focal.py │ │ │ └── swin.py │ │ ├── criterion.py │ │ ├── matcher.py │ │ ├── meta_arch │ │ │ ├── __init__.py │ │ │ └── maskdino_head.py │ │ ├── pixel_decoder │ │ │ ├── __init__.py │ │ │ ├── maskdino_encoder.py │ │ │ └── 
position_encoding.py │ │ └── transformer_decoder │ │ │ ├── __init__.py │ │ │ ├── dino_decoder.py │ │ │ ├── maskdino_decoder.py │ │ │ └── utils.py │ └── utils │ │ ├── __init__.py │ │ ├── box_ops.py │ │ ├── misc.py │ │ └── utils.py ├── pnp_detr │ ├── README.md │ ├── assets │ │ └── PnP-DETR.png │ ├── configs │ │ ├── models │ │ │ └── pnp_detr_r50.py │ │ ├── pnp_detr_r101_300ep.py │ │ └── pnp_detr_r50_300ep.py │ └── modeling │ │ ├── __init__.py │ │ ├── detr.py │ │ └── transformer.py └── sqr_detr │ ├── README.md │ ├── assets │ └── sqr_detr_overall.png │ ├── configs │ ├── dab_detr_r50_50ep_sqr.py │ └── models │ │ └── dab_detr_r50_sqr.py │ └── modeling │ ├── __init__.py │ └── dab_transformer_sqr.py ├── requirements.txt ├── setup.cfg ├── setup.py ├── tests ├── test_cond_attn.py ├── test_ffn.py ├── test_losses.py ├── test_ms_deform_attn.py ├── test_position_embedding.py ├── test_torchvision_backbone.py ├── test_transformer.py └── utils │ ├── __init__.py │ ├── attention.py │ ├── losses.py │ ├── mlp.py │ ├── potision_embedding.py │ └── transformer.py └── tools ├── README.md ├── __init__.py ├── analyze_model.py ├── benchmark.py ├── hydra_train_net.py ├── train_net.py ├── visualize_data.py └── visualize_json_results.py /.flake8: -------------------------------------------------------------------------------- 1 | # This is an example .flake8 config, used when developing *Black* itself. 2 | # Keep in sync with setup.cfg which is used for source packages. 
3 | 4 | [flake8] 5 | ignore = W503, E203, E221, C901, C408, E741, C407, B017 6 | max-line-length = 120 7 | max-complexity = 18 8 | select = B,C,E,F,W,T4,B9 9 | exclude = build, detectron2 10 | per-file-ignores = 11 | **/__init__.py:F401,F403,E402 12 | **/configs/**.py:F401,E402 13 | configs/**.py:F401,E402 14 | **/tests/config/**.py:F401,E402 15 | tests/config/**.py:F401,E402 16 | tests/**.py: E402 17 | tools/**.py: E402 18 | projects/**/configs/**.py:F401 19 | detectron2/**.py: F401,F403,E402,F811,W391 20 | detectron2/projects/**.py: F401,F403,E402,F811,W391 -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | 2 | [submodule "detectron2"] 3 | path = detectron2 4 | url = https://github.com/facebookresearch/detectron2.git 5 | -------------------------------------------------------------------------------- /.readthedocs.yaml: -------------------------------------------------------------------------------- 1 | # .readthedocs.yaml 2 | # Read the Docs configuration file 3 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details 4 | 5 | # Required 6 | version: 2 7 | 8 | # Set the version of Python and other tools you might need 9 | build: 10 | os: ubuntu-20.04 11 | tools: 12 | python: "3.7" 13 | # You can also specify other tool versions: 14 | # nodejs: "16" 15 | # rust: "1.55" 16 | # golang: "1.17" 17 | 18 | # Build documentation in the docs/ directory with Sphinx 19 | sphinx: 20 | configuration: docs/source/conf.py 21 | 22 | # If using Sphinx, optionally build your docs in additional formats such as PDF 23 | # formats: 24 | # - pdf 25 | 26 | # Optionally declare the Python requirements required to build your docs 27 | python: 28 | install: 29 | - requirements: requirements.txt 30 | - requirements: docs/requirements.txt -------------------------------------------------------------------------------- /CITATION.cff: 
-------------------------------------------------------------------------------- 1 | cff-version: 1.2.0 2 | message: "If you use this software, please cite it as below." 3 | authors: 4 | - name: "detrex Contributors" 5 | title: "IDEA-CVR Detection-Transformer Toolbox and Benchmark" 6 | date-released: 2022-09-21 7 | url: "https://github.com/IDEA-Research/detrex" 8 | license: Apache-2.0 9 | -------------------------------------------------------------------------------- /assets/detr_arch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IDEA-Research/detrex/c56d32e3d0262cff9835ebe80a0642965ae0cb3e/assets/detr_arch.png -------------------------------------------------------------------------------- /assets/detrex_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IDEA-Research/detrex/c56d32e3d0262cff9835ebe80a0642965ae0cb3e/assets/detrex_logo.png -------------------------------------------------------------------------------- /assets/logo_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IDEA-Research/detrex/c56d32e3d0262cff9835ebe80a0642965ae0cb3e/assets/logo_2.png -------------------------------------------------------------------------------- /configs/common/data/coco.py: -------------------------------------------------------------------------------- 1 | from omegaconf import OmegaConf 2 | 3 | import detectron2.data.transforms as T 4 | from detectron2.config import LazyCall as L 5 | from detectron2.data import ( 6 | DatasetMapper, 7 | build_detection_test_loader, 8 | build_detection_train_loader, 9 | get_detection_dataset_dicts, 10 | ) 11 | from detectron2.evaluation import COCOEvaluator 12 | 13 | dataloader = OmegaConf.create() 14 | 15 | dataloader.train = L(build_detection_train_loader)( 16 | 
dataset=L(get_detection_dataset_dicts)(names="coco_2017_train"), 17 | mapper=L(DatasetMapper)( 18 | is_train=True, 19 | augmentations=[ 20 | L(T.ResizeShortestEdge)( 21 | short_edge_length=(640, 672, 704, 736, 768, 800), 22 | sample_style="choice", 23 | max_size=1333, 24 | ), 25 | L(T.RandomFlip)(horizontal=True), 26 | ], 27 | image_format="BGR", 28 | use_instance_mask=True, 29 | ), 30 | total_batch_size=16, 31 | num_workers=4, 32 | ) 33 | 34 | dataloader.test = L(build_detection_test_loader)( 35 | dataset=L(get_detection_dataset_dicts)(names="coco_2017_val", filter_empty=False), 36 | mapper=L(DatasetMapper)( 37 | is_train=False, 38 | augmentations=[ 39 | L(T.ResizeShortestEdge)(short_edge_length=800, max_size=1333), 40 | ], 41 | image_format="${...train.mapper.image_format}", 42 | ), 43 | num_workers=4, 44 | ) 45 | 46 | dataloader.evaluator = L(COCOEvaluator)( 47 | dataset_name="${..test.dataset.names}", 48 | ) 49 | -------------------------------------------------------------------------------- /configs/common/data/constants.py: -------------------------------------------------------------------------------- 1 | constants = dict( 2 | imagenet_rgb256_mean=[123.675, 116.28, 103.53], 3 | imagenet_rgb256_std=[58.395, 57.12, 57.375], 4 | imagenet_bgr256_mean=[103.530, 116.280, 123.675], 5 | # When using pre-trained models in Detectron1 or any MSRA models, 6 | # std has been absorbed into its conv1 weights, so the std needs to be set 1. 
7 | # Otherwise, you can use [57.375, 57.120, 58.395] (ImageNet std) 8 | imagenet_bgr256_std=[1.0, 1.0, 1.0], 9 | ) -------------------------------------------------------------------------------- /configs/common/optim.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from detectron2.config import LazyCall as L 4 | from detectron2.solver.build import get_default_optimizer_params 5 | 6 | SGD = L(torch.optim.SGD)( 7 | params=L(get_default_optimizer_params)( 8 | # params.model is meant to be set to the model object, before instantiating 9 | # the optimizer. 10 | weight_decay_norm=0.0 11 | ), 12 | lr=0.02, 13 | momentum=0.9, 14 | weight_decay=1e-4, 15 | ) 16 | 17 | 18 | AdamW = L(torch.optim.AdamW)( 19 | params=L(get_default_optimizer_params)( 20 | # params.model is meant to be set to the model object, before instantiating 21 | # the optimizer. 22 | base_lr="${..lr}", 23 | weight_decay_norm=0.0, 24 | ), 25 | lr=1e-4, 26 | betas=(0.9, 0.999), 27 | weight_decay=0.1, 28 | ) 29 | -------------------------------------------------------------------------------- /configs/hydra/slurm/research.yaml: -------------------------------------------------------------------------------- 1 | 2 | partition: research # Partition where to submit 3 | ngpus: ${num_gpus} # Number of gpus to request on each node 4 | nodes: ${num_machines} # Number of nodes to request 5 | cpus_per_task: 5 # Number of cpus per task/gpu 6 | timeout: 240 # Duration of the job, in hours 7 | job_name: "detrex" # job_name to display with `squeue` 8 | job_dir: ~ # Job directory; leave empty for default (hydra.run.dir) 9 | exclude_node: ~ # The node(s) to be excluded for slurm assignment, e.g. SH-IDC1-10-198-3-[10,20] 10 | comment: ~ # Comment to pass to scheduler, e.g. priority message 11 | quotatype: ~ # Some clusters may set different quotatype with different priority, e.g. 
reserved/spot 12 | 13 | ddp_comm_mode: "tcp" # ddp communication mode, "file" or "tcp" 14 | share_root: /path/that/can/be/accessed/by/all/machines # for "file" mode only 15 | master_port: ~ # for "tcp" mode only, leave empty to find available port automatically 16 | -------------------------------------------------------------------------------- /configs/hydra/train_args.yaml: -------------------------------------------------------------------------------- 1 | ######### converted from default argparse args ########### 2 | # config_file: '' 3 | config_file: ${pycfg_dir}/${pycfg_file} 4 | resume: false 5 | eval_only: false 6 | num_gpus: 1 7 | num_machines: 1 8 | machine_rank: 0 9 | dist_url: tcp://127.0.0.1:24999 10 | opts: [] 11 | ############################################################ 12 | 13 | # aux params for easier management of overrides 14 | pycfg_dir: projects/detr/configs 15 | pycfg_file: detr_r50_300ep.py 16 | 17 | # use automatic experiment name / output dir 18 | auto_output_dir: True 19 | 20 | hydra: 21 | run: 22 | # https://hydra.cc/docs/configure_hydra/workdir/ 23 | dir: "outputs/${hydra.job.override_dirname}/${now:%Y%m%d-%H:%M:%S}" 24 | job: 25 | config: 26 | override_dirname: 27 | kv_sep: '.' 28 | item_sep: '-' 29 | exclude_keys: 30 | - config_file 31 | - pycfg_dir 32 | - slurm 33 | - slurm.quotatype 34 | - dist_url 35 | - auto_output_dir 36 | -------------------------------------------------------------------------------- /demo/README.md: -------------------------------------------------------------------------------- 1 | 2 | ## detrex demo 3 | 4 | We provide a command line tool to run a simple demo using pretrained weights. 5 | The usage is explained in [Getting Started with detrex](https://detrex.readthedocs.io/en/latest/tutorials/Getting_Started.html). 
6 | 7 | -------------------------------------------------------------------------------- /demo/__init__.py: -------------------------------------------------------------------------------- 1 | from .predictors import VisualizationDemo 2 | -------------------------------------------------------------------------------- /detrex/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2022 The IDEA Authors. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | from detrex import layers 17 | from detrex import modeling 18 | from detrex import utils 19 | from detrex import data 20 | from detrex import config 21 | -------------------------------------------------------------------------------- /detrex/checkpoint/__init__.py: -------------------------------------------------------------------------------- 1 | from .detection_checkpoint import DetectionCheckpointer -------------------------------------------------------------------------------- /detrex/config/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2022 The IDEA Authors. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 
6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | 17 | from .config import try_get_key, get_config 18 | -------------------------------------------------------------------------------- /detrex/config/config.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2022 The IDEA Authors. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | 17 | import os 18 | import pkg_resources 19 | from omegaconf import OmegaConf 20 | 21 | from detectron2.config import LazyConfig 22 | 23 | 24 | def try_get_key(cfg, *keys, default=None): 25 | """ 26 | Try select keys from lazy cfg until the first key that exists. Otherwise return default. 27 | """ 28 | for k in keys: 29 | none = object() 30 | p = OmegaConf.select(cfg, k, default=none) 31 | if p is not none: 32 | return p 33 | return default 34 | 35 | 36 | def get_config(config_path): 37 | """ 38 | Returns a config object from a config_path. 
39 | 40 | Args: 41 | config_path (str): config file name relative to detrex's "configs/" 42 | directory, e.g., "common/train.py" 43 | 44 | Returns: 45 | omegaconf.DictConfig: a config object 46 | """ 47 | cfg_file = pkg_resources.resource_filename( 48 | "detrex.config", os.path.join("configs", config_path) 49 | ) 50 | if not os.path.exists(cfg_file): 51 | raise RuntimeError("{} not available in detrex configs!".format(config_path)) 52 | cfg = LazyConfig.load(cfg_file) 53 | return cfg 54 | -------------------------------------------------------------------------------- /detrex/data/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2022 The IDEA Authors. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | from .detr_dataset_mapper import DetrDatasetMapper 17 | from .dataset_mappers import ( 18 | COCOInstanceNewBaselineDatasetMapper, 19 | COCOPanopticNewBaselineDatasetMapper, 20 | MaskFormerSemanticDatasetMapper, 21 | MaskFormerInstanceDatasetMapper, 22 | MaskFormerPanopticDatasetMapper, 23 | ) 24 | from . 
import datasets 25 | from .transforms import ColorAugSSDTransform 26 | -------------------------------------------------------------------------------- /detrex/data/dataset_mappers/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2022 The IDEA Authors. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | from .coco_instance_new_baseline_dataset_mapper import build_transform_gen as coco_instance_transform_gen 17 | from .coco_panoptic_new_baseline_dataset_mapper import build_transform_gen as coco_panoptic_transform_gen 18 | from .mask_former_semantic_dataset_mapper import build_transform_gen as maskformer_semantic_transform_gen 19 | from .coco_instance_new_baseline_dataset_mapper import COCOInstanceNewBaselineDatasetMapper 20 | from .coco_panoptic_new_baseline_dataset_mapper import COCOPanopticNewBaselineDatasetMapper 21 | from .mask_former_instance_dataset_mapper import MaskFormerInstanceDatasetMapper 22 | from .mask_former_panoptic_dataset_mapper import MaskFormerPanopticDatasetMapper 23 | from .mask_former_semantic_dataset_mapper import MaskFormerSemanticDatasetMapper 24 | -------------------------------------------------------------------------------- /detrex/data/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2022 The IDEA Authors. 
All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # ------------------------------------------------------------------------------------------------ 16 | # Copyright (c) Facebook, Inc. and its affiliates. 17 | # ------------------------------------------------------------------------------------------------ 18 | 19 | from . import ( 20 | register_ade20k_full, 21 | register_ade20k_panoptic, 22 | register_coco_stuff_10k, 23 | register_mapillary_vistas, 24 | register_coco_panoptic_annos_semseg, 25 | register_ade20k_instance, 26 | register_mapillary_vistas_panoptic, 27 | ) 28 | -------------------------------------------------------------------------------- /detrex/data/transforms/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2022 The IDEA Authors. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | from .color_augmentation import ColorAugSSDTransform -------------------------------------------------------------------------------- /detrex/layers/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2022 The IDEA Authors. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | from .multi_scale_deform_attn import ( 17 | MultiScaleDeformableAttention, 18 | multi_scale_deformable_attn_pytorch, 19 | ) 20 | from .dcn_v3 import ( 21 | DCNv3, 22 | DCNv3Function, 23 | dcnv3_core_pytorch, 24 | ) 25 | from .layer_norm import LayerNorm 26 | from .box_ops import ( 27 | box_cxcywh_to_xyxy, 28 | box_xyxy_to_cxcywh, 29 | box_iou, 30 | generalized_box_iou, 31 | masks_to_boxes, 32 | ) 33 | from .transformer import ( 34 | BaseTransformerLayer, 35 | TransformerLayerSequence, 36 | ) 37 | from .position_embedding import ( 38 | PositionEmbeddingLearned, 39 | PositionEmbeddingSine, 40 | get_sine_pos_embed, 41 | ) 42 | from .mlp import MLP, FFN 43 | from .attention import ( 44 | MultiheadAttention, 45 | ConditionalSelfAttention, 46 | ConditionalCrossAttention, 47 | ) 48 | from .conv import ( 49 | ConvNormAct, 50 | ConvNorm, 51 | ) 52 | from .denoising import ( 53 | apply_box_noise, 54 | apply_label_noise, 55 | GenerateDNQueries, 56 | ) 57 | from .shape_spec import ShapeSpec 58 | -------------------------------------------------------------------------------- /detrex/layers/csrc/DCNv3/dcnv3_cpu.h: -------------------------------------------------------------------------------- 1 | /*! 
2 | ************************************************************************************************** 3 | * InternImage 4 | * Copyright (c) 2022 OpenGVLab 5 | * Licensed under The MIT License [see LICENSE for details] 6 | ************************************************************************************************** 7 | * Modified from 8 | *https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 9 | ************************************************************************************************** 10 | */ 11 | 12 | #pragma once 13 | #include 14 | 15 | at::Tensor dcnv3_cpu_forward(const at::Tensor &input, const at::Tensor &offset, 16 | const at::Tensor &mask, const int kernel_h, 17 | const int kernel_w, const int stride_h, 18 | const int stride_w, const int pad_h, 19 | const int pad_w, const int dilation_h, 20 | const int dilation_w, const int group, 21 | const int group_channels, const float offset_scale, 22 | const int im2col_step); 23 | 24 | std::vector 25 | dcnv3_cpu_backward(const at::Tensor &input, const at::Tensor &offset, 26 | const at::Tensor &mask, const int kernel_h, 27 | const int kernel_w, const int stride_h, const int stride_w, 28 | const int pad_h, const int pad_w, const int dilation_h, 29 | const int dilation_w, const int group, 30 | const int group_channels, const float offset_scale, 31 | const at::Tensor &grad_output, const int im2col_step); -------------------------------------------------------------------------------- /detrex/layers/csrc/DCNv3/dcnv3_cuda.h: -------------------------------------------------------------------------------- 1 | /*! 
2 | ************************************************************************************************** 3 | * InternImage 4 | * Copyright (c) 2022 OpenGVLab 5 | * Licensed under The MIT License [see LICENSE for details] 6 | ************************************************************************************************** 7 | * Modified from 8 | *https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 9 | ************************************************************************************************** 10 | */ 11 | 12 | #pragma once 13 | #include 14 | 15 | at::Tensor dcnv3_cuda_forward(const at::Tensor &input, const at::Tensor &offset, 16 | const at::Tensor &mask, const int kernel_h, 17 | const int kernel_w, const int stride_h, 18 | const int stride_w, const int pad_h, 19 | const int pad_w, const int dilation_h, 20 | const int dilation_w, const int group, 21 | const int group_channels, 22 | const float offset_scale, const int im2col_step); 23 | 24 | std::vector 25 | dcnv3_cuda_backward(const at::Tensor &input, const at::Tensor &offset, 26 | const at::Tensor &mask, const int kernel_h, 27 | const int kernel_w, const int stride_h, const int stride_w, 28 | const int pad_h, const int pad_w, const int dilation_h, 29 | const int dilation_w, const int group, 30 | const int group_channels, const float offset_scale, 31 | const at::Tensor &grad_output, const int im2col_step); -------------------------------------------------------------------------------- /detrex/layers/csrc/MsDeformAttn/ms_deform_attn_cpu.cpp: -------------------------------------------------------------------------------- 1 | /*! 2 | ************************************************************************************************** 3 | * Deformable DETR 4 | * Copyright (c) 2020 SenseTime. All Rights Reserved. 
5 | * Licensed under the Apache License, Version 2.0 [see LICENSE for details] 6 | ************************************************************************************************** 7 | * Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 8 | ************************************************************************************************** 9 | */ 10 | 11 | #include 12 | 13 | #include 14 | #include 15 | 16 | namespace detrex { 17 | 18 | at::Tensor 19 | ms_deform_attn_cpu_forward( 20 | const at::Tensor &value, 21 | const at::Tensor &spatial_shapes, 22 | const at::Tensor &level_start_index, 23 | const at::Tensor &sampling_loc, 24 | const at::Tensor &attn_weight, 25 | const int im2col_step) 26 | { 27 | AT_ERROR("Not implement on cpu"); 28 | } 29 | 30 | std::vector 31 | ms_deform_attn_cpu_backward( 32 | const at::Tensor &value, 33 | const at::Tensor &spatial_shapes, 34 | const at::Tensor &level_start_index, 35 | const at::Tensor &sampling_loc, 36 | const at::Tensor &attn_weight, 37 | const at::Tensor &grad_output, 38 | const int im2col_step) 39 | { 40 | AT_ERROR("Not implement on cpu"); 41 | } 42 | 43 | } // namespace detrex 44 | -------------------------------------------------------------------------------- /detrex/layers/csrc/MsDeformAttn/ms_deform_attn_cpu.h: -------------------------------------------------------------------------------- 1 | /*! 2 | ************************************************************************************************** 3 | * Deformable DETR 4 | * Copyright (c) 2020 SenseTime. All Rights Reserved. 
5 | * Licensed under the Apache License, Version 2.0 [see LICENSE for details] 6 | ************************************************************************************************** 7 | * Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 8 | ************************************************************************************************** 9 | */ 10 | 11 | #pragma once 12 | #include 13 | 14 | namespace detrex { 15 | 16 | at::Tensor 17 | ms_deform_attn_cpu_forward( 18 | const at::Tensor &value, 19 | const at::Tensor &spatial_shapes, 20 | const at::Tensor &level_start_index, 21 | const at::Tensor &sampling_loc, 22 | const at::Tensor &attn_weight, 23 | const int im2col_step); 24 | 25 | std::vector 26 | ms_deform_attn_cpu_backward( 27 | const at::Tensor &value, 28 | const at::Tensor &spatial_shapes, 29 | const at::Tensor &level_start_index, 30 | const at::Tensor &sampling_loc, 31 | const at::Tensor &attn_weight, 32 | const at::Tensor &grad_output, 33 | const int im2col_step); 34 | 35 | } // namespace detrex 36 | -------------------------------------------------------------------------------- /detrex/layers/csrc/MsDeformAttn/ms_deform_attn_cuda.h: -------------------------------------------------------------------------------- 1 | /*! 2 | ************************************************************************************************** 3 | * Deformable DETR 4 | * Copyright (c) 2020 SenseTime. All Rights Reserved. 
5 | * Licensed under the Apache License, Version 2.0 [see LICENSE for details] 6 | ************************************************************************************************** 7 | * Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 8 | ************************************************************************************************** 9 | */ 10 | 11 | #pragma once 12 | #include 13 | 14 | namespace detrex { 15 | 16 | at::Tensor ms_deform_attn_cuda_forward( 17 | const at::Tensor &value, 18 | const at::Tensor &spatial_shapes, 19 | const at::Tensor &level_start_index, 20 | const at::Tensor &sampling_loc, 21 | const at::Tensor &attn_weight, 22 | const int im2col_step); 23 | 24 | std::vector ms_deform_attn_cuda_backward( 25 | const at::Tensor &value, 26 | const at::Tensor &spatial_shapes, 27 | const at::Tensor &level_start_index, 28 | const at::Tensor &sampling_loc, 29 | const at::Tensor &attn_weight, 30 | const at::Tensor &grad_output, 31 | const int im2col_step); 32 | 33 | } // namespace detrex -------------------------------------------------------------------------------- /detrex/layers/csrc/cuda_version.cu: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | namespace detrex { 4 | int get_cudart_version() { 5 | int runtimeVersion; 6 | cudaRuntimeGetVersion(&runtimeVersion); 7 | return runtimeVersion; 8 | } 9 | } // namespace detrex 10 | -------------------------------------------------------------------------------- /detrex/layers/csrc/vision.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved 2 | 3 | #include "MsDeformAttn/ms_deform_attn.h" 4 | #include "DCNv3/dcnv3.h" 5 | 6 | namespace detrex { 7 | 8 | #ifdef WITH_CUDA 9 | extern int get_cudart_version(); 10 | #endif 11 | 12 | std::string get_cuda_version() { 13 | #ifdef WITH_CUDA 14 | std::ostringstream oss; 15 | 16 | // copied from 17 | // https://github.com/pytorch/pytorch/blob/master/aten/src/ATen/cuda/detail/CUDAHooks.cpp#L231 18 | auto printCudaStyleVersion = [&](int v) { 19 | oss << (v / 1000) << "." << (v / 10 % 100); 20 | if (v % 10 != 0) { 21 | oss << "." << (v % 10); 22 | } 23 | }; 24 | printCudaStyleVersion(get_cudart_version()); 25 | return oss.str(); 26 | #else 27 | return std::string("not available"); 28 | #endif 29 | } 30 | 31 | // similar to 32 | // https://github.com/pytorch/pytorch/blob/master/aten/src/ATen/Version.cpp 33 | std::string get_compiler_version() { 34 | std::ostringstream ss; 35 | #if defined(__GNUC__) 36 | #ifndef __clang__ 37 | { ss << "GCC " << __GNUC__ << "." << __GNUC_MINOR__; } 38 | #endif 39 | #endif 40 | 41 | #if defined(__clang_major__) 42 | { 43 | ss << "clang " << __clang_major__ << "." << __clang_minor__ << "." 44 | << __clang_patchlevel__; 45 | } 46 | #endif 47 | 48 | #if defined(_MSC_VER) 49 | { ss << "MSVC " << _MSC_FULL_VER; } 50 | #endif 51 | return ss.str(); 52 | } 53 | 54 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 55 | m.def("ms_deform_attn_forward", &ms_deform_attn_forward, "ms_deform_attn_forward"); 56 | m.def("ms_deform_attn_backward", &ms_deform_attn_backward, "ms_deform_attn_backward"); 57 | m.def("dcnv3_forward", &dcnv3_forward, "dcnv3_forward"); 58 | m.def("dcnv3_backward", &dcnv3_backward, "dcnv3_backward"); 59 | } 60 | 61 | } // namespace detrex -------------------------------------------------------------------------------- /detrex/layers/shape_spec.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2022 The IDEA Authors. All rights reserved. 
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ------------------------------------------------------------------------------------------------
# # Copyright (c) Facebook, Inc. and its affiliates.
# ------------------------------------------------------------------------------------------------

from dataclasses import dataclass
from typing import Optional


@dataclass
class ShapeSpec:
    """
    A simple structure that contains basic shape specification about a tensor.
    It is often used as the auxiliary inputs/outputs of models,
    to complement the lack of shape inference ability among pytorch modules.
    """

    # Number of channels (feature dimension); None when unspecified.
    channels: Optional[int] = None
    # Spatial height; None when unspecified.
    height: Optional[int] = None
    # Spatial width; None when unspecified.
    width: Optional[int] = None
    # Downsampling stride of this feature map — presumably relative to the
    # model input, per the detectron2 convention; confirm with callers.
    stride: Optional[int] = None
--------------------------------------------------------------------------------
/detrex/modeling/__init__.py:
--------------------------------------------------------------------------------
# coding=utf-8
# Copyright 2022 The IDEA Authors. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | from .criterion import SetCriterion, BaseCriterion 17 | from .matcher import HungarianMatcher 18 | from .losses import ( 19 | cross_entropy, 20 | CrossEntropyLoss, 21 | sigmoid_focal_loss, 22 | FocalLoss, 23 | dice_loss, 24 | DiceLoss, 25 | smooth_l1_loss, 26 | l1_loss, 27 | L1Loss, 28 | giou_loss, 29 | GIoULoss, 30 | reduce_loss, 31 | weight_reduce_loss, 32 | ) 33 | from .neck import ChannelMapper 34 | from .backbone import ( 35 | BasicStem, 36 | ResNet, 37 | ResNetBlockBase, 38 | make_stage, 39 | BottleneckBlock, 40 | BasicBlock, 41 | ConvNeXt, 42 | FocalNet, 43 | TimmBackbone, 44 | ) 45 | -------------------------------------------------------------------------------- /detrex/modeling/backbone/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2022 The IDEA Authors. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | from .timm_backbone import TimmBackbone 17 | from .torchvision_backbone import TorchvisionBackbone 18 | from .resnet import ( 19 | BasicStem, 20 | ResNet, 21 | ResNetBlockBase, 22 | make_stage, 23 | BottleneckBlock, 24 | BasicBlock, 25 | DeformBottleneckBlock, 26 | ) 27 | from .convnext import ConvNeXt 28 | from .focalnet import FocalNet 29 | from .internimage import InternImage 30 | from .eva import EVAViT, SimpleFeaturePyramid, get_vit_lr_decay_rate 31 | from .eva_02 import EVA02_ViT 32 | -------------------------------------------------------------------------------- /detrex/modeling/criterion/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2022 The IDEA Authors. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | from .criterion import SetCriterion 17 | from .base_criterion import BaseCriterion 18 | -------------------------------------------------------------------------------- /detrex/modeling/losses/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2022 The IDEA Authors. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 
6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | from .cross_entropy_loss import cross_entropy, CrossEntropyLoss 17 | from .focal_loss import sigmoid_focal_loss, FocalLoss 18 | from .dice_loss import dice_loss, DiceLoss 19 | from .smooth_l1_loss import smooth_l1_loss, l1_loss, L1Loss 20 | from .giou_loss import giou_loss, GIoULoss 21 | from .utils import reduce_loss, weight_reduce_loss 22 | -------------------------------------------------------------------------------- /detrex/modeling/matcher/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2022 The IDEA Authors. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | from .matcher import HungarianMatcher 17 | from .match_cost import FocalLossCost, CrossEntropyCost, L1Cost, GIoUCost 18 | from .modified_matcher import HungarianMatcher as ModifedMatcher 19 | -------------------------------------------------------------------------------- /detrex/modeling/neck/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2022 The IDEA Authors. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | from .channel_mapper import ChannelMapper 17 | -------------------------------------------------------------------------------- /detrex/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2022 The IDEA Authors. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 


from .misc import (
    interpolate,
    inverse_sigmoid,
)
from .dist import (
    is_dist_avail_and_initialized,
    get_world_size,
    get_rank,
)
from .events import WandbWriter
--------------------------------------------------------------------------------
/dev/linter.sh:
--------------------------------------------------------------------------------
#!/bin/bash -e
# Copyright (c) Facebook, Inc. and its affiliates.

# cd to detrex project root
cd "$(dirname "${BASH_SOURCE[0]}")/.."

# Pin formatter major versions: different black/isort releases produce
# different output, which would churn the whole tree.
{
  black --version | grep -E "22\." > /dev/null
} || {
  echo "Linter requires 'black==22.*' !"
  exit 1
}

ISORT_VERSION=$(isort --version-number)
if [[ "$ISORT_VERSION" != 4.3* ]]; then
  echo "Linter requires isort==4.3.21 !"
  exit 1
fi

# Echo each command from here on, for CI log readability.
set -v

echo "Running autoflake ..."
# Exclude the vendored detectron2 checkout — it is formatted upstream.
autoflake --remove-unused-variables --in-place --recursive . --exclude=detectron2

echo "Running isort ..."
isort -y -sp . --atomic

echo "Running black ..."
black -l 100 . --exclude=detectron2

echo "Running flake8 ..."
# Fall back to module invocation when flake8 is not on PATH.
if [ -x "$(command -v flake8)" ]; then
  flake8 .
else
  python3 -m flake8 .
fi


echo "Running clang-format ..."
# Format all C/C++/CUDA/ObjC sources in place.
find . -regex ".*\.\(cpp\|c\|cc\|cu\|cxx\|h\|hh\|hpp\|hxx\|tcc\|mm\|m\)" -print0 | xargs -0 clang-format -i

# Run Phabricator's arc lint only when it is installed (non-fatal otherwise).
command -v arc > /dev/null && arc lint
--------------------------------------------------------------------------------
/dev/run_unittest.sh:
--------------------------------------------------------------------------------
#!/bin/bash -e

# cd to detrex project root
cd "$(dirname "${BASH_SOURCE[0]}")/.."
5 | 6 | pytest --disable-warnings ./tests -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | SOURCEDIR = source 8 | BUILDDIR = build 9 | 10 | # Put it first so that "make" without argument is like "make help". 11 | help: 12 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 13 | 14 | .PHONY: help Makefile 15 | 16 | html: Makefile 17 | @$(SPHINXBUILD) -M html "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 18 | 19 | clean: Makefile 20 | @rm -rf build 21 | -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- 1 | ## Read detrex Documentation 2 | The latest documentation built from this directory is available at [detrex.readthedocs.io](https://detrex.readthedocs.io/en/latest/). 3 | 4 | 5 | ## Build detrex Documentation 6 | 1. Install detrex according to [Installation](https://detrex.readthedocs.io/en/latest/tutorials/Installation.html). 7 | 2. 
Install additional libraries and run `make html` for building the docs: 8 | ```bash 9 | cd ${detrex-path}/docs 10 | pip install -r requirements.txt --user 11 | make html 12 | ``` 13 | -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | sphinx 2 | jinja2<3.1 3 | recommonmark==0.6.0 4 | sphinx-rtd-theme==1.0.0 5 | 6 | # Dependencies here are only those required by import 7 | termcolor 8 | numpy 9 | tqdm 10 | matplotlib 11 | tabulate 12 | Pillow 13 | future 14 | cloudpickle 15 | hydra-core 16 | omegaconf==2.1.0 17 | pybind11 18 | flake8==3.8.1 19 | isort==4.3.21 20 | black==22.3.0 21 | autoflake 22 | timm 23 | pytest 24 | scipy==1.7.3 25 | fvcore==0.1.5.post20220512 26 | # git+https://github.com/facebookresearch/fvcore.git 27 | git+https://github.com/facebookresearch/detectron2.git 28 | https://download.pytorch.org/whl/cpu/torch-1.8.1%2Bcpu-cp37-cp37m-linux_x86_64.whl 29 | https://download.pytorch.org/whl/cpu/torchvision-0.9.1%2Bcpu-cp37-cp37m-linux_x86_64.whl 30 | git+https://github.com/IDEA-Research/detrex.git 31 | 32 | 33 | -------------------------------------------------------------------------------- /docs/source/_static/css/line_space.css: -------------------------------------------------------------------------------- 1 | .rst-content .section ol li>*, .rst-content .section ul li>* { 2 | margin-top: 0px; 3 | margin-bottom: 0px; 4 | } 5 | 6 | .rst-content .section ol li>*, .rst-content .section li ul>* { 7 | margin-top: 0px; 8 | margin-bottom: 0px; 9 | } 10 | 11 | .rst-content .section ol li>*, .rst-content .section ul li ul { 12 | margin-top: 0px; 13 | margin-bottom: 0px; 14 | } 15 | -------------------------------------------------------------------------------- /docs/source/_templates/.gitkeep: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/IDEA-Research/detrex/c56d32e3d0262cff9835ebe80a0642965ae0cb3e/docs/source/_templates/.gitkeep -------------------------------------------------------------------------------- /docs/source/_templates/line_space.html: -------------------------------------------------------------------------------- 1 | {% extends "!line_space.html" %} 2 | {% set css_files = css_files + [ "_static/css/line_space.css" ] %} 3 | -------------------------------------------------------------------------------- /docs/source/index.rst: -------------------------------------------------------------------------------- 1 | .. detrex documentation master file, created by 2 | sphinx-quickstart on Mon Nov 29 10:26:07 2021. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | Welcome to detrex's documentation! 7 | ====================================== 8 | 9 | .. toctree:: 10 | :maxdepth: 2 11 | 12 | tutorials/index 13 | modules/index 14 | changelog.md -------------------------------------------------------------------------------- /docs/source/modules/detrex.config.rst: -------------------------------------------------------------------------------- 1 | detrex.config 2 | ############################## 3 | 4 | .. currentmodule:: detrex.config 5 | .. automodule:: detrex.config 6 | :members: 7 | try_get_key, 8 | get_config, 9 | 10 | -------------------------------------------------------------------------------- /docs/source/modules/detrex.data.rst: -------------------------------------------------------------------------------- 1 | detrex.data 2 | ############################## 3 | 4 | .. currentmodule:: detrex.data 5 | .. 
automodule:: detrex.data 6 | :members: 7 | DetrDatasetMapper, 8 | 9 | -------------------------------------------------------------------------------- /docs/source/modules/detrex.layers.rst: -------------------------------------------------------------------------------- 1 | detrex.layers 2 | ############################## 3 | 4 | .. currentmodule:: detrex.layers 5 | .. automodule:: detrex.layers 6 | :members: 7 | MultiheadAttention, 8 | MultiScaleDeformableAttention, 9 | ConditionalSelfAttention, 10 | ConditionalCrossAttention, 11 | GenerateDNQueries, 12 | apply_label_noise, 13 | apply_box_noise, 14 | FFN, 15 | MLP, 16 | PositionEmbeddingSine, 17 | PositionEmbeddingLearned, 18 | LayerNorm, 19 | get_sine_pos_embed, 20 | BaseTransformerLayer, 21 | TransformerLayerSequence, 22 | ConvNormAct, 23 | box_cxcywh_to_xyxy, 24 | box_xyxy_to_cxcywh, 25 | box_iou, 26 | generalized_box_iou, 27 | masks_to_boxes, -------------------------------------------------------------------------------- /docs/source/modules/detrex.modeling.rst: -------------------------------------------------------------------------------- 1 | detrex.modeling 2 | ############################## 3 | 4 | backbone 5 | ------------------------------ 6 | .. currentmodule:: detrex.modeling 7 | .. automodule:: detrex.modeling.backbone 8 | :member-order: bysource 9 | :members: 10 | ResNet, 11 | make_stage, 12 | ConvNeXt, 13 | FocalNet, 14 | TimmBackbone, 15 | TorchvisionBackbone, 16 | 17 | neck 18 | ------------------------------ 19 | .. currentmodule:: detrex.modeling 20 | .. automodule:: detrex.modeling.neck 21 | :member-order: bysource 22 | :members: 23 | ChannelMapper, 24 | 25 | 26 | matcher 27 | ------------------------------ 28 | .. currentmodule:: detrex.modeling 29 | .. automodule:: detrex.modeling.matcher 30 | :member-order: bysource 31 | :members: 32 | HungarianMatcher, 33 | 34 | 35 | losses 36 | ------------------------------ 37 | .. currentmodule:: detrex.modeling 38 | .. 
automodule:: detrex.modeling.losses 39 | :member-order: bysource 40 | :members: 41 | sigmoid_focal_loss, 42 | dice_loss, -------------------------------------------------------------------------------- /docs/source/modules/detrex.utils.rst: -------------------------------------------------------------------------------- 1 | detrex.utils 2 | ############################## 3 | 4 | .. currentmodule:: detrex.utils 5 | .. automodule:: detrex.utils 6 | :members: 7 | is_dist_avail_and_initialized, 8 | get_world_size, 9 | interpolate, 10 | inverse_sigmoid, -------------------------------------------------------------------------------- /docs/source/modules/index.rst: -------------------------------------------------------------------------------- 1 | API Documentation 2 | ================= 3 | 4 | .. toctree:: 5 | :maxdepth: 2 6 | 7 | detrex.config 8 | detrex.data 9 | detrex.layers 10 | detrex.modeling 11 | detrex.utils -------------------------------------------------------------------------------- /docs/source/tutorials/assets/annotation_demo.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IDEA-Research/detrex/c56d32e3d0262cff9835ebe80a0642965ae0cb3e/docs/source/tutorials/assets/annotation_demo.jpg -------------------------------------------------------------------------------- /docs/source/tutorials/assets/cosine_lr_scheduler.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IDEA-Research/detrex/c56d32e3d0262cff9835ebe80a0642965ae0cb3e/docs/source/tutorials/assets/cosine_lr_scheduler.png -------------------------------------------------------------------------------- /docs/source/tutorials/assets/demo_output.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IDEA-Research/detrex/c56d32e3d0262cff9835ebe80a0642965ae0cb3e/docs/source/tutorials/assets/demo_output.jpg 
-------------------------------------------------------------------------------- /docs/source/tutorials/assets/dino_prediction_demo.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IDEA-Research/detrex/c56d32e3d0262cff9835ebe80a0642965ae0cb3e/docs/source/tutorials/assets/dino_prediction_demo.jpg -------------------------------------------------------------------------------- /docs/source/tutorials/assets/exponential_lr_scheduler.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IDEA-Research/detrex/c56d32e3d0262cff9835ebe80a0642965ae0cb3e/docs/source/tutorials/assets/exponential_lr_scheduler.png -------------------------------------------------------------------------------- /docs/source/tutorials/assets/linear_lr_scheduler.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IDEA-Research/detrex/c56d32e3d0262cff9835ebe80a0642965ae0cb3e/docs/source/tutorials/assets/linear_lr_scheduler.png -------------------------------------------------------------------------------- /docs/source/tutorials/assets/multi_step_example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IDEA-Research/detrex/c56d32e3d0262cff9835ebe80a0642965ae0cb3e/docs/source/tutorials/assets/multi_step_example.png -------------------------------------------------------------------------------- /docs/source/tutorials/assets/multi_step_lr_scheduler.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IDEA-Research/detrex/c56d32e3d0262cff9835ebe80a0642965ae0cb3e/docs/source/tutorials/assets/multi_step_lr_scheduler.png -------------------------------------------------------------------------------- /docs/source/tutorials/assets/step_lr_scheduler.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/IDEA-Research/detrex/c56d32e3d0262cff9835ebe80a0642965ae0cb3e/docs/source/tutorials/assets/step_lr_scheduler.png -------------------------------------------------------------------------------- /docs/source/tutorials/assets/step_lr_with_fixed_gamma.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IDEA-Research/detrex/c56d32e3d0262cff9835ebe80a0642965ae0cb3e/docs/source/tutorials/assets/step_lr_with_fixed_gamma.png -------------------------------------------------------------------------------- /docs/source/tutorials/index.rst: -------------------------------------------------------------------------------- 1 | Tutorials 2 | ========= 3 | 4 | .. toctree:: 5 | :glob: 6 | :maxdepth: 2 7 | 8 | Installation.md 9 | Getting_Started.md 10 | Config_System.md 11 | Converters.md 12 | Download_Pretrained_Weights.md 13 | Using_Pretrained_Backbone.md 14 | Tools.md 15 | Customize_Training.md 16 | Model_Zoo.md 17 | FAQs.md 18 | 19 | -------------------------------------------------------------------------------- /projects/align_detr/configs/aligndetr_k=2_r50_4scale_12ep.py: -------------------------------------------------------------------------------- 1 | from detrex.config import get_config 2 | from .models.aligndetr_r50 import model 3 | 4 | # get default config 5 | dataloader = get_config("common/data/coco_detr.py").dataloader 6 | optimizer = get_config("common/optim.py").AdamW 7 | lr_multiplier = get_config("common/coco_schedule.py").lr_multiplier_12ep 8 | train = get_config("common/train.py").train 9 | 10 | model.criterion.match_num = [2,2,2,2,2,2,1] 11 | model.criterion.tau = 1.5 12 | # modify training config 13 | train.init_checkpoint = "detectron2://ImageNetPretrained/torchvision/R-50.pkl" 14 | train.output_dir = "./output/aligndetr_k2_12ep" 15 | 16 | # max training iterations 17 | 
22 | # log training information every 100 iters 23 | train.log_period = 100 24 | 25 | # save checkpoint every 10000 iters 26 | train.checkpointer.period = 10000
22 | # log training information every 100 iters 23 | train.log_period = 100 24 | 25 | # save checkpoint every 10000 iters 26 | train.checkpointer.period = 10000
22 | # log training information every 100 iters 23 | train.log_period = 100 24 | 25 | # save checkpoint every 10000 iters 26 | train.checkpointer.period = 10000
13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | 17 | from .transformer import ( 18 | TransformerEncoder, 19 | TransformerDecoder, 20 | Transformer, 21 | ) 22 | from .aligndetr import AlignDETR 23 | from .criterions import AlignDETRCriterion 24 | from .matchers import MixedMatcher -------------------------------------------------------------------------------- /projects/align_detr/modeling/criterions/__init__.py: -------------------------------------------------------------------------------- 1 | from .base_criterion import BaseCriterion 2 | from .aligndetr_dn_criterion import AlignDETRCriterion 3 | from .many_to_one_criterion import ManyToOneCriterion -------------------------------------------------------------------------------- /projects/align_detr/modeling/losses/__init__.py: -------------------------------------------------------------------------------- 1 | from .losses import * -------------------------------------------------------------------------------- /projects/align_detr/modeling/matchers/__init__.py: -------------------------------------------------------------------------------- 1 | from .mixed_matcher import MixedMatcher -------------------------------------------------------------------------------- /projects/anchor_detr/assets/anchor_detr_arch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IDEA-Research/detrex/c56d32e3d0262cff9835ebe80a0642965ae0cb3e/projects/anchor_detr/assets/anchor_detr_arch.png -------------------------------------------------------------------------------- /projects/anchor_detr/configs/anchor_detr_r101_50ep.py: -------------------------------------------------------------------------------- 1 | from .anchor_detr_r50_50ep import ( 2 | train, 3 | dataloader, 4 | optimizer, 5 | lr_multiplier, 6 | ) 7 | from .models.anchor_detr_r50 import model 8 | 9 | # modify training config 10 | 
19 | # log training information every 20 iters 20 | train.log_period = 20
44 | # suppose you're using 4 gpus for training and the batch size for
------------------------------------------------------------------------ 2 | # Copyright (c) 2021 megvii-model. All Rights Reserved. 3 | # ------------------------------------------------------------------------ 4 | 5 | import math 6 | 7 | import torch 8 | 9 | 10 | def pos2posemb2d(pos, num_pos_feats=128, temperature=10000): 11 | scale = 2 * math.pi 12 | pos = pos * scale 13 | dim_t = torch.arange(num_pos_feats, dtype=torch.float32, device=pos.device) 14 | dim_t = temperature ** ( 15 | 2 * torch.div(dim_t, 2, rounding_mode="floor") / num_pos_feats 16 | ) 17 | pos_x = pos[..., 0, None] / dim_t 18 | pos_y = pos[..., 1, None] / dim_t 19 | pos_x = torch.stack((pos_x[..., 0::2].sin(), pos_x[..., 1::2].cos()), dim=-1).flatten(-2) 20 | pos_y = torch.stack((pos_y[..., 0::2].sin(), pos_y[..., 1::2].cos()), dim=-1).flatten(-2) 21 | posemb = torch.cat((pos_y, pos_x), dim=-1) 22 | return posemb 23 | 24 | 25 | def pos2posemb1d(pos, num_pos_feats=256, temperature=10000): 26 | scale = 2 * math.pi 27 | pos = pos * scale 28 | dim_t = torch.arange(num_pos_feats, dtype=torch.float32, device=pos.device) 29 | dim_t = temperature ** ( 30 | 2 * torch.div(dim_t, 2, rounding_mode="floor") / num_pos_feats 31 | ) 32 | pos_x = pos[..., None] / dim_t 33 | posemb = torch.stack((pos_x[..., 0::2].sin(), pos_x[..., 1::2].cos()), dim=-1).flatten(-2) 34 | return posemb 35 | 36 | 37 | def mask2pos(mask): 38 | not_mask = ~mask 39 | y_embed = not_mask[:, :, 0].cumsum(1, dtype=torch.float32) 40 | x_embed = not_mask[:, 0, :].cumsum(1, dtype=torch.float32) 41 | y_embed = (y_embed - 0.5) / y_embed[:, -1:] 42 | x_embed = (x_embed - 0.5) / x_embed[:, -1:] 43 | return y_embed, x_embed 44 | -------------------------------------------------------------------------------- /projects/co_mot/configs/common/dancetrack_schedule.py: -------------------------------------------------------------------------------- 1 | from fvcore.common.param_scheduler import MultiStepParamScheduler 2 | 3 | from detectron2.config import 
LazyCall as L 4 | from detectron2.solver import WarmupParamScheduler 5 | 6 | 7 | def default_dancetrack_scheduler(epochs=50, decay_epochs=40, warmup_epochs=0, max_iter_epoch=5225): 8 | """ 9 | Returns the config for a default multi-step LR scheduler such as "50epochs", 10 | commonly referred to in papers, where every 1x has the total length of 1440k 11 | training images (~12 COCO epochs). LR is decayed once at the end of training. 12 | 13 | Args: 14 | epochs (int): total training epochs. 15 | decay_epochs (int): lr decay steps. 16 | warmup_epochs (int): warmup epochs. 17 | 18 | Returns: 19 | DictConfig: configs that define the multiplier for LR during training 20 | """ 21 | # total number of iterations assuming 8 batch size, using 41796/8=5225 22 | total_steps_16bs = epochs * max_iter_epoch 23 | decay_steps = decay_epochs * max_iter_epoch 24 | warmup_steps = warmup_epochs * max_iter_epoch 25 | scheduler = L(MultiStepParamScheduler)( 26 | values=[1.0, 0.1], 27 | milestones=[decay_steps, total_steps_16bs], 28 | ) 29 | return L(WarmupParamScheduler)( 30 | scheduler=scheduler, 31 | warmup_length=warmup_steps / total_steps_16bs, 32 | warmup_method="linear", 33 | warmup_factor=0.001, 34 | ) 35 | 36 | 37 | # default scheduler for detr 38 | lr_multiplier_12ep = default_dancetrack_scheduler(12, 11, 0, 5225) 39 | -------------------------------------------------------------------------------- /projects/co_mot/data/__init__.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Author: 颜峰 && bphengyan@163.com 3 | Date: 2023-05-31 09:24:33 4 | LastEditors: 颜峰 && bphengyan@163.com 5 | LastEditTime: 2023-05-31 09:24:33 6 | FilePath: /detrex/projects/co_mot/data/__init__.py 7 | Description: 8 | 9 | Copyright (c) 2023 by ${git_name_email}, All Rights Reserved. 10 | ''' 11 | # coding=utf-8 12 | # Copyright 2022 The IDEA Authors. All rights reserved. 
13 | # 14 | # Licensed under the Apache License, Version 2.0 (the "License"); 15 | # you may not use this file except in compliance with the License. 16 | # You may obtain a copy of the License at 17 | # 18 | # http://www.apache.org/licenses/LICENSE-2.0 19 | # 20 | # Unless required by applicable law or agreed to in writing, software 21 | # distributed under the License is distributed on an "AS IS" BASIS, 22 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 23 | # See the License for the specific language governing permissions and 24 | # limitations under the License. 25 | 26 | from .mot_dataset_mapper import MotDatasetMapper, MotDatasetInferenceMapper 27 | from . import datasets 28 | from .mot_build import build_mot_train_loader, build_mot_test_loader, mot_collate_fn -------------------------------------------------------------------------------- /projects/co_mot/data/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Author: 颜峰 && bphengyan@163.com 3 | Date: 2023-05-31 09:41:04 4 | LastEditors: 颜峰 && bphengyan@163.com 5 | LastEditTime: 2023-05-31 09:41:05 6 | FilePath: /detrex/projects/co_mot/data/datasets/__init__.py 7 | Description: 8 | 9 | Copyright (c) 2023 by ${git_name_email}, All Rights Reserved. 10 | ''' 11 | # coding=utf-8 12 | # Copyright 2022 The IDEA Authors. All rights reserved. 13 | # 14 | # Licensed under the Apache License, Version 2.0 (the "License"); 15 | # you may not use this file except in compliance with the License. 16 | # You may obtain a copy of the License at 17 | # 18 | # http://www.apache.org/licenses/LICENSE-2.0 19 | # 20 | # Unless required by applicable law or agreed to in writing, software 21 | # distributed under the License is distributed on an "AS IS" BASIS, 22 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
23 | # See the License for the specific language governing permissions and 24 | # limitations under the License. 25 | # ------------------------------------------------------------------------------------------------ 26 | # Copyright (c) Facebook, Inc. and its affiliates. 27 | # ------------------------------------------------------------------------------------------------ 28 | 29 | from . import ( 30 | register_dancetrack_mot, 31 | ) 32 | -------------------------------------------------------------------------------- /projects/co_mot/data/transforms/__init__.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Author: 颜峰 && bphengyan@163.com 3 | Date: 2023-05-31 09:41:55 4 | LastEditors: 颜峰 && bphengyan@163.com 5 | LastEditTime: 2023-05-31 09:41:56 6 | FilePath: /detrex/projects/co_mot/data/transforms/__init__.py 7 | Description: 8 | 9 | Copyright (c) 2023 by ${git_name_email}, All Rights Reserved. 10 | ''' 11 | # coding=utf-8 12 | # Copyright 2022 The IDEA Authors. All rights reserved. 13 | # 14 | # Licensed under the Apache License, Version 2.0 (the "License"); 15 | # you may not use this file except in compliance with the License. 16 | # You may obtain a copy of the License at 17 | # 18 | # http://www.apache.org/licenses/LICENSE-2.0 19 | # 20 | # Unless required by applicable law or agreed to in writing, software 21 | # distributed under the License is distributed on an "AS IS" BASIS, 22 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 23 | # See the License for the specific language governing permissions and 24 | # limitations under the License. 25 | 26 | from . import mot_transforms -------------------------------------------------------------------------------- /projects/co_mot/evaluation/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
2 | from .dancetrack_evaluation import DancetrackEvaluator 3 | 4 | __all__ = [k for k in globals().keys() if not k.startswith("_")] 5 | -------------------------------------------------------------------------------- /projects/co_mot/modeling/__init__.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Author: 颜峰 && bphengyan@163.com 3 | Date: 2023-05-26 10:06:20 4 | LastEditors: 颜峰 && bphengyan@163.com 5 | LastEditTime: 2023-05-30 16:03:02 6 | FilePath: /detrex/projects/co_mot/modeling/__init__.py 7 | Description: 8 | 9 | Copyright (c) 2023 by ${git_name_email}, All Rights Reserved. 10 | ''' 11 | # coding=utf-8 12 | # Copyright 2022 The IDEA Authors. All rights reserved. 13 | # 14 | # Licensed under the Apache License, Version 2.0 (the "License"); 15 | # you may not use this file except in compliance with the License. 16 | # You may obtain a copy of the License at 17 | # 18 | # http://www.apache.org/licenses/LICENSE-2.0 19 | # 20 | # Unless required by applicable law or agreed to in writing, software 21 | # distributed under the License is distributed on an "AS IS" BASIS, 22 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 23 | # See the License for the specific language governing permissions and 24 | # limitations under the License. 
25 | 26 | 27 | from .mot import MOT 28 | from .mot import ClipMatcher as MOTClipMatcher 29 | from .mot import TrackerPostProcess as MOTTrackerPostProcess 30 | from .mot import RuntimeTrackerBase as MOTRuntimeTrackerBase 31 | 32 | from .mot_transformer import DeformableTransformer as MOTDeformableTransformer 33 | 34 | from .qim import QueryInteractionModuleGroup as MOTQueryInteractionModuleGroup 35 | 36 | from .matcher import HungarianMatcherGroup as MOTHungarianMatcherGroup 37 | 38 | -------------------------------------------------------------------------------- /projects/co_mot/util/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------ 2 | # Copyright (c) 2022 megvii-research. All Rights Reserved. 3 | # ------------------------------------------------------------------------ 4 | # Modified from Deformable DETR (https://github.com/fundamentalvision/Deformable-DETR) 5 | # Copyright (c) 2020 SenseTime. All Rights Reserved. 6 | # ------------------------------------------------------------------------ 7 | # Modified from DETR (https://github.com/facebookresearch/detr) 8 | # Copyright (c) Facebook, Inc. and its affiliates. 
13 | train.init_checkpoint = "detectron2://ImageNetPretrained/torchvision/R-50.pkl"  # FIXME(review): backbone depth is set to 101 above, but this loads R-50 weights — point to an R-101 pretrained checkpoint
get_config 2 | from .models.conditional_detr_r50 import model 3 | 4 | dataloader = get_config("common/data/coco_detr.py").dataloader 5 | optimizer = get_config("common/optim.py").AdamW 6 | lr_multiplier = get_config("common/coco_schedule.py").lr_multiplier_50ep 7 | train = get_config("common/train.py").train 8 | 9 | # modify training config 10 | train.init_checkpoint = "detectron2://ImageNetPretrained/torchvision/R-50.pkl" 11 | train.output_dir = "./output/conditional_detr_r50_50ep" 12 | 13 | # max training iterations 14 | train.max_iter = 375000 15 | 16 | # run evaluation every 5000 iters 17 | train.eval_period = 5000 18 | 19 | # log training infomation every 20 iters 20 | train.log_period = 20 21 | 22 | # save checkpoint every 5000 iters 23 | train.checkpointer.period = 5000 24 | 25 | # gradient clipping for training 26 | train.clip_grad.enabled = True 27 | train.clip_grad.params.max_norm = 0.1 28 | train.clip_grad.params.norm_type = 2 29 | 30 | # set training devices 31 | train.device = "cuda" 32 | model.device = train.device 33 | 34 | # modify optimizer config 35 | optimizer.lr = 1e-4 36 | optimizer.betas = (0.9, 0.999) 37 | optimizer.weight_decay = 1e-4 38 | optimizer.params.lr_factor_func = lambda module_name: 0.1 if "backbone" in module_name else 1 39 | 40 | # modify dataloader config 41 | dataloader.train.num_workers = 16 42 | 43 | # please notice that this is total batch size. 
44 | # surpose you're using 4 gpus for training and the batch size for 45 | # each gpu is 16/4 = 4 46 | dataloader.train.total_batch_size = 16 47 | 48 | # dump the testing results into output_dir for visualization 49 | dataloader.evaluator.output_dir = train.output_dir 50 | -------------------------------------------------------------------------------- /projects/conditional_detr/configs/conditional_detr_r50_dc5_50ep.py: -------------------------------------------------------------------------------- 1 | from .conditional_detr_r50_50ep import ( 2 | train, 3 | dataloader, 4 | optimizer, 5 | lr_multiplier, 6 | ) 7 | from .models.conditional_detr_r50_dc5 import model 8 | 9 | # modify training config 10 | train.init_checkpoint = "https://download.pytorch.org/models/resnet50-0676ba61.pth" 11 | train.output_dir = "./output/conditional_detr_r50_dc5_50ep" 12 | -------------------------------------------------------------------------------- /projects/conditional_detr/configs/models/conditional_detr_r50_dc5.py: -------------------------------------------------------------------------------- 1 | from detectron2.config import LazyCall as L 2 | from detrex.modeling.backbone.torchvision_resnet import TorchvisionResNet 3 | 4 | from .conditional_detr_r50 import model 5 | 6 | 7 | model.backbone=L(TorchvisionResNet)( 8 | name="resnet50", 9 | train_backbone=True, 10 | dilation=True, 11 | return_layers={"layer4": "res5"} 12 | ) 13 | -------------------------------------------------------------------------------- /projects/conditional_detr/modeling/__init__.py: -------------------------------------------------------------------------------- 1 | from .conditional_detr import ConditionalDETR 2 | from .conditional_transformer import ( 3 | ConditionalDetrTransformerEncoder, 4 | ConditionalDetrTransformerDecoder, 5 | ConditionalDetrTransformer, 6 | ) 7 | -------------------------------------------------------------------------------- 
/projects/dab_deformable_detr/assets/dab_detr_overall.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IDEA-Research/detrex/c56d32e3d0262cff9835ebe80a0642965ae0cb3e/projects/dab_deformable_detr/assets/dab_detr_overall.png -------------------------------------------------------------------------------- /projects/dab_deformable_detr/configs/dab_deformable_detr_r50_50ep.py: -------------------------------------------------------------------------------- 1 | from detrex.config import get_config 2 | from .models.dab_deformable_detr_r50 import model 3 | 4 | dataloader = get_config("common/data/coco_detr.py").dataloader 5 | optimizer = get_config("common/optim.py").AdamW 6 | lr_multiplier = get_config("common/coco_schedule.py").lr_multiplier_50ep 7 | train = get_config("common/train.py").train 8 | 9 | # modify training config 10 | train.init_checkpoint = "detectron2://ImageNetPretrained/torchvision/R-50.pkl" 11 | train.output_dir = "./output/dab_deformable_detr_r50_50ep" 12 | 13 | # set training seed 14 | train.seed = 42 15 | 16 | # max training iterations 17 | train.max_iter = 375000 18 | 19 | # run evaluation every 5000 iters 20 | train.eval_period = 5000 21 | 22 | # log training infomation every 20 iters 23 | train.log_period = 20 24 | 25 | # save checkpoint every 5000 iters 26 | train.checkpointer.period = 5000 27 | 28 | # gradient clipping for training 29 | train.clip_grad.enabled = True 30 | train.clip_grad.params.max_norm = 0.1 31 | train.clip_grad.params.norm_type = 2 32 | 33 | # set training devices 34 | train.device = "cuda" 35 | model.device = train.device 36 | 37 | # modify optimizer config 38 | optimizer.lr = 1e-4 39 | optimizer.betas = (0.9, 0.999) 40 | optimizer.weight_decay = 1e-4 41 | optimizer.params.lr_factor_func = lambda module_name: 0.1 if "backbone" in module_name else 1 42 | 43 | # modify dataloader config 44 | dataloader.train.num_workers = 16 45 | 46 | # please notice that 
this is total batch size. 47 | # surpose you're using 4 gpus for training and the batch size for 48 | # each gpu is 16/4 = 4 49 | dataloader.train.total_batch_size = 16 50 | 51 | # dump the testing results into output_dir for visualization 52 | dataloader.evaluator.output_dir = train.output_dir 53 | -------------------------------------------------------------------------------- /projects/dab_deformable_detr/configs/dab_deformable_detr_r50_two_stage_50ep.py: -------------------------------------------------------------------------------- 1 | from .dab_deformable_detr_r50_50ep import ( 2 | train, 3 | dataloader, 4 | optimizer, 5 | lr_multiplier, 6 | model, 7 | ) 8 | 9 | # modify training config 10 | train.init_checkpoint = "detectron2://ImageNetPretrained/torchvision/R-50.pkl" 11 | train.output_dir = "./output/dab_deformable_detr_r50_two_stage_50ep" 12 | 13 | # modify model config 14 | model.as_two_stage = True 15 | 16 | # modify loss weight dict 17 | # this is an hack implementation which will be improved in the future 18 | aux_weight_dict = { 19 | "loss_class_enc": 1.0, 20 | "loss_bbox_enc": 5.0, 21 | "loss_giou_enc": 2.0, 22 | } 23 | model.criterion.weight_dict.update(aux_weight_dict) 24 | -------------------------------------------------------------------------------- /projects/dab_deformable_detr/modeling/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2022 The IDEA Authors. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | 17 | from .dab_deformable_transformer import ( 18 | DabDeformableDetrTransformerEncoder, 19 | DabDeformableDetrTransformerDecoder, 20 | DabDeformableDetrTransformer, 21 | ) 22 | from .dab_deformable_detr import DabDeformableDETR 23 | from .two_stage_criterion import TwoStageCriterion 24 | -------------------------------------------------------------------------------- /projects/dab_detr/assets/dab_detr_details.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IDEA-Research/detrex/c56d32e3d0262cff9835ebe80a0642965ae0cb3e/projects/dab_detr/assets/dab_detr_details.png -------------------------------------------------------------------------------- /projects/dab_detr/assets/dab_detr_overall.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IDEA-Research/detrex/c56d32e3d0262cff9835ebe80a0642965ae0cb3e/projects/dab_detr/assets/dab_detr_overall.png -------------------------------------------------------------------------------- /projects/dab_detr/configs/dab_detr_r101_50ep.py: -------------------------------------------------------------------------------- 1 | from .dab_detr_r50_50ep import ( 2 | train, 3 | dataloader, 4 | optimizer, 5 | lr_multiplier, 6 | model, 7 | ) 8 | 9 | # modify training config 10 | train.init_checkpoint = "path/to/R-101.pkl" 11 | train.output_dir = "./output/dab_detr_r101_50ep" 12 | 13 | # modify model config 14 | model.backbone.stages.depth = 101 15 | -------------------------------------------------------------------------------- /projects/dab_detr/configs/dab_detr_r101_dc5_50ep.py: -------------------------------------------------------------------------------- 1 | from .dab_detr_r50_50ep import ( 2 | train, 3 | dataloader, 4 | optimizer, 5 | lr_multiplier, 6 | ) 7 | from 
.models.dab_detr_r50_dc5 import model 8 | 9 | # modify training config 10 | train.init_checkpoint = "https://download.pytorch.org/models/resnet101-63fe2227.pth" 11 | train.output_dir = "./output/dab_detr_r101_dc5_50ep" 12 | 13 | # modify model 14 | model.backbone.name = "resnet101" -------------------------------------------------------------------------------- /projects/dab_detr/configs/dab_detr_r50_3patterns_50ep.py: -------------------------------------------------------------------------------- 1 | from .dab_detr_r50_50ep import ( 2 | train, 3 | dataloader, 4 | optimizer, 5 | lr_multiplier, 6 | model, 7 | ) 8 | 9 | # modify training config 10 | train.init_checkpoint = "detectron2://ImageNetPretrained/torchvision/R-50.pkl" 11 | train.output_dir = "./output/dab_detr_r50_3patterns_50ep" 12 | 13 | # using 3 pattern embeddings as in Anchor-DETR 14 | model.transformer.num_patterns = 3 -------------------------------------------------------------------------------- /projects/dab_detr/configs/dab_detr_r50_50ep.py: -------------------------------------------------------------------------------- 1 | from detrex.config import get_config 2 | from .models.dab_detr_r50 import model 3 | 4 | dataloader = get_config("common/data/coco_detr.py").dataloader 5 | optimizer = get_config("common/optim.py").AdamW 6 | lr_multiplier = get_config("common/coco_schedule.py").lr_multiplier_50ep 7 | train = get_config("common/train.py").train 8 | 9 | # initialize checkpoint to be loaded 10 | train.init_checkpoint = "detectron2://ImageNetPretrained/torchvision/R-50.pkl" 11 | train.output_dir = "./output/dab_detr_r50_50ep" 12 | 13 | # max training iterations 14 | train.max_iter = 375000 15 | 16 | # run evaluation every 5000 iters 17 | train.eval_period = 5000 18 | 19 | # log training infomation every 20 iters 20 | train.log_period = 20 21 | 22 | # save checkpoint every 5000 iters 23 | train.checkpointer.period = 5000 24 | 25 | # gradient clipping for training 26 | train.clip_grad.enabled = True 
27 | train.clip_grad.params.max_norm = 0.1 28 | train.clip_grad.params.norm_type = 2 29 | 30 | # set training devices 31 | train.device = "cuda" 32 | model.device = train.device 33 | 34 | # modify optimizer config 35 | optimizer.lr = 1e-4 36 | optimizer.betas = (0.9, 0.999) 37 | optimizer.weight_decay = 1e-4 38 | optimizer.params.lr_factor_func = lambda module_name: 0.1 if "backbone" in module_name else 1 39 | 40 | # modify dataloader config 41 | dataloader.train.num_workers = 16 42 | 43 | # please notice that this is total batch size. 44 | # surpose you're using 4 gpus for training and the batch size for 45 | # each gpu is 16/4 = 4 46 | dataloader.train.total_batch_size = 16 47 | 48 | # dump the testing results into output_dir for visualization 49 | dataloader.evaluator.output_dir = train.output_dir 50 | -------------------------------------------------------------------------------- /projects/dab_detr/configs/dab_detr_r50_dc5_3patterns_50ep.py: -------------------------------------------------------------------------------- 1 | from .dab_detr_r50_dc5_50ep import ( 2 | train, 3 | dataloader, 4 | optimizer, 5 | lr_multiplier, 6 | model, 7 | ) 8 | 9 | # modify training config 10 | train.init_checkpoint = "https://download.pytorch.org/models/resnet50-0676ba61.pth" 11 | train.output_dir = "./output/dab_detr_r50_dc5_3patterns_50ep" 12 | 13 | # using 3 pattern embeddings as in Anchor-DETR 14 | model.transformer.num_patterns = 3 15 | 16 | # modify model 17 | model.position_embedding.temperature = 20 -------------------------------------------------------------------------------- /projects/dab_detr/configs/dab_detr_r50_dc5_50ep.py: -------------------------------------------------------------------------------- 1 | from .dab_detr_r50_50ep import ( 2 | train, 3 | dataloader, 4 | optimizer, 5 | lr_multiplier, 6 | ) 7 | from .models.dab_detr_r50_dc5 import model 8 | 9 | # modify training config 10 | train.init_checkpoint = 
"https://download.pytorch.org/models/resnet50-0676ba61.pth" 11 | train.output_dir = "./output/dab_detr_r50_dc5_50ep" 12 | 13 | # modify model 14 | # DAB-DETR using 10 temperature for DC5 model 15 | model.position_embedding.temperature = 10 -------------------------------------------------------------------------------- /projects/dab_detr/configs/dab_detr_swin_b_in21k_50ep.py: -------------------------------------------------------------------------------- 1 | from .dab_detr_r50_50ep import ( 2 | train, 3 | dataloader, 4 | optimizer, 5 | lr_multiplier, 6 | ) 7 | from .models.dab_detr_swin_base import model 8 | 9 | # modify training config 10 | train.init_checkpoint = "detectron2://ImageNetPretrained/swin/swin_base_patch4_window7_224_22k.pth" 11 | train.output_dir = "./output/dab_detr_swin_b_in21k_50ep" 12 | -------------------------------------------------------------------------------- /projects/dab_detr/configs/dab_detr_swin_t_in1k_50ep.py: -------------------------------------------------------------------------------- 1 | from .dab_detr_r50_50ep import ( 2 | train, 3 | dataloader, 4 | optimizer, 5 | lr_multiplier, 6 | ) 7 | from .models.dab_detr_swin_tiny import model 8 | 9 | # modify training config 10 | train.init_checkpoint = "path/to/swin_tiny_patch4_window7_224.pth" 11 | train.output_dir = "./output/dab_detr_swin_tiny_in1k_50ep" 12 | -------------------------------------------------------------------------------- /projects/dab_detr/configs/models/dab_detr_r50_3patterns.py: -------------------------------------------------------------------------------- 1 | from .dab_detr_r50 import model 2 | 3 | 4 | # using 3 pattern embeddings as in Anchor-DETR 5 | model.transformer.num_patterns = 3 6 | -------------------------------------------------------------------------------- /projects/dab_detr/configs/models/dab_detr_r50_dc5.py: -------------------------------------------------------------------------------- 1 | from detectron2.config import LazyCall as L 2 | from 
detrex.modeling.backbone.torchvision_resnet import TorchvisionResNet 3 | 4 | from .dab_detr_r50 import model 5 | 6 | 7 | model.backbone=L(TorchvisionResNet)( 8 | name="resnet50", 9 | train_backbone=True, 10 | dilation=True, 11 | return_layers={"layer4": "res5"} 12 | ) 13 | -------------------------------------------------------------------------------- /projects/dab_detr/configs/models/dab_detr_swin_base.py: -------------------------------------------------------------------------------- 1 | from detectron2.config import LazyCall as L 2 | from detectron2.modeling.backbone import SwinTransformer 3 | 4 | from .dab_detr_r50 import model 5 | 6 | 7 | model.backbone = L(SwinTransformer)( 8 | embed_dim=128, 9 | depths=(2, 2, 18, 2), 10 | num_heads=(4, 8, 16, 32), 11 | drop_path_rate=0.4, 12 | out_indices=(3,), 13 | ) 14 | model.in_features = ["p3"] 15 | model.in_channels = 1024 16 | -------------------------------------------------------------------------------- /projects/dab_detr/configs/models/dab_detr_swin_tiny.py: -------------------------------------------------------------------------------- 1 | from detectron2.config import LazyCall as L 2 | from detectron2.modeling.backbone import SwinTransformer 3 | 4 | from .dab_detr_r50 import model 5 | 6 | 7 | model.backbone = L(SwinTransformer)( 8 | embed_dim=96, 9 | depths=(2, 2, 6, 2), 10 | num_heads=(3, 6, 12, 24), 11 | drop_path_rate=0.1, 12 | out_indices=(3,), 13 | ) 14 | model.in_features = ["p3"] 15 | model.in_channels = 768 16 | -------------------------------------------------------------------------------- /projects/dab_detr/modeling/__init__.py: -------------------------------------------------------------------------------- 1 | from .dab_detr import DABDETR 2 | from .dab_transformer import ( 3 | DabDetrTransformerEncoder, 4 | DabDetrTransformerDecoder, 5 | DabDetrTransformer, 6 | ) 7 | -------------------------------------------------------------------------------- 
/projects/deformable_detr/assets/deformable_detr.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IDEA-Research/detrex/c56d32e3d0262cff9835ebe80a0642965ae0cb3e/projects/deformable_detr/assets/deformable_detr.png -------------------------------------------------------------------------------- /projects/deformable_detr/configs/deformable_detr_r50_50ep.py: -------------------------------------------------------------------------------- 1 | from detrex.config import get_config 2 | from .models.deformable_detr_r50 import model 3 | 4 | dataloader = get_config("common/data/coco_detr.py").dataloader 5 | lr_multiplier = get_config("common/coco_schedule.py").lr_multiplier_50ep 6 | optimizer = get_config("common/optim.py").AdamW 7 | train = get_config("common/train.py").train 8 | 9 | # modify training config 10 | train.init_checkpoint = "detectron2://ImageNetPretrained/torchvision/R-50.pkl" 11 | train.output_dir = "./output/deformable_detr_r50_50ep" 12 | 13 | # max training iterations 14 | train.max_iter = 375000 15 | 16 | # run evaluation every 5000 iters 17 | train.eval_period = 5000 18 | 19 | # log training infomation every 20 iters 20 | train.log_period = 20 21 | 22 | # save checkpoint every 5000 iters 23 | train.checkpointer.period = 5000 24 | 25 | # gradient clipping for training 26 | train.clip_grad.enabled = True 27 | train.clip_grad.params.max_norm = 0.1 28 | train.clip_grad.params.norm_type = 2 29 | 30 | # set training devices 31 | train.device = "cuda" 32 | model.device = train.device 33 | 34 | # modify optimizer config 35 | optimizer.lr = 1e-4 36 | optimizer.betas = (0.9, 0.999) 37 | optimizer.weight_decay = 1e-4 38 | optimizer.params.lr_factor_func = lambda module_name: 0.1 if "backbone" in module_name else 1 39 | 40 | # modify dataloader config 41 | dataloader.train.num_workers = 16 42 | 43 | # please notice that this is total batch size. 
44 | # surpose you're using 4 gpus for training and the batch size for 45 | # each gpu is 16/4 = 4 46 | dataloader.train.total_batch_size = 16 47 | 48 | # dump the testing results into output_dir for visualization 49 | dataloader.evaluator.output_dir = train.output_dir 50 | -------------------------------------------------------------------------------- /projects/deformable_detr/configs/deformable_detr_r50_two_stage_50ep.py: -------------------------------------------------------------------------------- 1 | from .deformable_detr_r50_50ep import train, dataloader, optimizer, lr_multiplier, model 2 | 3 | # modify model config 4 | model.with_box_refine = True 5 | model.as_two_stage = True 6 | 7 | # modify training config 8 | train.init_checkpoint = "detectron2://ImageNetPretrained/torchvision/R-50.pkl" 9 | train.output_dir = "./output/deformable_detr_r50_two_stage_50ep" 10 | -------------------------------------------------------------------------------- /projects/deformable_detr/configs/deformable_detr_r50_with_box_refinement_50ep.py: -------------------------------------------------------------------------------- 1 | from .deformable_detr_r50_50ep import train, dataloader, optimizer, lr_multiplier, model 2 | 3 | # modify model config 4 | model.with_box_refine = True 5 | 6 | # modify training config 7 | train.init_checkpoint = "detectron2://ImageNetPretrained/torchvision/R-50.pkl" 8 | train.output_dir = "./output/deformable_detr_with_box_refinement_50ep" 9 | -------------------------------------------------------------------------------- /projects/deformable_detr/modeling/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2022 The IDEA Authors. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 
6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | from .deformable_transformer import ( 17 | DeformableDetrTransformerEncoder, 18 | DeformableDetrTransformerDecoder, 19 | DeformableDetrTransformer, 20 | ) 21 | from .deformable_detr import DeformableDETR 22 | from .deformable_criterion import DeformableCriterion 23 | -------------------------------------------------------------------------------- /projects/deta/assets/deta.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IDEA-Research/detrex/c56d32e3d0262cff9835ebe80a0642965ae0cb3e/projects/deta/assets/deta.png -------------------------------------------------------------------------------- /projects/deta/configs/deta_r50_5scale_12ep.py: -------------------------------------------------------------------------------- 1 | from detrex.config import get_config 2 | from .models.deta_r50 import model 3 | from .scheduler.coco_scheduler import lr_multiplier_12ep_10drop as lr_multiplier 4 | 5 | # using the default optimizer and dataloader 6 | dataloader = get_config("common/data/coco_detr.py").dataloader 7 | optimizer = get_config("common/optim.py").AdamW 8 | train = get_config("common/train.py").train 9 | 10 | # modify training config 11 | train.init_checkpoint = "detectron2://ImageNetPretrained/torchvision/R-50.pkl" 12 | train.output_dir = "./output/deta_r50_5scale_12ep" 13 | 14 | # max training iterations 15 | train.max_iter = 90000 16 | train.eval_period = 7500 17 | train.checkpointer.period = 7500 18 | 19 | # set training devices 20 | train.device = 
"cuda" 21 | model.device = train.device 22 | 23 | # modify dataloader config 24 | dataloader.train.num_workers = 16 25 | 26 | # please notice that this is total batch size. 27 | # surpose you're using 4 gpus for training and the batch size for 28 | # each gpu is 16/4 = 4 29 | dataloader.train.total_batch_size = 16 30 | 31 | -------------------------------------------------------------------------------- /projects/deta/configs/deta_r50_5scale_12ep_bs8.py: -------------------------------------------------------------------------------- 1 | from detrex.config import get_config 2 | from .models.deta_r50 import model 3 | from .scheduler.coco_scheduler import lr_multiplier_12ep_8bs_scheduler as lr_multiplier 4 | 5 | # using the default optimizer and dataloader 6 | dataloader = get_config("common/data/coco_detr.py").dataloader 7 | optimizer = get_config("common/optim.py").AdamW 8 | train = get_config("common/train.py").train 9 | 10 | # modify training config 11 | train.init_checkpoint = "detectron2://ImageNetPretrained/torchvision/R-50.pkl" 12 | train.output_dir = "./output/deta_r50_5scale_12ep_bs8" 13 | 14 | # max training iterations 15 | train.max_iter = 180000 16 | train.eval_period = 15000 17 | train.checkpointer.period = 15000 18 | 19 | 20 | # only freeze stem during training 21 | model.backbone.freeze_at = 1 22 | 23 | 24 | # modify optimizer config 25 | optimizer.lr = 1e-4 26 | optimizer.betas = (0.9, 0.999) 27 | optimizer.weight_decay = 1e-4 28 | optimizer.params.lr_factor_func = lambda module_name: 0.1 if "backbone" in module_name else 1 29 | 30 | # please notice that this is total batch size. 
31 | # surpose you're using 4 gpus for training and the batch size for 32 | # each gpu is 16/4 = 4 33 | dataloader.train.total_batch_size = 8 34 | 35 | -------------------------------------------------------------------------------- /projects/deta/configs/deta_r50_5scale_no_frozen_backbone.py: -------------------------------------------------------------------------------- 1 | from .deta_r50_5scale_12ep import ( 2 | model, 3 | train, 4 | dataloader, 5 | lr_multiplier 6 | ) 7 | 8 | model.backbone.freeze_at = 1 9 | -------------------------------------------------------------------------------- /projects/deta/configs/deta_swin_large_finetune_24ep.py: -------------------------------------------------------------------------------- 1 | from detrex.config import get_config 2 | from .deta_r50_5scale_12ep import ( 3 | train, 4 | optimizer, 5 | ) 6 | 7 | from .models.deta_swin import model 8 | from .data.coco_detr_larger import dataloader 9 | 10 | # 24ep for finetuning 11 | lr_multiplier = get_config("common/coco_schedule.py").lr_multiplier_24ep 12 | 13 | # modify learning rate 14 | optimizer.lr = 5e-5 15 | optimizer.params.lr_factor_func = lambda module_name: 0.1 if "backbone" in module_name else 1 16 | 17 | -------------------------------------------------------------------------------- /projects/deta/configs/improved_deformable_detr_baseline_50ep.py: -------------------------------------------------------------------------------- 1 | from .deta_r50_5scale_12ep import ( 2 | train, 3 | model, 4 | dataloader, 5 | lr_multiplier, 6 | optimizer, 7 | ) 8 | 9 | model.transformer.assign_first_stage = False 10 | model.criterion.assign_first_stage = False 11 | model.criterion.assign_second_stage = False 12 | -------------------------------------------------------------------------------- /projects/deta/configs/models/deta_swin.py: -------------------------------------------------------------------------------- 1 | from detectron2.config import LazyCall as L 2 | from 
detectron2.layers import ShapeSpec 3 | from detectron2.modeling.backbone import SwinTransformer 4 | 5 | from .deta_r50 import model 6 | 7 | 8 | # modify backbone config 9 | model.backbone = L(SwinTransformer)( 10 | pretrain_img_size=384, 11 | embed_dim=192, 12 | depths=(2, 2, 18, 2), 13 | num_heads=(6, 12, 24, 48), 14 | window_size=12, 15 | out_indices=(1, 2, 3), 16 | ) 17 | 18 | # modify neck config 19 | model.neck.input_shapes = { 20 | "p1": ShapeSpec(channels=384), 21 | "p2": ShapeSpec(channels=768), 22 | "p3": ShapeSpec(channels=1536), 23 | } 24 | model.neck.in_features = ["p1", "p2", "p3"] 25 | -------------------------------------------------------------------------------- /projects/deta/configs/scheduler/coco_scheduler.py: -------------------------------------------------------------------------------- 1 | from fvcore.common.param_scheduler import MultiStepParamScheduler 2 | 3 | from detectron2.config import LazyCall as L 4 | from detectron2.solver import WarmupParamScheduler 5 | 6 | 7 | def default_coco_scheduler(epochs=50, decay_epochs=40, warmup_epochs=0): 8 | """ 9 | Returns the config for a default multi-step LR scheduler such as "50epochs", 10 | commonly referred to in papers, where every 1x has the total length of 1440k 11 | training images (~12 COCO epochs). LR is decayed once at the end of training. 12 | 13 | Args: 14 | epochs (int): total training epochs. 15 | decay_epochs (int): lr decay steps. 16 | warmup_epochs (int): warmup epochs. 
17 | 18 | Returns: 19 | DictConfig: configs that define the multiplier for LR during training 20 | """ 21 | # total number of iterations assuming 16 batch size, using 1440000/16=90000 22 | total_steps_16bs = epochs * 7500 23 | decay_steps = decay_epochs * 7500 24 | warmup_steps = warmup_epochs * 7500 25 | scheduler = L(MultiStepParamScheduler)( 26 | values=[1.0, 0.1], 27 | milestones=[decay_steps, total_steps_16bs], 28 | ) 29 | return L(WarmupParamScheduler)( 30 | scheduler=scheduler, 31 | warmup_length=warmup_steps / total_steps_16bs, 32 | warmup_method="linear", 33 | warmup_factor=0.001, 34 | ) 35 | 36 | 37 | # default scheduler for detr 38 | lr_multiplier_12ep_10drop = default_coco_scheduler(12, 10, 0) 39 | lr_multiplier_12ep_8bs_scheduler = default_coco_scheduler(24, 20, 0) 40 | -------------------------------------------------------------------------------- /projects/deta/modeling/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2022 The IDEA Authors. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | from .deformable_transformer import ( 17 | DeformableDetrTransformerEncoder, 18 | DeformableDetrTransformerDecoder, 19 | DeformableDetrTransformer, 20 | ) 21 | from .deformable_detr import DeformableDETR 22 | from .deta_criterion import DETACriterion 23 | from .assigner import Stage1Assigner, Stage2Assigner -------------------------------------------------------------------------------- /projects/detr/assets/DETR.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IDEA-Research/detrex/c56d32e3d0262cff9835ebe80a0642965ae0cb3e/projects/detr/assets/DETR.png -------------------------------------------------------------------------------- /projects/detr/configs/detr_r101_300ep.py: -------------------------------------------------------------------------------- 1 | from .detr_r50_300ep import train, dataloader, optimizer, lr_multiplier, model 2 | 3 | # modify model config 4 | model.backbone.stages.depth = 101 5 | 6 | # modify training config 7 | train.init_checkpoint = "path/to/R-101.pkl" 8 | train.output_dir = "./output/detr_r101_300ep" 9 | -------------------------------------------------------------------------------- /projects/detr/configs/detr_r101_dc5_300ep.py: -------------------------------------------------------------------------------- 1 | from .detr_r50_300ep import dataloader, lr_multiplier, optimizer, train 2 | 3 | from .models.detr_r50_dc5 import model 4 | 5 | # modify training config 6 | # using torchvision official checkpoint 7 | # the urls can be found in: https://pytorch.org/vision/stable/models/resnet.html 8 | 9 | train.init_checkpoint = "https://download.pytorch.org/models/resnet101-63fe2227.pth" 10 | train.output_dir = "./output/detr_r101_dc5_300ep" 11 | 12 | # modify model 13 | model.backbone.name = "resnet101" -------------------------------------------------------------------------------- /projects/detr/configs/detr_r50_300ep.py:
-------------------------------------------------------------------------------- 1 | from detrex.config import get_config 2 | from .models.detr_r50 import model 3 | 4 | dataloader = get_config("common/data/coco_detr.py").dataloader 5 | lr_multiplier = get_config("common/coco_schedule.py").lr_multiplier_50ep 6 | optimizer = get_config("common/optim.py").AdamW 7 | train = get_config("common/train.py").train 8 | 9 | # modify training config 10 | train.init_checkpoint = "detectron2://ImageNetPretrained/torchvision/R-50.pkl" 11 | train.output_dir = "./output/detr_r50_300ep" 12 | train.max_iter = 554400 13 | 14 | # modify lr_multiplier 15 | lr_multiplier.scheduler.milestones = [369600, 554400] 16 | 17 | # modify optimizer config 18 | optimizer.weight_decay = 1e-4 19 | optimizer.params.lr_factor_func = lambda module_name: 0.1 if "backbone" in module_name else 1 20 | 21 | # modify dataloader config 22 | dataloader.train.num_workers = 16 23 | dataloader.train.total_batch_size = 64 24 | -------------------------------------------------------------------------------- /projects/detr/configs/detr_r50_dc5_300ep.py: -------------------------------------------------------------------------------- 1 | from .detr_r50_300ep import dataloader, lr_multiplier, optimizer, train 2 | 3 | from .models.detr_r50_dc5 import model 4 | 5 | # modify training config 6 | # using torchvision official checkpoint 7 | # the urls can be found in: https://pytorch.org/vision/stable/models/resnet.html 8 | 9 | train.init_checkpoint = "https://download.pytorch.org/models/resnet50-0676ba61.pth" 10 | train.output_dir = "./output/detr_r50_dc5_300ep" 11 | 12 | -------------------------------------------------------------------------------- /projects/detr/configs/models/detr_r50_dc5.py: -------------------------------------------------------------------------------- 1 | from detectron2.config import LazyCall as L 2 | 3 | from detrex.modeling.backbone.torchvision_resnet import TorchvisionResNet 4 | 5 | from 
.detr_r50 import model 6 | 7 | model.backbone=L(TorchvisionResNet)( 8 | name="resnet50", 9 | train_backbone=True, 10 | dilation=True, 11 | return_layers={"layer4": "res5"} 12 | ) 13 | -------------------------------------------------------------------------------- /projects/detr/modeling/__init__.py: -------------------------------------------------------------------------------- 1 | from .detr import DETR 2 | from .transformer import ( 3 | DetrTransformerEncoder, 4 | DetrTransformerDecoder, 5 | DetrTransformer, 6 | ) 7 | -------------------------------------------------------------------------------- /projects/dino/assets/dino_arch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IDEA-Research/detrex/c56d32e3d0262cff9835ebe80a0642965ae0cb3e/projects/dino/assets/dino_arch.png -------------------------------------------------------------------------------- /projects/dino/configs/dino-convnext/dino_convnext_base_384_4scale_12ep.py: -------------------------------------------------------------------------------- 1 | from detectron2.layers import ShapeSpec 2 | 3 | from .dino_convnext_large_384_4scale_12ep import ( 4 | train, 5 | dataloader, 6 | optimizer, 7 | lr_multiplier, 8 | model, 9 | ) 10 | 11 | # modify model to convnext-base version 12 | model.backbone.depths = [3, 3, 27, 3] 13 | model.backbone.dims = [128, 256, 512, 1024] 14 | 15 | # modify neck config 16 | model.neck.input_shapes = { 17 | "p1": ShapeSpec(channels=256), 18 | "p2": ShapeSpec(channels=512), 19 | "p3": ShapeSpec(channels=1024), 20 | } 21 | model.neck.in_features = ["p1", "p2", "p3"] 22 | 23 | # modify training config 24 | train.init_checkpoint = "/path/to/convnext_base_22k_1k_384.pth" 25 | train.output_dir = "./output/dino_convnext_base_384_4scale_12ep" 26 | -------------------------------------------------------------------------------- /projects/dino/configs/dino-convnext/dino_convnext_large_384_4scale_12ep.py: 
-------------------------------------------------------------------------------- 1 | from detrex.config import get_config 2 | from ..models.dino_convnext import model 3 | 4 | # get default config 5 | dataloader = get_config("common/data/coco_detr.py").dataloader 6 | optimizer = get_config("common/optim.py").AdamW 7 | lr_multiplier = get_config("common/coco_schedule.py").lr_multiplier_12ep 8 | train = get_config("common/train.py").train 9 | 10 | # modify training config 11 | # use convnext-large-384 as default 12 | train.init_checkpoint = "/path/to/convnext_large_22k_1k_384.pth" 13 | train.output_dir = "./output/dino_convnext_large_4scale_12ep" 14 | 15 | # max training iterations 16 | train.max_iter = 90000 17 | train.eval_period = 5000 18 | train.log_period = 20 19 | train.checkpointer.period = 5000 20 | 21 | # gradient clipping for training 22 | train.clip_grad.enabled = True 23 | train.clip_grad.params.max_norm = 0.1 24 | train.clip_grad.params.norm_type = 2 25 | 26 | # set training devices 27 | train.device = "cuda" 28 | model.device = train.device 29 | 30 | # modify optimizer config 31 | optimizer.lr = 1e-4 32 | optimizer.betas = (0.9, 0.999) 33 | optimizer.weight_decay = 1e-4 34 | optimizer.params.lr_factor_func = lambda module_name: 0.1 if "backbone" in module_name else 1 35 | 36 | # modify dataloader config 37 | dataloader.train.num_workers = 16 38 | 39 | # please notice that this is total batch size. 
40 | # suppose you're using 4 gpus for training and the batch size for 41 | # each gpu is 16/4 = 4 42 | dataloader.train.total_batch_size = 16 43 | -------------------------------------------------------------------------------- /projects/dino/configs/dino-convnext/dino_convnext_small_384_4scale_12ep.py: -------------------------------------------------------------------------------- 1 | from detectron2.layers import ShapeSpec 2 | 3 | from .dino_convnext_large_384_4scale_12ep import ( 4 | train, 5 | dataloader, 6 | optimizer, 7 | lr_multiplier, 8 | model, 9 | ) 10 | 11 | # modify model to convnext-small version 12 | model.backbone.depths = [3, 3, 27, 3] 13 | model.backbone.dims = [96, 192, 384, 768] 14 | 15 | # modify neck config 16 | model.neck.input_shapes = { 17 | "p1": ShapeSpec(channels=192), 18 | "p2": ShapeSpec(channels=384), 19 | "p3": ShapeSpec(channels=768), 20 | } 21 | model.neck.in_features = ["p1", "p2", "p3"] 22 | 23 | # modify training config 24 | train.init_checkpoint = "/path/to/convnext_small_22k_1k_384.pth" 25 | train.output_dir = "./output/dino_convnext_small_384_4scale_12ep" 26 | -------------------------------------------------------------------------------- /projects/dino/configs/dino-convnext/dino_convnext_tiny_384_4scale_12ep.py: -------------------------------------------------------------------------------- 1 | from detectron2.layers import ShapeSpec 2 | 3 | from .dino_convnext_large_384_4scale_12ep import ( 4 | train, 5 | dataloader, 6 | optimizer, 7 | lr_multiplier, 8 | model, 9 | ) 10 | 11 | # modify model to tiny version 12 | model.backbone.depths = [3, 3, 9, 3] 13 | model.backbone.dims = [96, 192, 384, 768] 14 | 15 | # modify neck config 16 | model.neck.input_shapes = { 17 | "p1": ShapeSpec(channels=192), 18 | "p2": ShapeSpec(channels=384), 19 | "p3": ShapeSpec(channels=768), 20 | } 21 | model.neck.in_features = ["p1", "p2", "p3"] 22 | 23 | # modify training config 24 | train.init_checkpoint = "/path/to/convnext_tiny_22k_1k_384.pth" 
25 | train.output_dir = "./output/dino_convnext_tiny_384_4scale_12ep" 26 | -------------------------------------------------------------------------------- /projects/dino/configs/dino-eva-01/dino_eva_01_1536_4scale_12ep.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IDEA-Research/detrex/c56d32e3d0262cff9835ebe80a0642965ae0cb3e/projects/dino/configs/dino-eva-01/dino_eva_01_1536_4scale_12ep.py -------------------------------------------------------------------------------- /projects/dino/configs/dino-focal/dino_focal_base_lrf_fl3_4scale_12ep.py: -------------------------------------------------------------------------------- 1 | from detectron2.layers import ShapeSpec 2 | from .dino_focalnet_large_lrf_384_4scale_12ep import ( 3 | train, 4 | dataloader, 5 | optimizer, 6 | lr_multiplier, 7 | model, 8 | ) 9 | from .focalnet import FocalNet 10 | from detectron2.config import LazyCall as L 11 | 12 | 13 | # modify training config 14 | train.init_checkpoint = "/path/to/focalnet_base_lrf.pth" 15 | train.output_dir = "./output/dino_focal_base_lrf_fl3_4scale_12ep" 16 | 17 | 18 | # convert to focal-base 3level 19 | # model.backbone.embed_dim = 128 20 | # model.backbone.depths = (2, 2, 18, 2) 21 | # model.backbone.focal_levels = (3, 3, 3, 3) 22 | # model.backbone.focal_windows = (3, 3, 3, 3) 23 | # model.backbone.drop_path_rate = 0.1 24 | # model.backbone.use_conv_embed = False 25 | # model.backbone.patch_norm = True 26 | # model.backbone.use_postln = False 27 | 28 | model.backbone = L(FocalNet)( 29 | embed_dim=128, 30 | depths=(2, 2, 18, 2), 31 | focal_levels=(3, 3, 3, 3), 32 | focal_windows=(3, 3, 3, 3), 33 | drop_path_rate=0.1, 34 | use_conv_embed=False, 35 | out_indices=(1, 2, 3), 36 | ) 37 | 38 | # modify neck config 39 | model.neck.input_shapes = { 40 | "p1": ShapeSpec(channels=256), 41 | "p2": ShapeSpec(channels=512), 42 | "p3": ShapeSpec(channels=1024), 43 | } 44 | model.neck.in_features = ["p1", 
"p2", "p3"] 45 | -------------------------------------------------------------------------------- /projects/dino/configs/dino-focal/dino_focal_small_lrf_fl3_4scale_12ep.py: -------------------------------------------------------------------------------- 1 | from detectron2.layers import ShapeSpec 2 | from .dino_focalnet_large_lrf_384_4scale_12ep import ( 3 | train, 4 | dataloader, 5 | optimizer, 6 | lr_multiplier, 7 | model, 8 | ) 9 | from .focalnet import FocalNet 10 | from detectron2.config import LazyCall as L 11 | 12 | 13 | # modify training config 14 | train.init_checkpoint = "/path/to/focalnet_small_lrf.pth" 15 | train.output_dir = "./output/dino_focal_small_lrf_fl3_4scale_12ep" 16 | 17 | 18 | # convert to focal-small 3level 19 | # model.backbone.embed_dim = 96 20 | # model.backbone.depths = (2, 2, 18, 2) 21 | # model.backbone.focal_levels = (3, 3, 3, 3) 22 | # model.backbone.focal_windows = (3, 3, 3, 3) 23 | # model.backbone.drop_path_rate = 0.1 24 | # model.backbone.use_conv_embed = False 25 | # model.backbone.patch_norm = True 26 | # model.backbone.use_postln = False 27 | 28 | model.backbone = L(FocalNet)( 29 | embed_dim=96, 30 | depths=(2, 2, 18, 2), 31 | focal_levels=(3, 3, 3, 3), 32 | focal_windows=(3, 3, 3, 3), 33 | drop_path_rate=0.1, 34 | use_conv_embed=False, 35 | out_indices=(1, 2, 3), 36 | ) 37 | 38 | # modify neck config 39 | model.neck.input_shapes = { 40 | "p1": ShapeSpec(channels=192), 41 | "p2": ShapeSpec(channels=384), 42 | "p3": ShapeSpec(channels=768), 43 | } 44 | model.neck.in_features = ["p1", "p2", "p3"] 45 | -------------------------------------------------------------------------------- /projects/dino/configs/dino-focal/dino_focal_tiny_lrf_fl3_4scale_12ep.py: -------------------------------------------------------------------------------- 1 | from detectron2.layers import ShapeSpec 2 | from .dino_focalnet_large_lrf_384_4scale_12ep import ( 3 | train, 4 | dataloader, 5 | optimizer, 6 | lr_multiplier, 7 | model, 8 | ) 9 | from .focalnet 
import FocalNet 10 | from detectron2.config import LazyCall as L 11 | 12 | 13 | # modify training config 14 | train.init_checkpoint = "/path/to/focalnet_tiny_lrf.pth" 15 | train.output_dir = "./output/dino_focal_tiny_lrf_fl3_4scale_12ep" 16 | 17 | 18 | # convert to focal-tiny 3level 19 | # model.backbone.embed_dim = 96 20 | # model.backbone.depths = (2, 2, 6, 2) 21 | # model.backbone.focal_levels = (3, 3, 3, 3) 22 | # model.backbone.focal_windows = (3, 3, 3, 3) 23 | # model.backbone.drop_path_rate = 0.1 24 | # model.backbone.use_conv_embed = False 25 | # model.backbone.patch_norm = True 26 | # model.backbone.use_postln = False 27 | 28 | model.backbone = L(FocalNet)( 29 | embed_dim=96, 30 | depths=(2, 2, 6, 2), 31 | focal_levels=(3, 3, 3, 3), 32 | focal_windows=(3, 3, 3, 3), 33 | drop_path_rate=0.1, 34 | use_conv_embed=False, 35 | out_indices=(1, 2, 3), 36 | ) 37 | 38 | # modify neck config 39 | model.neck.input_shapes = { 40 | "p1": ShapeSpec(channels=192), 41 | "p2": ShapeSpec(channels=384), 42 | "p3": ShapeSpec(channels=768), 43 | } 44 | model.neck.in_features = ["p1", "p2", "p3"] 45 | -------------------------------------------------------------------------------- /projects/dino/configs/dino-focal/dino_focalnet_large_lrf_384_4scale_12ep.py: -------------------------------------------------------------------------------- 1 | from detrex.config import get_config 2 | from ..models.dino_focalnet import model 3 | 4 | # get default config 5 | dataloader = get_config("common/data/coco_detr.py").dataloader 6 | optimizer = get_config("common/optim.py").AdamW 7 | lr_multiplier = get_config("common/coco_schedule.py").lr_multiplier_12ep 8 | train = get_config("common/train.py").train 9 | 10 | # modify training config 11 | train.init_checkpoint = "/path/to/focalnet_large_lrf_384.pth" 12 | train.output_dir = "./output/dino_focalnet_large_4scale_12ep" 13 | 14 | # max training iterations 15 | train.max_iter = 90000 16 | train.eval_period = 5000 17 | train.log_period = 20 18 | 
train.checkpointer.period = 5000 19 | 20 | # gradient clipping for training 21 | train.clip_grad.enabled = True 22 | train.clip_grad.params.max_norm = 0.1 23 | train.clip_grad.params.norm_type = 2 24 | 25 | # set training devices 26 | train.device = "cuda" 27 | model.device = train.device 28 | 29 | # modify optimizer config 30 | optimizer.lr = 1e-4 31 | optimizer.betas = (0.9, 0.999) 32 | optimizer.weight_decay = 1e-4 33 | optimizer.params.lr_factor_func = lambda module_name: 0.1 if "backbone" in module_name else 1 34 | 35 | # modify dataloader config 36 | dataloader.train.num_workers = 16 37 | 38 | # please notice that this is total batch size. 39 | # suppose you're using 4 gpus for training and the batch size for 40 | # each gpu is 16/4 = 4 41 | dataloader.train.total_batch_size = 16 42 | -------------------------------------------------------------------------------- /projects/dino/configs/dino-focal/dino_focalnet_large_lrf_384_4scale_36ep.py: -------------------------------------------------------------------------------- 1 | from .dino_focalnet_large_lrf_384_4scale_12ep import ( 2 | model, 3 | dataloader, 4 | train, 5 | lr_multiplier, 6 | optimizer 7 | ) 8 | 9 | from detrex.config import get_config 10 | 11 | # using 36ep scheduler 12 | lr_multiplier = get_config("common/coco_schedule.py").lr_multiplier_36ep 13 | 14 | # modify training config 15 | train.max_iter = 270000 16 | train.init_checkpoint = "/path/to/focalnet_large_lrf_384.pth" 17 | train.output_dir = "./output/dino_focalnet_large_4scale_36ep" 18 | -------------------------------------------------------------------------------- /projects/dino/configs/dino-focal/dino_focalnet_large_lrf_384_fl4_4scale_12ep.py: -------------------------------------------------------------------------------- 1 | from .dino_focalnet_large_lrf_384_4scale_12ep import ( 2 | train, 3 | dataloader, 4 | optimizer, 5 | lr_multiplier, 6 | model, 7 | ) 8 | 9 | 10 | # modify training config 11 | train.init_checkpoint = 
"/path/to/focalnet_large_lrf_384_fl4.pth" 12 | train.output_dir = "./output/dino_focalnet_large_fl4_4scale_12ep" 13 | 14 | 15 | # convert to 4 focal-level 16 | model.backbone.focal_levels = (4, 4, 4, 4) 17 | model.backbone.focal_windows = (3, 3, 3, 3) 18 | -------------------------------------------------------------------------------- /projects/dino/configs/dino-focal/dino_focalnet_large_lrf_384_fl4_5scale_12ep.py: -------------------------------------------------------------------------------- 1 | from .dino_focalnet_large_lrf_384_4scale_12ep import ( 2 | train, 3 | dataloader, 4 | optimizer, 5 | lr_multiplier, 6 | model, 7 | ) 8 | 9 | from detectron2.layers import ShapeSpec 10 | 11 | 12 | # modify training config 13 | train.init_checkpoint = "/path/to/focalnet_large_lrf_384_fl4.pth" 14 | train.output_dir = "./output/dino_focalnet_large_fl4_5scale_12ep" 15 | 16 | # convert to 4 focal-level 17 | model.backbone.focal_levels = (4, 4, 4, 4) 18 | model.backbone.focal_windows = (3, 3, 3, 3) 19 | 20 | # convert to 5 scale output features 21 | model.backbone.out_indices = (0, 1, 2, 3) 22 | model.neck.input_shapes = { 23 | "p0": ShapeSpec(channels=192), 24 | "p1": ShapeSpec(channels=384), 25 | "p2": ShapeSpec(channels=768), 26 | "p3": ShapeSpec(channels=1536), 27 | } 28 | model.neck.in_features = ["p0", "p1", "p2", "p3"] 29 | model.neck.num_outs = 5 30 | model.transformer.num_feature_levels = 5 31 | -------------------------------------------------------------------------------- /projects/dino/configs/dino-focal/dino_focalnet_large_lrf_384_fl4_5scale_36ep.py: -------------------------------------------------------------------------------- 1 | from detrex.config import get_config 2 | 3 | from .dino_focalnet_large_lrf_384_fl4_5scale_12ep import ( 4 | train, 5 | dataloader, 6 | optimizer, 7 | model, 8 | ) 9 | 10 | # using 36ep scheduler 11 | lr_multiplier = get_config("common/coco_schedule.py").lr_multiplier_36ep 12 | 13 | # modify training config 14 | train.max_iter = 
270000 15 | train.init_checkpoint = "/path/to/focalnet_large_lrf_384_fl4.pth" 16 | train.output_dir = "./output/dino_focalnet_large_fl4_5scale_36ep" 17 | 18 | # using larger drop-path rate for longer training times 19 | model.backbone.drop_path_rate = 0.4 20 | -------------------------------------------------------------------------------- /projects/dino/configs/dino-internimage/dino_internimage_base_4scale_12ep.py: -------------------------------------------------------------------------------- 1 | from detectron2.layers import ShapeSpec 2 | 3 | from .dino_internimage_large_4scale_12ep import ( 4 | train, 5 | dataloader, 6 | optimizer, 7 | lr_multiplier, 8 | model, 9 | ) 10 | 11 | # modify model to internimage-small version 12 | model.backbone.channels = 112 13 | model.backbone.depths = [4, 4, 21, 4] 14 | model.backbone.groups = [7, 14, 28, 56] 15 | model.backbone.offset_scale = 1.0 16 | model.backbone.drop_path_rate = 0.1 17 | model.backbone.post_norm = True 18 | 19 | # modify neck config 20 | model.neck.input_shapes = { 21 | "p1": ShapeSpec(channels=224), 22 | "p2": ShapeSpec(channels=448), 23 | "p3": ShapeSpec(channels=896), 24 | } 25 | model.neck.in_features = ["p1", "p2", "p3"] 26 | 27 | # modify training config 28 | train.init_checkpoint = "/path/to/internimage_b_1k_224.pth" 29 | train.output_dir = "./output/dino_internimage_base_4scale_12ep" -------------------------------------------------------------------------------- /projects/dino/configs/dino-internimage/dino_internimage_large_4scale_12ep.py: -------------------------------------------------------------------------------- 1 | from detrex.config import get_config 2 | from ..models.dino_internimage import model 3 | 4 | # get default config 5 | dataloader = get_config("common/data/coco_detr.py").dataloader 6 | optimizer = get_config("common/optim.py").AdamW 7 | lr_multiplier = get_config("common/coco_schedule.py").lr_multiplier_12ep 8 | train = get_config("common/train.py").train 9 | 10 | # modify 
training config 11 | train.init_checkpoint = "/path/to/internimage_l_22kto1k_384.pth" 12 | train.output_dir = "./output/dino_internimage_large_384_4scale_12ep" 13 | 14 | # max training iterations 15 | train.max_iter = 90000 16 | train.eval_period = 5000 17 | train.log_period = 20 18 | train.checkpointer.period = 5000 19 | 20 | # gradient clipping for training 21 | train.clip_grad.enabled = True 22 | train.clip_grad.params.max_norm = 0.1 23 | train.clip_grad.params.norm_type = 2 24 | 25 | # set training devices 26 | train.device = "cuda" 27 | model.device = train.device 28 | 29 | # modify optimizer config 30 | optimizer.lr = 1e-4 31 | optimizer.betas = (0.9, 0.999) 32 | optimizer.weight_decay = 1e-4 33 | optimizer.params.lr_factor_func = lambda module_name: 0.1 if "backbone" in module_name else 1 34 | 35 | # modify dataloader config 36 | dataloader.train.num_workers = 16 37 | 38 | # please notice that this is total batch size. 39 | # suppose you're using 4 gpus for training and the batch size for 40 | # each gpu is 16/4 = 4 41 | dataloader.train.total_batch_size = 16 42 | -------------------------------------------------------------------------------- /projects/dino/configs/dino-internimage/dino_internimage_small_4scale_12ep.py: -------------------------------------------------------------------------------- 1 | from detectron2.layers import ShapeSpec 2 | 3 | from .dino_internimage_large_4scale_12ep import ( 4 | train, 5 | dataloader, 6 | optimizer, 7 | lr_multiplier, 8 | model, 9 | ) 10 | 11 | # modify model to internimage-small version 12 | model.backbone.channels = 80 13 | model.backbone.depths = [4, 4, 21, 4] 14 | model.backbone.groups = [5, 10, 20, 40] 15 | model.backbone.offset_scale = 1.0 16 | model.backbone.drop_path_rate = 0.1 17 | model.backbone.post_norm = True 18 | 19 | # modify neck config 20 | model.neck.input_shapes = { 21 | "p1": ShapeSpec(channels=160), 22 | "p2": ShapeSpec(channels=320), 23 | "p3": ShapeSpec(channels=640), 24 | } 25 | 
model.neck.in_features = ["p1", "p2", "p3"] 26 | 27 | # modify training config 28 | train.init_checkpoint = "/path/to/internimage_s_1k_224.pth" 29 | train.output_dir = "./output/dino_internimage_small_4scale_12ep" -------------------------------------------------------------------------------- /projects/dino/configs/dino-internimage/dino_internimage_tiny_4scale_12ep.py: -------------------------------------------------------------------------------- 1 | from detectron2.layers import ShapeSpec 2 | 3 | from .dino_internimage_large_4scale_12ep import ( 4 | train, 5 | dataloader, 6 | optimizer, 7 | lr_multiplier, 8 | model, 9 | ) 10 | 11 | # modify model to internimage-tiny version 12 | model.backbone.channels = 64 13 | model.backbone.depths = [4, 4, 18, 4] 14 | model.backbone.groups = [4, 8, 16, 32] 15 | model.backbone.offset_scale = 1.0 16 | model.backbone.drop_path_rate = 0.1 17 | model.backbone.post_norm = False 18 | 19 | # modify neck config 20 | model.neck.input_shapes = { 21 | "p1": ShapeSpec(channels=128), 22 | "p2": ShapeSpec(channels=256), 23 | "p3": ShapeSpec(channels=512), 24 | } 25 | model.neck.in_features = ["p1", "p2", "p3"] 26 | 27 | # modify training config 28 | train.init_checkpoint = "/path/to/internimage_t_1k_224.pth" 29 | train.output_dir = "./output/dino_internimage_tiny_4scale_12ep" -------------------------------------------------------------------------------- /projects/dino/configs/dino-resnet/dino_r101_4scale_12ep.py: -------------------------------------------------------------------------------- 1 | from .dino_r50_4scale_12ep import ( 2 | train, 3 | dataloader, 4 | optimizer, 5 | lr_multiplier, 6 | model, 7 | ) 8 | 9 | # modify training config 10 | train.init_checkpoint = "/path/to/r101.pkl" 11 | train.output_dir = "./output/dino_r101_4scale_12ep" 12 | 13 | # modify model config 14 | model.backbone.stages.depth = 101 15 | -------------------------------------------------------------------------------- 
/projects/dino/configs/dino-resnet/dino_r50_4scale_12ep.py: -------------------------------------------------------------------------------- 1 | from detrex.config import get_config 2 | from ..models.dino_r50 import model 3 | 4 | # get default config 5 | dataloader = get_config("common/data/coco_detr.py").dataloader 6 | optimizer = get_config("common/optim.py").AdamW 7 | lr_multiplier = get_config("common/coco_schedule.py").lr_multiplier_12ep 8 | train = get_config("common/train.py").train 9 | 10 | # modify training config 11 | train.init_checkpoint = "detectron2://ImageNetPretrained/torchvision/R-50.pkl" 12 | train.output_dir = "./output/dino_r50_4scale_12ep" 13 | 14 | # max training iterations 15 | train.max_iter = 90000 16 | train.eval_period = 5000 17 | train.log_period = 20 18 | train.checkpointer.period = 5000 19 | 20 | # gradient clipping for training 21 | train.clip_grad.enabled = True 22 | train.clip_grad.params.max_norm = 0.1 23 | train.clip_grad.params.norm_type = 2 24 | 25 | # set training devices 26 | train.device = "cuda" 27 | model.device = train.device 28 | 29 | # modify optimizer config 30 | optimizer.lr = 1e-4 31 | optimizer.betas = (0.9, 0.999) 32 | optimizer.weight_decay = 1e-4 33 | optimizer.params.lr_factor_func = lambda module_name: 0.1 if "backbone" in module_name else 1 34 | 35 | # modify dataloader config 36 | dataloader.train.num_workers = 16 37 | 38 | # please notice that this is total batch size. 
39 | # suppose you're using 4 gpus for training and the batch size for 40 | # each gpu is 16/4 = 4 41 | dataloader.train.total_batch_size = 16 42 | 43 | # dump the testing results into output_dir for visualization 44 | dataloader.evaluator.output_dir = train.output_dir 45 | -------------------------------------------------------------------------------- /projects/dino/configs/dino-resnet/dino_r50_4scale_12ep_300dn.py: -------------------------------------------------------------------------------- 1 | from .dino_r50_4scale_12ep import ( 2 | train, 3 | dataloader, 4 | optimizer, 5 | lr_multiplier, 6 | model, 7 | ) 8 | 9 | # modify model config 10 | model.dn_number = 300 11 | -------------------------------------------------------------------------------- /projects/dino/configs/dino-resnet/dino_r50_4scale_12ep_better_hyper.py: -------------------------------------------------------------------------------- 1 | import copy 2 | from .dino_r50_4scale_12ep import ( 3 | train, 4 | dataloader, 5 | optimizer, 6 | lr_multiplier, 7 | model, 8 | ) 9 | 10 | # no frozen backbone get better results 11 | model.backbone.freeze_at = -1 12 | 13 | # more dn queries, set 300 here 14 | model.dn_number = 300 15 | 16 | # use 2.0 for class weight 17 | model.criterion.weight_dict = { 18 | "loss_class": 2.0, 19 | "loss_bbox": 5.0, 20 | "loss_giou": 2.0, 21 | "loss_class_dn": 1, 22 | "loss_bbox_dn": 5.0, 23 | "loss_giou_dn": 2.0, 24 | } 25 | 26 | # set aux loss weight dict 27 | base_weight_dict = copy.deepcopy(model.criterion.weight_dict) 28 | if model.aux_loss: 29 | weight_dict = model.criterion.weight_dict 30 | aux_weight_dict = {} 31 | aux_weight_dict.update({k + "_enc": v for k, v in base_weight_dict.items()}) 32 | for i in range(model.transformer.decoder.num_layers - 1): 33 | aux_weight_dict.update({k + f"_{i}": v for k, v in base_weight_dict.items()}) 34 | weight_dict.update(aux_weight_dict) 35 | model.criterion.weight_dict = weight_dict 36 | 37 | # output dir 38 | train.output_dir = 
"./output/dino_r50_4scale_12ep_better_hyper" -------------------------------------------------------------------------------- /projects/dino/configs/dino-resnet/dino_r50_4scale_12ep_no_frozen.py: -------------------------------------------------------------------------------- 1 | from .dino_r50_4scale_12ep import ( 2 | train, 3 | dataloader, 4 | optimizer, 5 | lr_multiplier, 6 | model, 7 | ) 8 | 9 | # no frozen backbone get better results 10 | model.backbone.freeze_at = -1 11 | 12 | train.output_dir = "./output/dino_r50_4scale_12ep_no_frozen_backbone" -------------------------------------------------------------------------------- /projects/dino/configs/dino-resnet/dino_r50_4scale_24ep.py: -------------------------------------------------------------------------------- 1 | from detrex.config import get_config 2 | from .dino_r50_4scale_12ep import ( 3 | train, 4 | dataloader, 5 | optimizer, 6 | model, 7 | ) 8 | 9 | # get default config 10 | lr_multiplier = get_config("common/coco_schedule.py").lr_multiplier_24ep 11 | 12 | # modify model config 13 | # use the original implementation of dab-detr position embedding in 24 epochs training. 
14 | model.position_embedding.temperature = 20 15 | model.position_embedding.offset = 0.0 16 | 17 | # modify training config 18 | train.init_checkpoint = "detectron2://ImageNetPretrained/torchvision/R-50.pkl" 19 | train.output_dir = "./output/dino_r50_4scale_24ep" 20 | 21 | # max training iterations 22 | train.max_iter = 180000 23 | 24 | # modify dataloader config 25 | # not filter empty annotations during training 26 | dataloader.train.dataset.filter_empty = False 27 | dataloader.train.num_workers = 16 28 | 29 | -------------------------------------------------------------------------------- /projects/dino/configs/dino-resnet/dino_r50_5scale_12ep.py: -------------------------------------------------------------------------------- 1 | from .dino_r50_4scale_12ep import ( 2 | train, 3 | dataloader, 4 | optimizer, 5 | lr_multiplier, 6 | model, 7 | ) 8 | 9 | from detectron2.layers import ShapeSpec 10 | 11 | # modify model config to generate 4 scale backbone features 12 | # and 5 scale input features 13 | model.backbone.out_features = ["res2", "res3", "res4", "res5"] 14 | 15 | model.neck.input_shapes = { 16 | "res2": ShapeSpec(channels=256), 17 | "res3": ShapeSpec(channels=512), 18 | "res4": ShapeSpec(channels=1024), 19 | "res5": ShapeSpec(channels=2048), 20 | } 21 | model.neck.in_features = ["res2", "res3", "res4", "res5"] 22 | model.neck.num_outs = 5 23 | model.transformer.num_feature_levels = 5 24 | 25 | # modify training config 26 | train.output_dir = "./output/dino_r50_5scale_12ep" -------------------------------------------------------------------------------- /projects/dino/configs/dino-swin/dino_swin_base_384_4scale_12ep.py: -------------------------------------------------------------------------------- 1 | from detrex.config import get_config 2 | from ..models.dino_swin_base_384 import model 3 | 4 | # get default config 5 | dataloader = get_config("common/data/coco_detr.py").dataloader 6 | optimizer = get_config("common/optim.py").AdamW 7 | lr_multiplier = 
get_config("common/coco_schedule.py").lr_multiplier_12ep 8 | train = get_config("common/train.py").train 9 | 10 | # modify training config 11 | train.init_checkpoint = "/path/to/swin_base_patch4_window12_384_22kto1k.pth" 12 | train.output_dir = "./output/dino_swin_base_384_4scale_12ep" 13 | 14 | # max training iterations 15 | train.max_iter = 90000 16 | train.eval_period = 5000 17 | train.log_period = 20 18 | train.checkpointer.period = 5000 19 | 20 | # gradient clipping for training 21 | train.clip_grad.enabled = True 22 | train.clip_grad.params.max_norm = 0.1 23 | train.clip_grad.params.norm_type = 2 24 | 25 | # set training devices 26 | train.device = "cuda" 27 | model.device = train.device 28 | 29 | # modify optimizer config 30 | optimizer.lr = 1e-4 31 | optimizer.betas = (0.9, 0.999) 32 | optimizer.weight_decay = 1e-4 33 | optimizer.params.lr_factor_func = lambda module_name: 0.1 if "backbone" in module_name else 1 34 | 35 | # modify dataloader config 36 | dataloader.train.num_workers = 16 37 | 38 | # please notice that this is total batch size. 
39 | # suppose you're using 4 gpus for training and the batch size for 40 | # each gpu is 16/4 = 4 41 | dataloader.train.total_batch_size = 16 42 | -------------------------------------------------------------------------------- /projects/dino/configs/dino-swin/dino_swin_large_224_4scale_12ep.py: -------------------------------------------------------------------------------- 1 | from detrex.config import get_config 2 | from ..models.dino_swin_large_224 import model 3 | 4 | # get default config 5 | dataloader = get_config("common/data/coco_detr.py").dataloader 6 | optimizer = get_config("common/optim.py").AdamW 7 | lr_multiplier = get_config("common/coco_schedule.py").lr_multiplier_12ep 8 | train = get_config("common/train.py").train 9 | 10 | # modify training config 11 | train.init_checkpoint = "/path/to/swin_large_patch4_window7_224_22kto1k.pth" 12 | train.output_dir = "./output/dino_swin_large_224_4scale_12ep" 13 | 14 | # max training iterations 15 | train.max_iter = 90000 16 | train.eval_period = 5000 17 | train.log_period = 20 18 | train.checkpointer.period = 5000 19 | 20 | # gradient clipping for training 21 | train.clip_grad.enabled = True 22 | train.clip_grad.params.max_norm = 0.1 23 | train.clip_grad.params.norm_type = 2 24 | 25 | # set training devices 26 | train.device = "cuda" 27 | model.device = train.device 28 | 29 | # modify optimizer config 30 | optimizer.lr = 1e-4 31 | optimizer.betas = (0.9, 0.999) 32 | optimizer.weight_decay = 1e-4 33 | optimizer.params.lr_factor_func = lambda module_name: 0.1 if "backbone" in module_name else 1 34 | 35 | # modify dataloader config 36 | dataloader.train.num_workers = 16 37 | 38 | # please notice that this is total batch size. 
39 | # suppose you're using 4 gpus for training and the batch size for 40 | # each gpu is 16/4 = 4 41 | dataloader.train.total_batch_size = 16 42 | -------------------------------------------------------------------------------- /projects/dino/configs/dino-swin/dino_swin_large_384_4scale_12ep.py: -------------------------------------------------------------------------------- 1 | from detrex.config import get_config 2 | from ..models.dino_swin_large_384 import model 3 | 4 | # get default config 5 | dataloader = get_config("common/data/coco_detr.py").dataloader 6 | optimizer = get_config("common/optim.py").AdamW 7 | lr_multiplier = get_config("common/coco_schedule.py").lr_multiplier_12ep 8 | train = get_config("common/train.py").train 9 | 10 | # modify training config 11 | train.init_checkpoint = "/path/to/swin_large_patch4_window12_384_22kto1k.pth" 12 | train.output_dir = "./output/dino_swin_large_384_4scale_12ep" 13 | 14 | # max training iterations 15 | train.max_iter = 90000 16 | train.eval_period = 5000 17 | train.log_period = 20 18 | train.checkpointer.period = 5000 19 | 20 | # gradient clipping for training 21 | train.clip_grad.enabled = True 22 | train.clip_grad.params.max_norm = 0.1 23 | train.clip_grad.params.norm_type = 2 24 | 25 | # set training devices 26 | train.device = "cuda" 27 | model.device = train.device 28 | 29 | # modify optimizer config 30 | optimizer.lr = 1e-4 31 | optimizer.betas = (0.9, 0.999) 32 | optimizer.weight_decay = 1e-4 33 | optimizer.params.lr_factor_func = lambda module_name: 0.1 if "backbone" in module_name else 1 34 | 35 | # modify dataloader config 36 | dataloader.train.num_workers = 16 37 | 38 | # please notice that this is total batch size. 
39 | # suppose you're using 4 gpus for training and the batch size for
38 | # suppose you're using 4 gpus for training and the batch size for
model.transformer.num_feature_levels = 5 24 | 25 | # modify training config 26 | train.output_dir = "./output/dino_swin_large_384_5scale_36ep" -------------------------------------------------------------------------------- /projects/dino/configs/dino-swin/dino_swin_small_224_4scale_12ep.py: -------------------------------------------------------------------------------- 1 | from detrex.config import get_config 2 | from ..models.dino_swin_small_224 import model 3 | 4 | # get default config 5 | dataloader = get_config("common/data/coco_detr.py").dataloader 6 | optimizer = get_config("common/optim.py").AdamW 7 | lr_multiplier = get_config("common/coco_schedule.py").lr_multiplier_12ep 8 | train = get_config("common/train.py").train 9 | 10 | # modify training config 11 | train.init_checkpoint = "/path/to/swin_small_patch4_window7_224.pth" 12 | train.output_dir = "./output/dino_swin_small_224_4scale_12ep" 13 | 14 | # max training iterations 15 | train.max_iter = 90000 16 | train.eval_period = 5000 17 | train.log_period = 20 18 | train.checkpointer.period = 5000 19 | 20 | # gradient clipping for training 21 | train.clip_grad.enabled = True 22 | train.clip_grad.params.max_norm = 0.1 23 | train.clip_grad.params.norm_type = 2 24 | 25 | # set training devices 26 | train.device = "cuda" 27 | model.device = train.device 28 | 29 | # modify optimizer config 30 | optimizer.lr = 1e-4 31 | optimizer.betas = (0.9, 0.999) 32 | optimizer.weight_decay = 1e-4 33 | optimizer.params.lr_factor_func = lambda module_name: 0.1 if "backbone" in module_name else 1 34 | 35 | # modify dataloader config 36 | dataloader.train.num_workers = 16 37 | 38 | # please notice that this is total batch size. 
39 | # suppose you're using 4 gpus for training and the batch size for
39 | # suppose you're using 4 gpus for training and the batch size for
46 | # suppose you're using 4 gpus for training and the batch size for
"detectron2://ImageNetPretrained/MAE/mae_pretrain_vit_large.pth" 25 | train.output_dir = "./output/dino_vitdet_large_12ep" 26 | 27 | 28 | # use warmup lr scheduler 29 | lr_multiplier = L(WarmupParamScheduler)( 30 | scheduler=L(MultiStepParamScheduler)( 31 | values=[1.0, 0.1], 32 | milestones=[300000, 375000], 33 | ), 34 | warmup_length=250 / train.max_iter, 35 | warmup_factor=0.001, 36 | ) -------------------------------------------------------------------------------- /projects/dino/configs/dino-vitdet/dino_vitdet_large_4scale_50ep.py: -------------------------------------------------------------------------------- 1 | from fvcore.common.param_scheduler import MultiStepParamScheduler 2 | from detectron2.config import LazyCall as L 3 | from detectron2.solver import WarmupParamScheduler 4 | 5 | from .dino_vitdet_large_4scale_12ep import ( 6 | train, 7 | dataloader, 8 | optimizer, 9 | model, 10 | ) 11 | 12 | 13 | # modify training config 14 | train.max_iter = 375000 15 | train.init_checkpoint = "detectron2://ImageNetPretrained/MAE/mae_pretrain_vit_large.pth" 16 | train.output_dir = "./output/dino_vitdet_large_50ep" 17 | 18 | # use warmup lr scheduler 19 | lr_multiplier = L(WarmupParamScheduler)( 20 | scheduler=L(MultiStepParamScheduler)( 21 | values=[1.0, 0.1], 22 | milestones=[300000, 375000], 23 | ), 24 | warmup_length=250 / train.max_iter, 25 | warmup_factor=0.001, 26 | ) -------------------------------------------------------------------------------- /projects/dino/configs/models/dino_convnext.py: -------------------------------------------------------------------------------- 1 | from detectron2.config import LazyCall as L 2 | from detectron2.layers import ShapeSpec 3 | from detrex.modeling.backbone import ConvNeXt 4 | 5 | from .dino_r50 import model 6 | 7 | 8 | # convnext-large-4scale baseline 9 | model.backbone = L(ConvNeXt)( 10 | in_chans=3, 11 | depths=[3, 3, 27, 3], 12 | dims=[192, 384, 768, 1536], 13 | drop_path_rate=0.0, 14 | layer_scale_init_value=1.0, 
15 | out_indices=[1, 2, 3], 16 | ) 17 | 18 | # modify neck config 19 | model.neck.input_shapes = { 20 | "p1": ShapeSpec(channels=384), 21 | "p2": ShapeSpec(channels=768), 22 | "p3": ShapeSpec(channels=1536), 23 | } 24 | model.neck.in_features = ["p1", "p2", "p3"] 25 | -------------------------------------------------------------------------------- /projects/dino/configs/models/dino_eva_01.py: -------------------------------------------------------------------------------- 1 | from functools import partial 2 | import torch.nn as nn 3 | from detectron2.config import LazyCall as L 4 | from detectron2.layers import ShapeSpec 5 | from detectron2.modeling.backbone.fpn import LastLevelMaxPool 6 | from detrex.modeling.backbone import EVAViT, SimpleFeaturePyramid 7 | 8 | from .dino_r50 import model 9 | 10 | 11 | # Base 12 | embed_dim, depth, num_heads, dp = 768, 12, 12, 0.1 13 | 14 | # EVA-01 15 | model.backbone = L(SimpleFeaturePyramid)( 16 | net=L(EVAViT)( 17 | img_size=1024, 18 | patch_size=16, 19 | embed_dim=embed_dim, 20 | depth=depth, 21 | num_heads=num_heads, 22 | drop_path_rate=dp, 23 | window_size=14, 24 | mlp_ratio=4, 25 | qkv_bias=True, 26 | norm_layer=partial(nn.LayerNorm, eps=1e-6), 27 | window_block_indexes=[ 28 | # 2, 5, 8 11 for global attention 29 | 0, 30 | 1, 31 | 3, 32 | 4, 33 | 6, 34 | 7, 35 | 9, 36 | 10, 37 | ], 38 | residual_block_indexes=[], 39 | use_rel_pos=True, 40 | out_feature="last_feat", 41 | ), 42 | in_feature="${.net.out_feature}", 43 | out_channels=256, 44 | scale_factors=(2.0, 1.0, 0.5), # (4.0, 2.0, 1.0, 0.5) in ViTDet 45 | top_block=L(LastLevelMaxPool)(), 46 | norm="LN", 47 | square_pad=1024, 48 | ) 49 | 50 | # modify neck config 51 | model.neck.input_shapes = { 52 | "p3": ShapeSpec(channels=256), 53 | "p4": ShapeSpec(channels=256), 54 | "p5": ShapeSpec(channels=256), 55 | "p6": ShapeSpec(channels=256), 56 | } 57 | model.neck.in_features = ["p3", "p4", "p5", "p6"] 58 | model.neck.num_outs = 4 59 | model.transformer.num_feature_levels = 4 
60 | -------------------------------------------------------------------------------- /projects/dino/configs/models/dino_focalnet.py: -------------------------------------------------------------------------------- 1 | from detectron2.config import LazyCall as L 2 | from detectron2.layers import ShapeSpec 3 | from detrex.modeling.backbone import FocalNet 4 | 5 | from .dino_r50 import model 6 | 7 | 8 | # focalnet-large-4scale baseline 9 | model.backbone = L(FocalNet)( 10 | embed_dim=192, 11 | depths=(2, 2, 18, 2), 12 | focal_levels=(3, 3, 3, 3), 13 | focal_windows=(5, 5, 5, 5), 14 | use_conv_embed=True, 15 | use_postln=True, 16 | use_postln_in_modulation=False, 17 | use_layerscale=True, 18 | normalize_modulator=False, 19 | out_indices=(1, 2, 3), 20 | ) 21 | 22 | # modify neck config 23 | model.neck.input_shapes = { 24 | "p1": ShapeSpec(channels=384), 25 | "p2": ShapeSpec(channels=768), 26 | "p3": ShapeSpec(channels=1536), 27 | } 28 | model.neck.in_features = ["p1", "p2", "p3"] 29 | -------------------------------------------------------------------------------- /projects/dino/configs/models/dino_internimage.py: -------------------------------------------------------------------------------- 1 | from detectron2.config import LazyCall as L 2 | from detectron2.layers import ShapeSpec 3 | from detrex.modeling.backbone import InternImage 4 | 5 | from .dino_r50 import model 6 | 7 | 8 | # internimage-large-4scale baseline 9 | model.backbone = L(InternImage)( 10 | core_op="DCNv3", 11 | channels=160, 12 | depths=[5, 5, 22, 5], 13 | groups=[10, 20, 40, 80], 14 | mlp_ratio=4., 15 | drop_path_rate=0.0, 16 | norm_layer="LN", 17 | layer_scale=1.0, 18 | offset_scale=2.0, 19 | post_norm=True, 20 | with_cp=False, 21 | out_indices=(1, 2, 3), 22 | ) 23 | 24 | # modify neck config 25 | model.neck.input_shapes = { 26 | "p1": ShapeSpec(channels=320), 27 | "p2": ShapeSpec(channels=640), 28 | "p3": ShapeSpec(channels=1280), 29 | } 30 | model.neck.in_features = ["p1", "p2", "p3"] 31 | 
-------------------------------------------------------------------------------- /projects/dino/configs/models/dino_swin_base_384.py: -------------------------------------------------------------------------------- 1 | from detectron2.config import LazyCall as L 2 | from detectron2.layers import ShapeSpec 3 | from detectron2.modeling.backbone import SwinTransformer 4 | 5 | from .dino_r50 import model 6 | 7 | 8 | # modify backbone config 9 | model.backbone = L(SwinTransformer)( 10 | pretrain_img_size=384, 11 | embed_dim=128, 12 | depths=(2, 2, 18, 2), 13 | num_heads=(4, 8, 16, 32), 14 | window_size=12, 15 | out_indices=(1, 2, 3), 16 | ) 17 | 18 | # modify neck config 19 | model.neck.input_shapes = { 20 | "p1": ShapeSpec(channels=256), 21 | "p2": ShapeSpec(channels=512), 22 | "p3": ShapeSpec(channels=1024), 23 | } 24 | model.neck.in_features = ["p1", "p2", "p3"] 25 | -------------------------------------------------------------------------------- /projects/dino/configs/models/dino_swin_large_224.py: -------------------------------------------------------------------------------- 1 | from detectron2.config import LazyCall as L 2 | from detectron2.layers import ShapeSpec 3 | from detectron2.modeling.backbone import SwinTransformer 4 | 5 | from .dino_r50 import model 6 | 7 | 8 | # modify backbone config 9 | model.backbone = L(SwinTransformer)( 10 | pretrain_img_size=224, 11 | embed_dim=192, 12 | depths=(2, 2, 18, 2), 13 | num_heads=(6, 12, 24, 48), 14 | window_size=7, 15 | out_indices=(1, 2, 3), 16 | ) 17 | 18 | # modify neck config 19 | model.neck.input_shapes = { 20 | "p1": ShapeSpec(channels=384), 21 | "p2": ShapeSpec(channels=768), 22 | "p3": ShapeSpec(channels=1536), 23 | } 24 | model.neck.in_features = ["p1", "p2", "p3"] 25 | -------------------------------------------------------------------------------- /projects/dino/configs/models/dino_swin_large_384.py: -------------------------------------------------------------------------------- 1 | from detectron2.config 
import LazyCall as L 2 | from detectron2.layers import ShapeSpec 3 | from detectron2.modeling.backbone import SwinTransformer 4 | 5 | from .dino_r50 import model 6 | 7 | 8 | # modify backbone config 9 | model.backbone = L(SwinTransformer)( 10 | pretrain_img_size=384, 11 | embed_dim=192, 12 | depths=(2, 2, 18, 2), 13 | num_heads=(6, 12, 24, 48), 14 | window_size=12, 15 | out_indices=(1, 2, 3), 16 | ) 17 | 18 | # modify neck config 19 | model.neck.input_shapes = { 20 | "p1": ShapeSpec(channels=384), 21 | "p2": ShapeSpec(channels=768), 22 | "p3": ShapeSpec(channels=1536), 23 | } 24 | model.neck.in_features = ["p1", "p2", "p3"] 25 | -------------------------------------------------------------------------------- /projects/dino/configs/models/dino_swin_small_224.py: -------------------------------------------------------------------------------- 1 | from detectron2.config import LazyCall as L 2 | from detectron2.layers import ShapeSpec 3 | from detectron2.modeling.backbone import SwinTransformer 4 | 5 | from .dino_r50 import model 6 | 7 | 8 | # modify backbone config 9 | model.backbone = L(SwinTransformer)( 10 | pretrain_img_size=224, 11 | embed_dim=96, 12 | depths=(2, 2, 18, 2), 13 | num_heads=(3, 6, 12, 24), 14 | drop_path_rate=0.2, 15 | window_size=7, 16 | out_indices=(1, 2, 3), 17 | ) 18 | 19 | # modify neck config 20 | model.neck.input_shapes = { 21 | "p1": ShapeSpec(channels=192), 22 | "p2": ShapeSpec(channels=384), 23 | "p3": ShapeSpec(channels=768), 24 | } 25 | model.neck.in_features = ["p1", "p2", "p3"] 26 | -------------------------------------------------------------------------------- /projects/dino/configs/models/dino_swin_tiny_224.py: -------------------------------------------------------------------------------- 1 | from detectron2.config import LazyCall as L 2 | from detectron2.layers import ShapeSpec 3 | from detectron2.modeling.backbone import SwinTransformer 4 | 5 | from .dino_r50 import model 6 | 7 | 8 | # modify backbone config 9 | model.backbone = 
L(SwinTransformer)( 10 | pretrain_img_size=224, 11 | embed_dim=96, 12 | depths=(2, 2, 6, 2), 13 | num_heads=(3, 6, 12, 24), 14 | drop_path_rate=0.1, 15 | window_size=7, 16 | out_indices=(1, 2, 3), 17 | ) 18 | 19 | # modify neck config 20 | model.neck.input_shapes = { 21 | "p1": ShapeSpec(channels=192), 22 | "p2": ShapeSpec(channels=384), 23 | "p3": ShapeSpec(channels=768), 24 | } 25 | model.neck.in_features = ["p1", "p2", "p3"] 26 | -------------------------------------------------------------------------------- /projects/dino/configs/timm_example.py: -------------------------------------------------------------------------------- 1 | from detectron2.config import LazyCall as L 2 | from detectron2.modeling import ShapeSpec 3 | from detectron2.layers import FrozenBatchNorm2d 4 | from .dino_r50_4scale_12ep import ( 5 | train, 6 | dataloader, 7 | optimizer, 8 | lr_multiplier, 9 | ) 10 | from .models.dino_r50 import model 11 | 12 | from detrex.modeling.backbone import TimmBackbone 13 | 14 | # modify backbone configs 15 | model.backbone = L(TimmBackbone)( 16 | model_name="resnet152d", # name in timm 17 | features_only=True, 18 | pretrained=True, 19 | in_channels=3, 20 | out_indices=(1, 2, 3), 21 | norm_layer=FrozenBatchNorm2d, 22 | ) 23 | 24 | # modify neck configs 25 | model.neck.input_shapes = { 26 | "p1": ShapeSpec(channels=256), 27 | "p2": ShapeSpec(channels=512), 28 | "p3": ShapeSpec(channels=1024), 29 | } 30 | model.neck.in_features = ["p1", "p2", "p3"] 31 | 32 | # modify training configs 33 | train.init_checkpoint = "" 34 | -------------------------------------------------------------------------------- /projects/dino/configs/torchvision_example.py: -------------------------------------------------------------------------------- 1 | from detectron2.config import LazyCall as L 2 | from detectron2.modeling import ShapeSpec 3 | from .dino_r50_4scale_12ep import ( 4 | train, 5 | dataloader, 6 | optimizer, 7 | lr_multiplier, 8 | ) 9 | from .models.dino_r50 import 
model 10 | 11 | from detrex.modeling.backbone import TorchvisionBackbone 12 | 13 | # modify backbone configs 14 | model.backbone = L(TorchvisionBackbone)( 15 | model_name="resnet50", 16 | pretrained=True, 17 | return_nodes={ 18 | "layer2": "res3", 19 | "layer3": "res4", 20 | "layer4": "res5", 21 | }, 22 | ) 23 | 24 | # modify neck configs 25 | model.neck.input_shapes = { 26 | "res3": ShapeSpec(channels=512), 27 | "res4": ShapeSpec(channels=1024), 28 | "res5": ShapeSpec(channels=2048), 29 | } 30 | model.neck.in_features = ["res3", "res4", "res5"] 31 | 32 | # modify training configs 33 | train.init_checkpoint = "" 34 | -------------------------------------------------------------------------------- /projects/dino/modeling/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2022 The IDEA Authors. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | 17 | from .dino_transformer import ( 18 | DINOTransformerEncoder, 19 | DINOTransformerDecoder, 20 | DINOTransformer, 21 | ) 22 | from .dino import DINO 23 | from .dn_criterion import DINOCriterion 24 | -------------------------------------------------------------------------------- /projects/dino_eva/assets/dino_arch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IDEA-Research/detrex/c56d32e3d0262cff9835ebe80a0642965ae0cb3e/projects/dino_eva/assets/dino_arch.png -------------------------------------------------------------------------------- /projects/dino_eva/configs/common/coco_loader_lsj.py: -------------------------------------------------------------------------------- 1 | import detectron2.data.transforms as T 2 | from detectron2 import model_zoo 3 | from detectron2.config import LazyCall as L 4 | 5 | # Data using LSJ 6 | image_size = 1024 7 | dataloader = model_zoo.get_config("common/data/coco.py").dataloader 8 | dataloader.train.mapper.augmentations = [ 9 | L(T.RandomFlip)(horizontal=True), # flip first 10 | L(T.ResizeScale)( 11 | min_scale=0.1, max_scale=2.0, target_height=image_size, target_width=image_size 12 | ), 13 | L(T.FixedSizeCrop)(crop_size=(image_size, image_size), pad=False), 14 | ] 15 | dataloader.train.mapper.image_format = "RGB" 16 | dataloader.train.total_batch_size = 64 17 | # recompute boxes due to cropping 18 | dataloader.train.mapper.recompute_boxes = True 19 | 20 | dataloader.test.mapper.augmentations = [ 21 | L(T.ResizeShortestEdge)(short_edge_length=image_size, max_size=image_size), 22 | ] -------------------------------------------------------------------------------- /projects/dino_eva/configs/common/coco_loader_lsj_1024.py: -------------------------------------------------------------------------------- 1 | import detectron2.data.transforms as T 2 | from detectron2 import model_zoo 3 | from detectron2.config import LazyCall as L 4 | 5 | # Data using 
LSJ 6 | image_size = 1024 7 | dataloader = model_zoo.get_config("common/data/coco.py").dataloader 8 | dataloader.train.mapper.augmentations = [ 9 | L(T.RandomFlip)(horizontal=True), # flip first 10 | L(T.ResizeScale)( 11 | min_scale=0.1, max_scale=2.0, target_height=image_size, target_width=image_size 12 | ), 13 | L(T.FixedSizeCrop)(crop_size=(image_size, image_size), pad=False), 14 | ] 15 | dataloader.train.mapper.image_format = "RGB" 16 | dataloader.train.total_batch_size = 64 17 | # recompute boxes due to cropping 18 | dataloader.train.mapper.recompute_boxes = True 19 | 20 | dataloader.test.mapper.augmentations = [ 21 | L(T.ResizeShortestEdge)(short_edge_length=image_size, max_size=image_size), 22 | ] -------------------------------------------------------------------------------- /projects/dino_eva/configs/common/coco_loader_lsj_1280.py: -------------------------------------------------------------------------------- 1 | import detectron2.data.transforms as T 2 | from detectron2 import model_zoo 3 | from detectron2.config import LazyCall as L 4 | 5 | # Data using LSJ 6 | image_size = 1280 7 | dataloader = model_zoo.get_config("common/data/coco.py").dataloader 8 | dataloader.train.mapper.augmentations = [ 9 | L(T.RandomFlip)(horizontal=True), # flip first 10 | L(T.ResizeScale)( 11 | min_scale=0.1, max_scale=2.0, target_height=image_size, target_width=image_size 12 | ), 13 | L(T.FixedSizeCrop)(crop_size=(image_size, image_size), pad=False), 14 | ] 15 | dataloader.train.mapper.image_format = "RGB" 16 | dataloader.train.total_batch_size = 64 17 | # recompute boxes due to cropping 18 | dataloader.train.mapper.recompute_boxes = True 19 | 20 | dataloader.test.mapper.augmentations = [ 21 | L(T.ResizeShortestEdge)(short_edge_length=image_size, max_size=image_size), 22 | ] -------------------------------------------------------------------------------- /projects/dino_eva/configs/common/coco_loader_lsj_1536.py: 
-------------------------------------------------------------------------------- 1 | import detectron2.data.transforms as T 2 | from detectron2 import model_zoo 3 | from detectron2.config import LazyCall as L 4 | 5 | # Data using LSJ 6 | image_size = 1536 7 | dataloader = model_zoo.get_config("common/data/coco.py").dataloader 8 | dataloader.train.mapper.augmentations = [ 9 | L(T.RandomFlip)(horizontal=True), # flip first 10 | L(T.ResizeScale)( 11 | min_scale=0.1, max_scale=2.0, target_height=image_size, target_width=image_size 12 | ), 13 | L(T.FixedSizeCrop)(crop_size=(image_size, image_size), pad=False), 14 | ] 15 | dataloader.train.mapper.image_format = "RGB" 16 | dataloader.train.total_batch_size = 64 17 | # recompute boxes due to cropping 18 | dataloader.train.mapper.recompute_boxes = True 19 | 20 | dataloader.test.mapper.augmentations = [ 21 | L(T.ResizeShortestEdge)(short_edge_length=image_size, max_size=image_size), 22 | ] -------------------------------------------------------------------------------- /projects/dino_eva/configs/models/dino_eva_01.py: -------------------------------------------------------------------------------- 1 | from functools import partial 2 | import torch.nn as nn 3 | from detectron2.config import LazyCall as L 4 | from detectron2.layers import ShapeSpec 5 | from detectron2.modeling.backbone.fpn import LastLevelMaxPool 6 | from detrex.modeling.backbone import EVAViT, SimpleFeaturePyramid 7 | 8 | from .dino_r50 import model 9 | 10 | 11 | # Base 12 | embed_dim, depth, num_heads, dp = 768, 12, 12, 0.1 13 | 14 | # EVA-01 15 | model.backbone = L(SimpleFeaturePyramid)( 16 | net=L(EVAViT)( 17 | img_size=1024, 18 | patch_size=16, 19 | embed_dim=embed_dim, 20 | depth=depth, 21 | num_heads=num_heads, 22 | drop_path_rate=dp, 23 | window_size=14, 24 | mlp_ratio=4, 25 | qkv_bias=True, 26 | norm_layer=partial(nn.LayerNorm, eps=1e-6), 27 | window_block_indexes=[ 28 | # 2, 5, 8 11 for global attention 29 | 0, 30 | 1, 31 | 3, 32 | 4, 33 | 6, 34 | 7, 
35 | 9, 36 | 10, 37 | ], 38 | residual_block_indexes=[], 39 | use_rel_pos=True, 40 | out_feature="last_feat", 41 | ), 42 | in_feature="${.net.out_feature}", 43 | out_channels=256, 44 | scale_factors=(2.0, 1.0, 0.5), # (4.0, 2.0, 1.0, 0.5) in ViTDet 45 | top_block=L(LastLevelMaxPool)(), 46 | norm="LN", 47 | square_pad=1024, 48 | ) 49 | 50 | # modify neck config 51 | model.neck.input_shapes = { 52 | "p3": ShapeSpec(channels=256), 53 | "p4": ShapeSpec(channels=256), 54 | "p5": ShapeSpec(channels=256), 55 | "p6": ShapeSpec(channels=256), 56 | } 57 | model.neck.in_features = ["p3", "p4", "p5", "p6"] 58 | model.neck.num_outs = 4 59 | model.transformer.num_feature_levels = 4 60 | -------------------------------------------------------------------------------- /projects/dino_eva/configs/models/dino_eva_02.py: -------------------------------------------------------------------------------- 1 | from functools import partial 2 | import torch.nn as nn 3 | from detectron2.config import LazyCall as L 4 | from detectron2.layers import ShapeSpec 5 | from detectron2.modeling.backbone.fpn import LastLevelMaxPool 6 | from detrex.modeling.backbone import EVA02_ViT, SimpleFeaturePyramid 7 | 8 | from .dino_r50 import model 9 | 10 | 11 | # Base 12 | embed_dim, depth, num_heads, dp = 768, 12, 12, 0.1 13 | 14 | # EVA-01 15 | model.backbone = L(SimpleFeaturePyramid)( 16 | net=L(EVA02_ViT)( 17 | img_size=1024, 18 | patch_size=16, 19 | embed_dim=embed_dim, 20 | depth=depth, 21 | num_heads=num_heads, 22 | drop_path_rate=dp, 23 | window_size=14, 24 | mlp_ratio=4, 25 | qkv_bias=True, 26 | norm_layer=partial(nn.LayerNorm, eps=1e-6), 27 | window_block_indexes=[ 28 | # 2, 5, 8 11 for global attention 29 | 0, 30 | 1, 31 | 3, 32 | 4, 33 | 6, 34 | 7, 35 | 9, 36 | 10, 37 | ], 38 | residual_block_indexes=[], 39 | use_rel_pos=True, 40 | out_feature="last_feat", 41 | ), 42 | in_feature="${.net.out_feature}", 43 | out_channels=256, 44 | scale_factors=(2.0, 1.0, 0.5), # (4.0, 2.0, 1.0, 0.5) in ViTDet 45 | 
top_block=L(LastLevelMaxPool)(), 46 | norm="LN", 47 | square_pad=1024, 48 | ) 49 | 50 | # modify neck config 51 | model.neck.input_shapes = { 52 | "p3": ShapeSpec(channels=256), 53 | "p4": ShapeSpec(channels=256), 54 | "p5": ShapeSpec(channels=256), 55 | "p6": ShapeSpec(channels=256), 56 | } 57 | model.neck.in_features = ["p3", "p4", "p5", "p6"] 58 | model.neck.num_outs = 4 59 | model.transformer.num_feature_levels = 4 60 | -------------------------------------------------------------------------------- /projects/dino_eva/modeling/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2022 The IDEA Authors. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | 17 | from .dino_transformer import ( 18 | DINOTransformerEncoder, 19 | DINOTransformerDecoder, 20 | DINOTransformer, 21 | ) 22 | from .dino import DINO 23 | from .dn_criterion import DINOCriterion 24 | -------------------------------------------------------------------------------- /projects/dn_deformable_detr/assets/dn_detr_arch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IDEA-Research/detrex/c56d32e3d0262cff9835ebe80a0642965ae0cb3e/projects/dn_deformable_detr/assets/dn_detr_arch.png -------------------------------------------------------------------------------- /projects/dn_deformable_detr/configs/dn_deformable_detr_r50_12ep.py: -------------------------------------------------------------------------------- 1 | from detrex.config import get_config 2 | from .models.dn_deformable_detr_r50 import model 3 | 4 | dataloader = get_config("common/data/coco_detr.py").dataloader 5 | optimizer = get_config("common/optim.py").AdamW 6 | lr_multiplier = get_config("common/coco_schedule.py").lr_multiplier_12ep 7 | train = get_config("common/train.py").train 8 | 9 | # modify training config 10 | train.init_checkpoint = "detectron2://ImageNetPretrained/torchvision/R-50.pkl" 11 | train.output_dir = "./output/dn_deformable_detr_r50_12ep" 12 | 13 | # max training iterations 14 | train.max_iter = 90000 15 | 16 | # run evaluation every 5000 iters 17 | train.eval_period = 5000 18 | 19 | # log training infomation every 20 iters 20 | train.log_period = 20 21 | 22 | # save checkpoint every 5000 iters 23 | train.checkpointer.period = 5000 24 | 25 | # gradient clipping for training 26 | train.clip_grad.enabled = True 27 | train.clip_grad.params.max_norm = 0.1 28 | train.clip_grad.params.norm_type = 2 29 | 30 | # set training devices 31 | train.device = "cuda" 32 | model.device = train.device 33 | 34 | # modify optimizer config 35 | optimizer.lr = 1e-4 36 | optimizer.betas = (0.9, 0.999) 37 | 
optimizer.weight_decay = 1e-4 38 | optimizer.params.lr_factor_func = lambda module_name: 0.1 if "backbone" in module_name else 1 39 | 40 | # modify dataloader config 41 | dataloader.train.num_workers = 16 42 | 43 | # please notice that this is total batch size. 44 | # surpose you're using 4 gpus for training and the batch size for 45 | # each gpu is 16/4 = 4 46 | dataloader.train.total_batch_size = 16 47 | 48 | # dump the testing results into output_dir for visualization 49 | dataloader.evaluator.output_dir = train.output_dir 50 | -------------------------------------------------------------------------------- /projects/dn_deformable_detr/configs/dn_deformable_detr_r50_50ep.py: -------------------------------------------------------------------------------- 1 | from detrex.config import get_config 2 | from .models.dn_deformable_detr_r50 import model 3 | 4 | dataloader = get_config("common/data/coco_detr.py").dataloader 5 | optimizer = get_config("common/optim.py").AdamW 6 | lr_multiplier = get_config("common/coco_schedule.py").lr_multiplier_50ep 7 | train = get_config("common/train.py").train 8 | 9 | # modify training config 10 | train.init_checkpoint = "detectron2://ImageNetPretrained/torchvision/R-50.pkl" 11 | train.output_dir = "./output/dn_deformable_detr_r50_50ep" 12 | 13 | # max training iterations 14 | train.max_iter = 375000 15 | 16 | # run evaluation every 5000 iters 17 | train.eval_period = 5000 18 | 19 | # log training infomation every 20 iters 20 | train.log_period = 20 21 | 22 | # save checkpoint every 5000 iters 23 | train.checkpointer.period = 5000 24 | 25 | # gradient clipping for training 26 | train.clip_grad.enabled = True 27 | train.clip_grad.params.max_norm = 0.1 28 | train.clip_grad.params.norm_type = 2 29 | 30 | # set training devices 31 | train.device = "cuda" 32 | model.device = train.device 33 | 34 | # modify optimizer config 35 | optimizer.lr = 1e-4 36 | optimizer.betas = (0.9, 0.999) 37 | optimizer.weight_decay = 1e-4 38 | 
optimizer.params.lr_factor_func = lambda module_name: 0.1 if "backbone" in module_name else 1 39 | 40 | # modify dataloader config 41 | dataloader.train.num_workers = 16 42 | 43 | # please notice that this is total batch size. 44 | # surpose you're using 4 gpus for training and the batch size for 45 | # each gpu is 16/4 = 4 46 | dataloader.train.total_batch_size = 16 47 | 48 | # dump the testing results into output_dir for visualization 49 | dataloader.evaluator.output_dir = train.output_dir 50 | -------------------------------------------------------------------------------- /projects/dn_deformable_detr/modeling/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2022 The IDEA Authors. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | 17 | from .dn_deformable_transformer import ( 18 | DNDeformableDetrTransformerEncoder, 19 | DNDeformableDetrTransformerDecoder, 20 | DNDeformableDetrTransformer, 21 | ) 22 | from .dn_deformable_detr import DNDeformableDETR 23 | from .dn_criterion import DNCriterion 24 | -------------------------------------------------------------------------------- /projects/dn_detr/assets/dn_detr_arch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IDEA-Research/detrex/c56d32e3d0262cff9835ebe80a0642965ae0cb3e/projects/dn_detr/assets/dn_detr_arch.png -------------------------------------------------------------------------------- /projects/dn_detr/configs/dn_detr_r50_50ep.py: -------------------------------------------------------------------------------- 1 | from detrex.config import get_config 2 | from .models.dn_detr_r50 import model 3 | 4 | dataloader = get_config("common/data/coco_detr.py").dataloader 5 | optimizer = get_config("common/optim.py").AdamW 6 | lr_multiplier = get_config("common/coco_schedule.py").lr_multiplier_50ep 7 | train = get_config("common/train.py").train 8 | 9 | # modify training config 10 | train.init_checkpoint = "detectron2://ImageNetPretrained/torchvision/R-50.pkl" 11 | train.output_dir = "./output/dn_detr_r50_50ep" 12 | 13 | # max training iterations 14 | train.max_iter = 375000 15 | 16 | # run evaluation every 5000 iters 17 | train.eval_period = 5000 18 | 19 | # log training infomation every 20 iters 20 | train.log_period = 20 21 | 22 | # save checkpoint every 5000 iters 23 | train.checkpointer.period = 5000 24 | 25 | # gradient clipping for training 26 | train.clip_grad.enabled = True 27 | train.clip_grad.params.max_norm = 0.1 28 | train.clip_grad.params.norm_type = 2 29 | 30 | # set training devices 31 | train.device = "cuda" 32 | model.device = train.device 33 | 34 | # modify optimizer config 35 | optimizer.lr = 1e-4 36 | optimizer.betas = (0.9, 0.999) 37 | 
optimizer.weight_decay = 1e-4 38 | optimizer.params.lr_factor_func = lambda module_name: 0.1 if "backbone" in module_name else 1 39 | 40 | # modify dataloader config 41 | dataloader.train.num_workers = 16 42 | 43 | # please notice that this is total batch size. 44 | # surpose you're using 4 gpus for training and the batch size for 45 | # each gpu is 16/4 = 4 46 | dataloader.train.total_batch_size = 16 47 | 48 | # dump the testing results into output_dir for visualization 49 | dataloader.evaluator.output_dir = train.output_dir 50 | -------------------------------------------------------------------------------- /projects/dn_detr/configs/dn_detr_r50_dc5_50ep.py: -------------------------------------------------------------------------------- 1 | from .dn_detr_r50_50ep import ( 2 | train, 3 | dataloader, 4 | optimizer, 5 | lr_multiplier, 6 | ) 7 | from .models.dn_detr_r50_dc5 import model 8 | 9 | # modify training config 10 | train.init_checkpoint = "https://download.pytorch.org/models/resnet50-0676ba61.pth" 11 | train.output_dir = "./output/dab_detr_r50_dc5_50ep" 12 | 13 | -------------------------------------------------------------------------------- /projects/dn_detr/configs/models/dn_detr_r50_dc5.py: -------------------------------------------------------------------------------- 1 | from detectron2.config import LazyCall as L 2 | from detrex.modeling.backbone.torchvision_resnet import TorchvisionResNet 3 | 4 | from .dn_detr_r50 import model 5 | 6 | 7 | model.backbone=L(TorchvisionResNet)( 8 | name="resnet50", 9 | train_backbone=True, 10 | dilation=True, 11 | return_layers={"layer4": "res5"} 12 | ) 13 | -------------------------------------------------------------------------------- /projects/dn_detr/modeling/__init__.py: -------------------------------------------------------------------------------- 1 | from .dn_detr import DNDETR 2 | from .dn_transformers import ( 3 | DNDetrTransformerEncoder, 4 | DNDetrTransformerDecoder, 5 | DNDetrTransformer, 6 | ) 7 | from 
.dn_criterion import DNCriterion 8 | -------------------------------------------------------------------------------- /projects/focus_detr/configs/focus_detr_resnet/focus_detr_r101_4scale_12ep.py: -------------------------------------------------------------------------------- 1 | from .focus_detr_r50_4scale_12ep import ( 2 | train, 3 | dataloader, 4 | optimizer, 5 | lr_multiplier, 6 | model, 7 | ) 8 | 9 | # modify training config 10 | train.init_checkpoint = "./pre-trained/resnet_torch/r101_v1.pkl" 11 | train.output_dir = "./output/focus_detr_r101_4scale_12ep" 12 | 13 | # modify model config 14 | model.backbone.stages.depth = 101 15 | -------------------------------------------------------------------------------- /projects/focus_detr/configs/focus_detr_resnet/focus_detr_r101_4scale_24ep.py: -------------------------------------------------------------------------------- 1 | from .focus_detr_r50_4scale_24ep import ( 2 | train, 3 | dataloader, 4 | optimizer, 5 | lr_multiplier, 6 | model, 7 | ) 8 | 9 | # modify training config 10 | train.init_checkpoint = "./pre-trained/resnet_torch/r101_v1.pkl" 11 | train.output_dir = "./output/focus_detr_r101_4scale_24ep" 12 | 13 | # modify model config 14 | model.backbone.stages.depth = 101 15 | -------------------------------------------------------------------------------- /projects/focus_detr/configs/focus_detr_resnet/focus_detr_r101_4scale_36ep.py: -------------------------------------------------------------------------------- 1 | # Use detectron2's lr_multiplier_3x schedule to adjust the learning-rate decay 2 | from detrex.config import get_config 3 | from .focus_detr_r50_4scale_12ep import ( 4 | train, 5 | dataloader, 6 | optimizer, 7 | model, 8 | ) 9 | 10 | # get default config 11 | lr_multiplier = get_config("common/coco_schedule.py").lr_multiplier_3x 12 | 13 | # modify model config 14 | # use the original implementation of dab-detr position embedding in 24 epochs training. 
15 | model.position_embedding.temperature = 20 16 | model.position_embedding.offset = 0.0 17 | 18 | # modify training config 19 | # NOTE(review): this "r101" config loads r50 weights and never sets model.backbone.stages.depth = 101, unlike the sibling r101 12ep/24ep configs — confirm whether this is intentional 20 | train.init_checkpoint = "./pre-trained/resnet_torch/r50_v1.pkl" 21 | train.output_dir = "./output/focus_detr_r50_4scale_36ep_v3" 22 | 23 | # max training iterations 24 | train.max_iter = 270000 25 | 26 | # modify dataloader config 27 | # filter out images with empty annotations during training (filter_empty=True) 28 | dataloader.train.dataset.filter_empty = True 29 | dataloader.train.num_workers = 16 -------------------------------------------------------------------------------- /projects/focus_detr/configs/focus_detr_resnet/focus_detr_r50_4scale_24ep.py: -------------------------------------------------------------------------------- 1 | #Copyright (C) 2023. Huawei Technologies Co., Ltd. All rights reserved. 2 | 3 | #This program is free software; you can redistribute it and/or modify it under the terms of the MIT License. 4 | 5 | #This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the MIT License for more details. 6 | 7 | from detrex.config import get_config 8 | from .focus_detr_r50_4scale_12ep import ( 9 | train, 10 | dataloader, 11 | optimizer, 12 | model, 13 | ) 14 | 15 | # get default config 16 | lr_multiplier = get_config("common/coco_schedule.py").lr_multiplier_24ep 17 | 18 | # modify model config 19 | # use the original implementation of dab-detr position embedding in 24 epochs training. 
20 | model.position_embedding.temperature = 20 21 | model.position_embedding.offset = 0.0 22 | 23 | # modify training config 24 | train.init_checkpoint = "./pre-trained/resnet_torch/r50_v1.pkl" 25 | train.output_dir = "./output/focus_detr_r50_4scale_24ep" 26 | 27 | # max training iterations 28 | train.max_iter = 180000 29 | 30 | # modify dataloader config 31 | # filter out images with empty annotations during training (filter_empty=True) 32 | dataloader.train.dataset.filter_empty = True 33 | dataloader.train.num_workers = 16 -------------------------------------------------------------------------------- /projects/focus_detr/configs/focus_detr_resnet/focus_detr_r50_4scale_36ep.py: -------------------------------------------------------------------------------- 1 | # Modified from the 24-epoch config to extend the training time 2 | from detrex.config import get_config 3 | from .focus_detr_r50_4scale_12ep import ( 4 | train, 5 | dataloader, 6 | optimizer, 7 | model, 8 | ) 9 | 10 | # get default config 11 | lr_multiplier = get_config("common/coco_schedule.py").lr_multiplier_24_36ep 12 | 13 | # modify model config 14 | # use the original implementation of dab-detr position embedding for this longer training schedule. 15 | model.position_embedding.temperature = 20 16 | model.position_embedding.offset = 0.0 17 | 18 | # modify training config 19 | train.init_checkpoint = "./pre-trained/resnet_torch/r50_v1.pkl" 20 | train.output_dir = "./output/focus_detr_r50_4scale_36ep_v2" 21 | 22 | # max training iterations 23 | train.max_iter = 270000 24 | 25 | # modify dataloader config 26 | # filter out images with empty annotations during training (filter_empty=True) 27 | dataloader.train.dataset.filter_empty = True 28 | dataloader.train.num_workers = 16 -------------------------------------------------------------------------------- /projects/focus_detr/configs/models/focus_detr_swin_base_384.py: -------------------------------------------------------------------------------- 1 | #Copyright (C) 2023. Huawei Technologies Co., Ltd. All rights reserved. 
2 | 3 | #This program is free software; you can redistribute it and/or modify it under the terms of the MIT License. 4 | 5 | #This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the MIT License for more details. 6 | 7 | from detectron2.config import LazyCall as L 8 | from detectron2.layers import ShapeSpec 9 | from detectron2.modeling.backbone import SwinTransformer 10 | 11 | from .focus_detr_r50 import model 12 | 13 | 14 | # modify backbone config 15 | model.backbone = L(SwinTransformer)( 16 | pretrain_img_size=384, 17 | embed_dim=128, 18 | depths=(2, 2, 18, 2), 19 | num_heads=(4, 8, 16, 32), 20 | window_size=12, 21 | out_indices=(1, 2, 3), 22 | ) 23 | 24 | # modify neck config 25 | model.neck.input_shapes = { 26 | "p1": ShapeSpec(channels=256), 27 | "p2": ShapeSpec(channels=512), 28 | "p3": ShapeSpec(channels=1024), 29 | } 30 | model.neck.in_features = ["p1", "p2", "p3"] 31 | -------------------------------------------------------------------------------- /projects/focus_detr/configs/models/focus_detr_swin_large_224.py: -------------------------------------------------------------------------------- 1 | #Copyright (C) 2023. Huawei Technologies Co., Ltd. All rights reserved. 2 | 3 | #This program is free software; you can redistribute it and/or modify it under the terms of the MIT License. 4 | 5 | #This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the MIT License for more details. 
6 | 7 | from detectron2.config import LazyCall as L 8 | from detectron2.layers import ShapeSpec 9 | from detectron2.modeling.backbone import SwinTransformer 10 | 11 | from .focus_detr_r50 import model 12 | 13 | 14 | # modify backbone config 15 | model.backbone = L(SwinTransformer)( 16 | pretrain_img_size=224, 17 | embed_dim=192, 18 | depths=(2, 2, 18, 2), 19 | num_heads=(6, 12, 24, 48), 20 | window_size=7, 21 | out_indices=(1, 2, 3), 22 | ) 23 | 24 | # modify neck config 25 | model.neck.input_shapes = { 26 | "p1": ShapeSpec(channels=384), 27 | "p2": ShapeSpec(channels=768), 28 | "p3": ShapeSpec(channels=1536), 29 | } 30 | model.neck.in_features = ["p1", "p2", "p3"] 31 | -------------------------------------------------------------------------------- /projects/focus_detr/configs/models/focus_detr_swin_large_384.py: -------------------------------------------------------------------------------- 1 | #Copyright (C) 2023. Huawei Technologies Co., Ltd. All rights reserved. 2 | 3 | #This program is free software; you can redistribute it and/or modify it under the terms of the MIT License. 4 | 5 | #This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the MIT License for more details. 
6 | 7 | from detectron2.config import LazyCall as L 8 | from detectron2.layers import ShapeSpec 9 | from detectron2.modeling.backbone import SwinTransformer 10 | 11 | from .focus_detr_r50 import model 12 | 13 | 14 | # modify backbone config 15 | model.backbone = L(SwinTransformer)( 16 | pretrain_img_size=384, 17 | embed_dim=192, 18 | depths=(2, 2, 18, 2), 19 | num_heads=(6, 12, 24, 48), 20 | window_size=12, 21 | out_indices=(1, 2, 3), 22 | ) 23 | 24 | # modify neck config 25 | model.neck.input_shapes = { 26 | "p1": ShapeSpec(channels=384), 27 | "p2": ShapeSpec(channels=768), 28 | "p3": ShapeSpec(channels=1536), 29 | } 30 | model.neck.in_features = ["p1", "p2", "p3"] 31 | -------------------------------------------------------------------------------- /projects/focus_detr/configs/models/focus_detr_swin_small_224.py: -------------------------------------------------------------------------------- 1 | #Copyright (C) 2023. Huawei Technologies Co., Ltd. All rights reserved. 2 | 3 | #This program is free software; you can redistribute it and/or modify it under the terms of the MIT License. 4 | 5 | #This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the MIT License for more details. 
6 | 7 | from detectron2.config import LazyCall as L 8 | from detectron2.layers import ShapeSpec 9 | from detectron2.modeling.backbone import SwinTransformer 10 | 11 | from .focus_detr_r50 import model 12 | 13 | 14 | # modify backbone config 15 | model.backbone = L(SwinTransformer)( 16 | pretrain_img_size=224, 17 | embed_dim=96, 18 | depths=(2, 2, 18, 2), 19 | num_heads=(3, 6, 12, 24), 20 | drop_path_rate=0.2, 21 | window_size=7, 22 | out_indices=(1, 2, 3), 23 | ) 24 | 25 | # modify neck config 26 | model.neck.input_shapes = { 27 | "p1": ShapeSpec(channels=192), 28 | "p2": ShapeSpec(channels=384), 29 | "p3": ShapeSpec(channels=768), 30 | } 31 | model.neck.in_features = ["p1", "p2", "p3"] 32 | -------------------------------------------------------------------------------- /projects/focus_detr/configs/models/focus_detr_swin_tiny_224.py: -------------------------------------------------------------------------------- 1 | #Copyright (C) 2023. Huawei Technologies Co., Ltd. All rights reserved. 2 | 3 | #This program is free software; you can redistribute it and/or modify it under the terms of the MIT License. 4 | 5 | #This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the MIT License for more details. 
6 | 7 | from detectron2.config import LazyCall as L 8 | from detectron2.layers import ShapeSpec 9 | from detectron2.modeling.backbone import SwinTransformer 10 | 11 | from .focus_detr_r50 import model 12 | 13 | 14 | # modify backbone config 15 | model.backbone = L(SwinTransformer)( 16 | pretrain_img_size=224, 17 | embed_dim=96, 18 | depths=(2, 2, 6, 2), 19 | num_heads=(3, 6, 12, 24), 20 | drop_path_rate=0.1, 21 | window_size=7, 22 | out_indices=(1, 2, 3), 23 | ) 24 | 25 | # modify neck config 26 | model.neck.input_shapes = { 27 | "p1": ShapeSpec(channels=192), 28 | "p2": ShapeSpec(channels=384), 29 | "p3": ShapeSpec(channels=768), 30 | } 31 | model.neck.in_features = ["p1", "p2", "p3"] 32 | -------------------------------------------------------------------------------- /projects/focus_detr/modeling/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2022 The IDEA Authors. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | 17 | from .focus_detr_transformer import ( 18 | FOCUS_DETRTransformerEncoder, 19 | FOCUS_DETRTransformerDecoder, 20 | FOCUS_DETRTransformer, 21 | MaskPredictor, 22 | ) 23 | from .focus_detr import FOCUS_DETR 24 | from .dn_criterion import FOCUS_DETRCriterion 25 | -------------------------------------------------------------------------------- /projects/group_detr/README.md: -------------------------------------------------------------------------------- 1 | ## Group DETR: Fast DETR Training with Group-Wise One-to-Many Assignment 2 | 3 | Chen, Qiang and Chen, Xiaokang and Wang, Jian and Feng, Haocheng and Han, Junyu and Ding, Errui and Zeng, Gang and Wang, Jingdong 4 | 5 | [[`arXiv`](https://arxiv.org/abs/2207.13085)] [[`BibTeX`](#citing-group-detr)] 6 | 7 |
8 | 9 |

10 | 11 | **Note**: This is the implementation of `Conditional DETR + Group DETR` 12 | 13 | ## Training 14 | All configs can be trained with: 15 | ```bash 16 | cd detrex 17 | python tools/train_net.py --config-file projects/group_detr/configs/path/to/config.py --num-gpus 8 18 | ``` 19 | By default, we use 8 GPUs with total batch size as 16 for training. 20 | 21 | ## Evaluation 22 | Model evaluation can be done as follows: 23 | ```bash 24 | cd detrex 25 | python tools/train_net.py --config-file projects/group_detr/configs/path/to/config.py --eval-only train.init_checkpoint=/path/to/model_checkpoint 26 | ``` 27 | 28 | ## Citing Group-DETR 29 | If you find our work helpful for your research, please consider citing the following BibTeX entry. 30 | 31 | ```BibTex 32 | @article{chen2022group, 33 | title={Group DETR: Fast DETR Training with Group-Wise One-to-Many Assignment}, 34 | author={Chen, Qiang and Chen, Xiaokang and Wang, Jian and Feng, Haocheng and Han, Junyu and Ding, Errui and Zeng, Gang and Wang, Jingdong}, 35 | journal={arXiv preprint arXiv:2207.13085}, 36 | year={2022} 37 | } 38 | ``` 39 | -------------------------------------------------------------------------------- /projects/group_detr/assets/group_detr_arch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IDEA-Research/detrex/c56d32e3d0262cff9835ebe80a0642965ae0cb3e/projects/group_detr/assets/group_detr_arch.png -------------------------------------------------------------------------------- /projects/group_detr/configs/group_detr_r50_50ep.py: -------------------------------------------------------------------------------- 1 | from detrex.config import get_config 2 | from .models.group_detr_r50 import model 3 | 4 | dataloader = get_config("common/data/coco_detr.py").dataloader 5 | optimizer = get_config("common/optim.py").AdamW 6 | lr_multiplier = get_config("common/coco_schedule.py").lr_multiplier_50ep 7 | train = 
get_config("common/train.py").train 8 | 9 | # modify training config 10 | train.init_checkpoint = "detectron2://ImageNetPretrained/torchvision/R-50.pkl" 11 | train.output_dir = "./output/group_detr_r50_50ep" 12 | 13 | # max training iterations 14 | train.max_iter = 375000 15 | 16 | # run evaluation every 5000 iters 17 | train.eval_period = 5000 18 | 19 | # log training infomation every 20 iters 20 | train.log_period = 20 21 | 22 | # save checkpoint every 5000 iters 23 | train.checkpointer.period = 5000 24 | 25 | # gradient clipping for training 26 | train.clip_grad.enabled = True 27 | train.clip_grad.params.max_norm = 0.1 28 | train.clip_grad.params.norm_type = 2 29 | 30 | # set training devices 31 | train.device = "cuda" 32 | model.device = train.device 33 | 34 | # modify optimizer config 35 | optimizer.lr = 1e-4 36 | optimizer.betas = (0.9, 0.999) 37 | optimizer.weight_decay = 1e-4 38 | optimizer.params.lr_factor_func = lambda module_name: 0.1 if "backbone" in module_name else 1 39 | 40 | # modify dataloader config 41 | dataloader.train.num_workers = 16 42 | 43 | # please notice that this is total batch size. 
44 | # surpose you're using 4 gpus for training and the batch size for 45 | # each gpu is 16/4 = 4 46 | dataloader.train.total_batch_size = 16 47 | 48 | # dump the testing results into output_dir for visualization 49 | dataloader.evaluator.output_dir = train.output_dir 50 | -------------------------------------------------------------------------------- /projects/group_detr/modeling/__init__.py: -------------------------------------------------------------------------------- 1 | from .group_detr import GroupDETR 2 | from .group_detr_transformer import ( 3 | GroupDetrTransformerEncoder, 4 | GroupDetrTransformerDecoder, 5 | GroupDetrTransformer, 6 | ) 7 | from .attention import GroupConditionalSelfAttention 8 | from .group_criterion import GroupSetCriterion 9 | from .group_matcher import GroupHungarianMatcher 10 | -------------------------------------------------------------------------------- /projects/h_deformable_detr/assets/h_detr_arch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IDEA-Research/detrex/c56d32e3d0262cff9835ebe80a0642965ae0cb3e/projects/h_deformable_detr/assets/h_detr_arch.png -------------------------------------------------------------------------------- /projects/h_deformable_detr/configs/h_deformable_detr_r50_50ep.py: -------------------------------------------------------------------------------- 1 | from detrex.config import get_config 2 | from .models.h_deformable_detr_r50 import model 3 | 4 | dataloader = get_config("common/data/coco_detr.py").dataloader 5 | lr_multiplier = get_config("common/coco_schedule.py").lr_multiplier_50ep 6 | optimizer = get_config("common/optim.py").AdamW 7 | train = get_config("common/train.py").train 8 | 9 | # modify training config 10 | train.init_checkpoint = "detectron2://ImageNetPretrained/torchvision/R-50.pkl" 11 | train.output_dir = "./output/h_deformable_detr_r50_50ep" 12 | 13 | # max training iterations 14 | train.max_iter = 375000 
15 | 16 | # run evaluation every 5000 iters 17 | train.eval_period = 5000 18 | 19 | # log training infomation every 20 iters 20 | train.log_period = 20 21 | 22 | # save checkpoint every 5000 iters 23 | train.checkpointer.period = 5000 24 | 25 | # gradient clipping for training 26 | train.clip_grad.enabled = True 27 | train.clip_grad.params.max_norm = 0.1 28 | train.clip_grad.params.norm_type = 2 29 | 30 | # set training devices 31 | train.device = "cuda" 32 | model.device = train.device 33 | 34 | # modify optimizer config 35 | optimizer.lr = 1e-4 36 | optimizer.betas = (0.9, 0.999) 37 | optimizer.weight_decay = 1e-4 38 | optimizer.params.lr_factor_func = lambda module_name: 0.1 if "backbone" in module_name else 1 39 | 40 | # modify dataloader config 41 | dataloader.train.num_workers = 16 42 | 43 | # please notice that this is total batch size. 44 | # surpose you're using 4 gpus for training and the batch size for 45 | # each gpu is 16/4 = 4 46 | dataloader.train.total_batch_size = 16 47 | 48 | # dump the testing results into output_dir for visualization 49 | dataloader.evaluator.output_dir = train.output_dir 50 | -------------------------------------------------------------------------------- /projects/h_deformable_detr/configs/h_deformable_detr_r50_two_stage_12ep.py: -------------------------------------------------------------------------------- 1 | from .h_deformable_detr_r50_50ep import train, dataloader, optimizer, model 2 | from detrex.config import get_config 3 | 4 | lr_multiplier = get_config("common/coco_schedule.py").lr_multiplier_12ep 5 | # modify model config 6 | model.with_box_refine = True 7 | model.as_two_stage = True 8 | 9 | # modify training config 10 | train.init_checkpoint = "detectron2://ImageNetPretrained/torchvision/R-50.pkl" 11 | train.output_dir = "./output/h_deformable_detr_r50_two_stage_12ep" 12 | train.max_iter = 90000 13 | -------------------------------------------------------------------------------- 
/projects/h_deformable_detr/configs/h_deformable_detr_r50_two_stage_36ep.py: -------------------------------------------------------------------------------- 1 | from .h_deformable_detr_r50_50ep import train, dataloader, optimizer, model 2 | from detrex.config import get_config 3 | 4 | lr_multiplier = get_config("common/coco_schedule.py").lr_multiplier_36ep 5 | # modify model config 6 | model.with_box_refine = True 7 | model.as_two_stage = True 8 | 9 | # modify training config 10 | train.init_checkpoint = "detectron2://ImageNetPretrained/torchvision/R-50.pkl" 11 | train.output_dir = "./output/h_deformable_detr_r50_two_stage_36ep" 12 | train.max_iter = 270000 13 | -------------------------------------------------------------------------------- /projects/h_deformable_detr/configs/h_deformable_detr_swin_large_two_stage_12ep.py: -------------------------------------------------------------------------------- 1 | from .h_deformable_detr_r50_50ep import train, dataloader, optimizer, model 2 | from detrex.config import get_config 3 | from detectron2.config import LazyCall as L 4 | from detectron2.layers import ShapeSpec 5 | from detectron2.modeling.backbone import SwinTransformer 6 | 7 | lr_multiplier = get_config("common/coco_schedule.py").lr_multiplier_12ep 8 | # modify model config 9 | model.backbone = L(SwinTransformer)( 10 | pretrain_img_size=224, 11 | embed_dim=192, 12 | depths=(2, 2, 18, 2), 13 | num_heads=(6, 12, 24, 48), 14 | drop_path_rate=0.2, 15 | window_size=7, 16 | out_indices=(1, 2, 3), 17 | ) 18 | 19 | # modify neck config 20 | model.neck.input_shapes = { 21 | "p1": ShapeSpec(channels=384), 22 | "p2": ShapeSpec(channels=768), 23 | "p3": ShapeSpec(channels=1536), 24 | } 25 | model.neck.in_features = ["p1", "p2", "p3"] 26 | model.with_box_refine = True 27 | model.as_two_stage = True 28 | 29 | # modify training config 30 | train.init_checkpoint = "/mnt/pretrained_backbone/swin_large_patch4_window7_224_22k.pth" 31 | train.output_dir = 
"./output/h_deformable_detr_swin_large_two_stage_12ep" 32 | train.max_iter = 90000 33 | -------------------------------------------------------------------------------- /projects/h_deformable_detr/configs/h_deformable_detr_swin_large_two_stage_12ep_900queries.py: -------------------------------------------------------------------------------- 1 | from .h_deformable_detr_r50_50ep import train, dataloader, optimizer, model 2 | from detrex.config import get_config 3 | from detectron2.config import LazyCall as L 4 | from detectron2.layers import ShapeSpec 5 | from detectron2.modeling.backbone import SwinTransformer 6 | 7 | lr_multiplier = get_config("common/coco_schedule.py").lr_multiplier_12ep 8 | # modify model config 9 | model.backbone = L(SwinTransformer)( 10 | pretrain_img_size=224, 11 | embed_dim=192, 12 | depths=(2, 2, 18, 2), 13 | num_heads=(6, 12, 24, 48), 14 | drop_path_rate=0.2, 15 | window_size=7, 16 | out_indices=(1, 2, 3), 17 | ) 18 | 19 | # modify neck config 20 | model.neck.input_shapes = { 21 | "p1": ShapeSpec(channels=384), 22 | "p2": ShapeSpec(channels=768), 23 | "p3": ShapeSpec(channels=1536), 24 | } 25 | model.neck.in_features = ["p1", "p2", "p3"] 26 | model.with_box_refine = True 27 | model.as_two_stage = True 28 | 29 | model.num_queries_one2one = 900 30 | model.transformer.two_stage_num_proposals = 2400 31 | 32 | # modify training config 33 | train.init_checkpoint = "/mnt/pretrained_backbone/swin_large_patch4_window7_224_22k.pth" 34 | train.output_dir = "./output/h_deformable_detr_swin_large_two_stage_12ep_900queries" 35 | train.max_iter = 90000 36 | -------------------------------------------------------------------------------- /projects/h_deformable_detr/configs/h_deformable_detr_swin_large_two_stage_36ep.py: -------------------------------------------------------------------------------- 1 | from .h_deformable_detr_r50_50ep import train, dataloader, optimizer, model 2 | from detrex.config import get_config 3 | from detectron2.config import 
LazyCall as L 4 | from detectron2.layers import ShapeSpec 5 | from detectron2.modeling.backbone import SwinTransformer 6 | 7 | lr_multiplier = get_config("common/coco_schedule.py").lr_multiplier_36ep 8 | # modify model config 9 | model.backbone = L(SwinTransformer)( 10 | pretrain_img_size=224, 11 | embed_dim=192, 12 | depths=(2, 2, 18, 2), 13 | num_heads=(6, 12, 24, 48), 14 | drop_path_rate=0.5, 15 | window_size=7, 16 | out_indices=(1, 2, 3), 17 | ) 18 | 19 | # modify neck config 20 | model.neck.input_shapes = { 21 | "p1": ShapeSpec(channels=384), 22 | "p2": ShapeSpec(channels=768), 23 | "p3": ShapeSpec(channels=1536), 24 | } 25 | model.neck.in_features = ["p1", "p2", "p3"] 26 | model.with_box_refine = True 27 | model.as_two_stage = True 28 | 29 | # modify training config 30 | train.init_checkpoint = "/mnt/pretrained_backbone/swin_large_patch4_window7_224_22k.pth" 31 | train.output_dir = "./output/h_deformable_detr_swin_large_two_stage_36ep" 32 | train.max_iter = 270000 33 | -------------------------------------------------------------------------------- /projects/h_deformable_detr/configs/h_deformable_detr_swin_large_two_stage_36ep_900queries.py: -------------------------------------------------------------------------------- 1 | from .h_deformable_detr_r50_50ep import train, dataloader, optimizer, model 2 | from detrex.config import get_config 3 | from detectron2.config import LazyCall as L 4 | from detectron2.layers import ShapeSpec 5 | from detectron2.modeling.backbone import SwinTransformer 6 | 7 | lr_multiplier = get_config("common/coco_schedule.py").lr_multiplier_36ep 8 | # modify model config 9 | model.backbone = L(SwinTransformer)( 10 | pretrain_img_size=224, 11 | embed_dim=192, 12 | depths=(2, 2, 18, 2), 13 | num_heads=(6, 12, 24, 48), 14 | drop_path_rate=0.5, 15 | window_size=7, 16 | out_indices=(1, 2, 3), 17 | ) 18 | 19 | # modify neck config 20 | model.neck.input_shapes = { 21 | "p1": ShapeSpec(channels=384), 22 | "p2": ShapeSpec(channels=768), 23 | 
"p3": ShapeSpec(channels=1536), 24 | } 25 | model.neck.in_features = ["p1", "p2", "p3"] 26 | model.with_box_refine = True 27 | model.as_two_stage = True 28 | 29 | model.num_queries_one2one = 900 30 | model.transformer.two_stage_num_proposals = 2400 31 | 32 | # modify training config 33 | train.init_checkpoint = "/mnt/pretrained_backbone/swin_large_patch4_window7_224_22k.pth" 34 | train.output_dir = "./output/h_deformable_detr_swin_large_two_stage_36ep_900queries" 35 | train.max_iter = 270000 36 | -------------------------------------------------------------------------------- /projects/h_deformable_detr/configs/h_deformable_detr_swin_tiny_two_stage_12ep.py: -------------------------------------------------------------------------------- 1 | from .h_deformable_detr_r50_50ep import train, dataloader, optimizer, model 2 | from detrex.config import get_config 3 | from detectron2.config import LazyCall as L 4 | from detectron2.layers import ShapeSpec 5 | from detectron2.modeling.backbone import SwinTransformer 6 | 7 | lr_multiplier = get_config("common/coco_schedule.py").lr_multiplier_12ep 8 | # modify model config 9 | model.backbone = L(SwinTransformer)( 10 | pretrain_img_size=224, 11 | embed_dim=96, 12 | depths=(2, 2, 6, 2), 13 | num_heads=(3, 6, 12, 24), 14 | drop_path_rate=0.2, 15 | window_size=7, 16 | out_indices=(1, 2, 3), 17 | ) 18 | 19 | # modify neck config 20 | model.neck.input_shapes = { 21 | "p1": ShapeSpec(channels=192), 22 | "p2": ShapeSpec(channels=384), 23 | "p3": ShapeSpec(channels=768), 24 | } 25 | model.neck.in_features = ["p1", "p2", "p3"] 26 | model.with_box_refine = True 27 | model.as_two_stage = True 28 | 29 | # modify training config 30 | train.init_checkpoint = "/mnt/pretrained_backbone/swin_tiny_patch4_window7_224.pth" 31 | train.output_dir = "./output/h_deformable_detr_swin_tiny_two_stage_12ep" 32 | train.max_iter = 90000 33 | -------------------------------------------------------------------------------- 
/projects/h_deformable_detr/configs/h_deformable_detr_swin_tiny_two_stage_36ep.py: -------------------------------------------------------------------------------- 1 | from .h_deformable_detr_r50_50ep import train, dataloader, optimizer, model 2 | from detrex.config import get_config 3 | from detectron2.config import LazyCall as L 4 | from detectron2.layers import ShapeSpec 5 | from detectron2.modeling.backbone import SwinTransformer 6 | 7 | lr_multiplier = get_config("common/coco_schedule.py").lr_multiplier_36ep 8 | # modify model config 9 | model.backbone = L(SwinTransformer)( 10 | pretrain_img_size=224, 11 | embed_dim=96, 12 | depths=(2, 2, 6, 2), 13 | num_heads=(3, 6, 12, 24), 14 | drop_path_rate=0.2, 15 | window_size=7, 16 | out_indices=(1, 2, 3), 17 | ) 18 | 19 | # modify neck config 20 | model.neck.input_shapes = { 21 | "p1": ShapeSpec(channels=192), 22 | "p2": ShapeSpec(channels=384), 23 | "p3": ShapeSpec(channels=768), 24 | } 25 | model.neck.in_features = ["p1", "p2", "p3"] 26 | model.with_box_refine = True 27 | model.as_two_stage = True 28 | 29 | # modify training config 30 | train.init_checkpoint = "/mnt/pretrained_backbone/swin_tiny_patch4_window7_224.pth" 31 | train.output_dir = "./output/h_deformable_detr_swin_tiny_two_stage_36ep" 32 | train.max_iter = 270000 33 | -------------------------------------------------------------------------------- /projects/h_deformable_detr/modeling/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2022 The IDEA Authors. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 
6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | from .h_deformable_transformer import ( 17 | HDeformableDetrTransformerEncoder, 18 | HDeformableDetrTransformerDecoder, 19 | HDeformableDetrTransformer, 20 | ) 21 | from .h_deformable_detr import HDeformableDETR 22 | from .deformable_criterion import DeformableCriterion 23 | -------------------------------------------------------------------------------- /projects/maskdino/assets/dinosaur.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IDEA-Research/detrex/c56d32e3d0262cff9835ebe80a0642965ae0cb3e/projects/maskdino/assets/dinosaur.png -------------------------------------------------------------------------------- /projects/maskdino/assets/framework.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IDEA-Research/detrex/c56d32e3d0262cff9835ebe80a0642965ae0cb3e/projects/maskdino/assets/framework.jpg -------------------------------------------------------------------------------- /projects/maskdino/assets/instance.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IDEA-Research/detrex/c56d32e3d0262cff9835ebe80a0642965ae0cb3e/projects/maskdino/assets/instance.png -------------------------------------------------------------------------------- /projects/maskdino/assets/panoptic.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/IDEA-Research/detrex/c56d32e3d0262cff9835ebe80a0642965ae0cb3e/projects/maskdino/assets/panoptic.png -------------------------------------------------------------------------------- /projects/maskdino/assets/semantic.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IDEA-Research/detrex/c56d32e3d0262cff9835ebe80a0642965ae0cb3e/projects/maskdino/assets/semantic.png -------------------------------------------------------------------------------- /projects/maskdino/assets/sota.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IDEA-Research/detrex/c56d32e3d0262cff9835ebe80a0642965ae0cb3e/projects/maskdino/assets/sota.png -------------------------------------------------------------------------------- /projects/maskdino/configs/data/coco_instance_seg.py: -------------------------------------------------------------------------------- 1 | from omegaconf import OmegaConf 2 | 3 | import detectron2.data.transforms as T 4 | from detectron2.config import LazyCall as L 5 | from detectron2.data import ( 6 | build_detection_test_loader, 7 | build_detection_train_loader, 8 | get_detection_dataset_dicts, 9 | ) 10 | from detectron2.evaluation import COCOEvaluator 11 | 12 | # from detrex.data import DetrDatasetMapper 13 | # from projects.maskDINO.data.dataset_mappers.coco_instance_lsj_aug_dataset_mapper import COCOInstanceLSJDatasetMapper, build_transform_gen 14 | from detrex.data.dataset_mappers import COCOInstanceNewBaselineDatasetMapper,coco_instance_transform_gen 15 | dataloader = OmegaConf.create() 16 | 17 | dataloader.train = L(build_detection_train_loader)( 18 | dataset=L(get_detection_dataset_dicts)(names="coco_2017_train"), 19 | mapper=L(COCOInstanceNewBaselineDatasetMapper)( 20 | augmentation=L(coco_instance_transform_gen)( 21 | image_size=1024, 22 | min_scale=0.1, 23 | max_scale=2.0, 24 | 
random_flip="horizontal" 25 | ), 26 | is_train=True, 27 | image_format="RGB", 28 | ), 29 | total_batch_size=16, 30 | num_workers=4, 31 | ) 32 | 33 | dataloader.test = L(build_detection_test_loader)( 34 | dataset=L(get_detection_dataset_dicts)(names="coco_2017_val", filter_empty=False), 35 | mapper=L(COCOInstanceNewBaselineDatasetMapper)( 36 | augmentation=[ 37 | L(T.ResizeShortestEdge)( 38 | short_edge_length=800, 39 | max_size=1333, 40 | ), 41 | ], 42 | is_train=False, 43 | image_format="RGB", 44 | ), 45 | num_workers=4, 46 | ) 47 | 48 | dataloader.evaluator = L(COCOEvaluator)( 49 | dataset_name="${..test.dataset.names}", 50 | ) 51 | -------------------------------------------------------------------------------- /projects/maskdino/data/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | from . import datasets 3 | # from . import datasets_detr 4 | -------------------------------------------------------------------------------- /projects/maskdino/data/dataset_mappers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | -------------------------------------------------------------------------------- /projects/maskdino/evaluation/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IDEA-Research/detrex/c56d32e3d0262cff9835ebe80a0642965ae0cb3e/projects/maskdino/evaluation/__init__.py -------------------------------------------------------------------------------- /projects/maskdino/modeling/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) IDEA, Inc. and its affiliates. 
2 | from .backbone.swin import D2SwinTransformer 3 | from .pixel_decoder.maskdino_encoder import MaskDINOEncoder 4 | from .meta_arch.maskdino_head import MaskDINOHead 5 | 6 | -------------------------------------------------------------------------------- /projects/maskdino/modeling/backbone/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | -------------------------------------------------------------------------------- /projects/maskdino/modeling/meta_arch/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) IDEA, Inc. and its affiliates. 2 | 3 | -------------------------------------------------------------------------------- /projects/maskdino/modeling/pixel_decoder/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) IDEA, Inc. and its affiliates. 2 | -------------------------------------------------------------------------------- /projects/maskdino/modeling/transformer_decoder/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) IDEA, Inc. and its affiliates. 2 | from .maskdino_decoder import MaskDINODecoder 3 | 4 | -------------------------------------------------------------------------------- /projects/maskdino/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
2 | # import misc -------------------------------------------------------------------------------- /projects/pnp_detr/README.md: -------------------------------------------------------------------------------- 1 | ## PnP-DETR: Towards Efficient Visual Analysis with Transformers 2 | 3 | Tao Wang, Li Yuan, Yunpeng Chen, Jiashi Feng, Shuicheng Yan 4 | 5 | [[`arXiv`](https://arxiv.org/abs/2109.07036)] [[`BibTeX`](#citing-pnp-detr)] 6 | 7 |
8 | 9 |

10 | 11 | 12 | ## Training 13 | Training PnP-DETR model for 300 epochs: 14 | ```bash 15 | cd detrex 16 | python tools/train_net.py --config-file projects/pnp_detr/configs/pnp_detr_r50_300ep.py --num-gpus 8 17 | ``` 18 | By default, we use 8 GPUs with total batch size as 64 for training. 19 | 20 | ## Evaluation 21 | Model evaluation can be done as follows: 22 | ```bash 23 | cd detrex 24 | python tools/train_net.py --config-file projects/pnp_detr/configs/path/to/config.py \ 25 | --eval-only train.init_checkpoint=/path/to/model_checkpoint 26 | ``` 27 | 28 | 29 | ## Citing PnP-DETR 30 | ```BibTex 31 | @inproceedings{wang2021pnp, 32 | title={PnP-DETR: Towards Efficient Visual Analysis with Transformers}, 33 | author={Wang, Tao and Yuan, Li and Chen, Yunpeng and Feng, Jiashi and Yan, Shuicheng}, 34 | booktitle={Proceedings of the IEEE/CVF International Conference on Computer Vision}, 35 | pages={4661--4670}, 36 | year={2021} 37 | } 38 | ``` -------------------------------------------------------------------------------- /projects/pnp_detr/assets/PnP-DETR.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IDEA-Research/detrex/c56d32e3d0262cff9835ebe80a0642965ae0cb3e/projects/pnp_detr/assets/PnP-DETR.png -------------------------------------------------------------------------------- /projects/pnp_detr/configs/pnp_detr_r101_300ep.py: -------------------------------------------------------------------------------- 1 | from .pnp_detr_r50_300ep import train, dataloader, optimizer, lr_multiplier, model 2 | 3 | # modify model config 4 | model.backbone.stages.depth = 101 5 | 6 | # modify training config 7 | train.init_checkpoint = "detectron2://ImageNetPretrained/torchvision/R-50.pkl" 8 | train.output_dir = "./output/detr_r101_300ep" 9 | -------------------------------------------------------------------------------- /projects/pnp_detr/configs/pnp_detr_r50_300ep.py: 
-------------------------------------------------------------------------------- 1 | from detrex.config import get_config 2 | from .models.pnp_detr_r50 import model 3 | 4 | dataloader = get_config("common/data/coco_detr.py").dataloader 5 | lr_multiplier = get_config("common/coco_schedule.py").lr_multiplier_50ep 6 | optimizer = get_config("common/optim.py").AdamW 7 | train = get_config("common/train.py").train 8 | 9 | # modify training config 10 | train.init_checkpoint = "detectron2://ImageNetPretrained/torchvision/R-50.pkl" 11 | train.output_dir = "./output/detr_r50_300ep" 12 | train.max_iter = 554400 13 | 14 | # modify lr_multiplier 15 | lr_multiplier.scheduler.milestones = [369600, 554400] 16 | 17 | # modify optimizer config 18 | optimizer.weight_decay = 1e-4 19 | optimizer.params.lr_factor_func = lambda module_name: 0.1 if "backbone" in module_name else 1 20 | 21 | # modify dataloader config 22 | dataloader.train.num_workers = 16 23 | dataloader.train.total_batch_size = 64 24 | -------------------------------------------------------------------------------- /projects/pnp_detr/modeling/__init__.py: -------------------------------------------------------------------------------- 1 | from .detr import PnPDETR 2 | from .transformer import ( 3 | PnPDetrTransformerEncoder, 4 | PnPDetrTransformerDecoder, 5 | PnPDetrTransformer, 6 | ) 7 | -------------------------------------------------------------------------------- /projects/sqr_detr/assets/sqr_detr_overall.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IDEA-Research/detrex/c56d32e3d0262cff9835ebe80a0642965ae0cb3e/projects/sqr_detr/assets/sqr_detr_overall.png -------------------------------------------------------------------------------- /projects/sqr_detr/configs/dab_detr_r50_50ep_sqr.py: -------------------------------------------------------------------------------- 1 | from detrex.config import get_config 2 | from .models.dab_detr_r50_sqr 
import model 3 | 4 | dataloader = get_config("common/data/coco_detr.py").dataloader 5 | optimizer = get_config("common/optim.py").AdamW 6 | lr_multiplier = get_config("common/coco_schedule.py").lr_multiplier_50ep 7 | train = get_config("common/train.py").train 8 | 9 | # initialize checkpoint to be loaded 10 | train.init_checkpoint = "detectron2://ImageNetPretrained/torchvision/R-50.pkl" 11 | train.output_dir = "./output/dab_detr_r50_50ep_sqr" 12 | 13 | # max training iterations 14 | train.max_iter = 375000 15 | 16 | # run evaluation every 5000 iters 17 | train.eval_period = 5000 18 | 19 | # log training information every 20 iters 20 | train.log_period = 20 21 | 22 | # save checkpoint every 5000 iters 23 | train.checkpointer.period = 5000 24 | 25 | # gradient clipping for training 26 | train.clip_grad.enabled = True 27 | train.clip_grad.params.max_norm = 0.1 28 | train.clip_grad.params.norm_type = 2 29 | 30 | # set training devices 31 | train.device = "cuda" 32 | model.device = train.device 33 | 34 | # modify optimizer config 35 | optimizer.lr = 1e-4 36 | optimizer.betas = (0.9, 0.999) 37 | optimizer.weight_decay = 1e-4 38 | optimizer.params.lr_factor_func = lambda module_name: 0.1 if "backbone" in module_name else 1 39 | 40 | # modify dataloader config 41 | dataloader.train.num_workers = 16 42 | 43 | # please notice that this is total batch size. 
44 | # surpose you're using 4 gpus for training and the batch size for 45 | # each gpu is 16/4 = 4 46 | dataloader.train.total_batch_size = 16 47 | 48 | # dump the testing results into output_dir for visualization 49 | dataloader.evaluator.output_dir = train.output_dir 50 | -------------------------------------------------------------------------------- /projects/sqr_detr/modeling/__init__.py: -------------------------------------------------------------------------------- 1 | from .dab_transformer_sqr import ( 2 | DabDetrTransformerDecoder_qr, 3 | ) 4 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | cloudpickle 2 | hydra-core 3 | omegaconf 4 | pybind11 5 | flake8 6 | isort 7 | black 8 | autoflake 9 | timm 10 | pytest 11 | scipy 12 | psutil 13 | opencv-python 14 | wandb 15 | submitit 16 | einops 17 | fairscale -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | 2 | [isort] 3 | line_length=100 4 | multi_line_output=3 5 | include_trailing_comma=True 6 | known_standard_library=numpy,setuptools,mock 7 | skip=./datasets,docs,detectron2 8 | skip_glob=*/__init__.py,**/configs/**,**/tests/config/**, detectron2/*/__init__.py 9 | known_myself=detrex 10 | known_third_party=fvcore,matplotlib,cv2,torch,torchvision,PIL,pycocotools,yacs,termcolor,cityscapesscripts,tabulate,tqdm,scipy,lvis,psutil,pkg_resources,caffe2,onnx,panopticapi,black,isort,av,iopath,omegaconf,hydra,yaml,pydoc,submitit,cloudpickle,packaging 11 | no_lines_before=STDLIB,THIRDPARTY 12 | sections=FUTURE,STDLIB,THIRDPARTY,myself,FIRSTPARTY,LOCALFOLDER 13 | default_section=FIRSTPARTY 14 | 15 | [mypy] 16 | python_version=3.7 17 | ignore_missing_imports = True 18 | warn_unused_configs = True 19 | disallow_untyped_defs = True 20 | check_untyped_defs = True 21 
| warn_unused_ignores = True 22 | warn_redundant_casts = True 23 | show_column_numbers = True 24 | follow_imports = silent 25 | allow_redefinition = True 26 | ; Require all functions to be annotated 27 | disallow_incomplete_defs = True -------------------------------------------------------------------------------- /tests/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2022 The IDEA Authors. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | 17 | from .attention import MultiheadAttention 18 | from .transformer import ( 19 | OriginalConditionalAttentionEncoder, 20 | OriginalConditionalAttentionDecoder, 21 | ) 22 | from .potision_embedding import ( 23 | DeformablePositionEmbeddingSine, 24 | DABPositionEmbeddingSine, 25 | DABPositionEmbeddingLearned, 26 | ) 27 | from .mlp import MLP 28 | from .losses import ( 29 | sigmoid_focal_loss, 30 | dice_loss, 31 | ) 32 | -------------------------------------------------------------------------------- /tests/utils/mlp.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2022 The IDEA Authors. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 
6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | import torch.nn as nn 17 | import torch.nn.functional as F 18 | 19 | 20 | class MLP(nn.Module): 21 | """Very simple multi-layer perceptron (also called FFN)""" 22 | 23 | def __init__(self, input_dim, hidden_dim, output_dim, num_layers): 24 | super().__init__() 25 | self.num_layers = num_layers 26 | h = [hidden_dim] * (num_layers - 1) 27 | self.layers = nn.ModuleList( 28 | nn.Linear(n, k) for n, k in zip([input_dim] + h, h + [output_dim]) 29 | ) 30 | 31 | def forward(self, x): 32 | for i, layer in enumerate(self.layers): 33 | x = F.relu(layer(x)) if i < self.num_layers - 1 else layer(x) 34 | return x 35 | -------------------------------------------------------------------------------- /tools/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IDEA-Research/detrex/c56d32e3d0262cff9835ebe80a0642965ae0cb3e/tools/__init__.py --------------------------------------------------------------------------------