├── .clang-format ├── .flake8 ├── .gitignore ├── .gitmodules ├── .readthedocs.yaml ├── CITATION.cff ├── LICENSE ├── README.md ├── assets ├── detr_arch.png ├── detrex_logo.png └── logo_2.png ├── changlog.md ├── configs ├── common │ ├── coco_schedule.py │ ├── common_schedule.py │ ├── data │ │ ├── coco.py │ │ ├── coco_detr.py │ │ ├── constants.py │ │ └── custom.py │ ├── optim.py │ └── train.py └── hydra │ ├── slurm │ └── research.yaml │ └── train_args.yaml ├── demo ├── README.md ├── __init__.py ├── demo.py ├── mot_demo.py ├── mot_predictors.py └── predictors.py ├── detrex ├── __init__.py ├── checkpoint │ ├── __init__.py │ ├── c2_model_loading.py │ └── detection_checkpoint.py ├── config │ ├── __init__.py │ └── config.py ├── data │ ├── __init__.py │ ├── dataset_mappers │ │ ├── __init__.py │ │ ├── coco_instance_new_baseline_dataset_mapper.py │ │ ├── coco_panoptic_new_baseline_dataset_mapper.py │ │ ├── mask_former_instance_dataset_mapper.py │ │ ├── mask_former_panoptic_dataset_mapper.py │ │ └── mask_former_semantic_dataset_mapper.py │ ├── datasets │ │ ├── __init__.py │ │ ├── register_ade20k_full.py │ │ ├── register_ade20k_instance.py │ │ ├── register_ade20k_panoptic.py │ │ ├── register_coco_panoptic_annos_semseg.py │ │ ├── register_coco_stuff_10k.py │ │ ├── register_mapillary_vistas.py │ │ └── register_mapillary_vistas_panoptic.py │ ├── detr_dataset_mapper.py │ └── transforms │ │ ├── __init__.py │ │ └── color_augmentation.py ├── layers │ ├── __init__.py │ ├── attention.py │ ├── box_ops.py │ ├── conv.py │ ├── csrc │ │ ├── DCNv3 │ │ │ ├── dcnv3.h │ │ │ ├── dcnv3_cpu.cpp │ │ │ ├── dcnv3_cpu.h │ │ │ ├── dcnv3_cuda.cu │ │ │ ├── dcnv3_cuda.h │ │ │ └── dcnv3_im2col_cuda.cuh │ │ ├── MsDeformAttn │ │ │ ├── ms_deform_attn.h │ │ │ ├── ms_deform_attn_cpu.cpp │ │ │ ├── ms_deform_attn_cpu.h │ │ │ ├── ms_deform_attn_cuda.cu │ │ │ ├── ms_deform_attn_cuda.h │ │ │ └── ms_deform_im2col_cuda.cuh │ │ ├── cuda_version.cu │ │ └── vision.cpp │ ├── dcn_v3.py │ ├── denoising.py │ ├── layer_norm.py 
│ ├── mlp.py │ ├── multi_scale_deform_attn.py │ ├── position_embedding.py │ ├── shape_spec.py │ └── transformer.py ├── modeling │ ├── __init__.py │ ├── backbone │ │ ├── __init__.py │ │ ├── convnext.py │ │ ├── eva.py │ │ ├── eva_02.py │ │ ├── eva_02_utils.py │ │ ├── focalnet.py │ │ ├── internimage.py │ │ ├── resnet.py │ │ ├── timm_backbone.py │ │ ├── torchvision_backbone.py │ │ ├── torchvision_resnet.py │ │ └── utils.py │ ├── criterion │ │ ├── __init__.py │ │ ├── base_criterion.py │ │ └── criterion.py │ ├── ema.py │ ├── losses │ │ ├── __init__.py │ │ ├── cross_entropy_loss.py │ │ ├── dice_loss.py │ │ ├── focal_loss.py │ │ ├── giou_loss.py │ │ ├── smooth_l1_loss.py │ │ └── utils.py │ ├── matcher │ │ ├── __init__.py │ │ ├── match_cost.py │ │ ├── matcher.py │ │ └── modified_matcher.py │ └── neck │ │ ├── __init__.py │ │ └── channel_mapper.py └── utils │ ├── __init__.py │ ├── dist.py │ ├── events.py │ └── misc.py ├── dev ├── linter.sh └── run_unittest.sh ├── docs ├── Makefile ├── README.md ├── requirements.txt └── source │ ├── _static │ └── css │ │ └── line_space.css │ ├── _templates │ ├── .gitkeep │ └── line_space.html │ ├── changelog.md │ ├── conf.py │ ├── index.rst │ ├── modules │ ├── detrex.config.rst │ ├── detrex.data.rst │ ├── detrex.layers.rst │ ├── detrex.modeling.rst │ ├── detrex.utils.rst │ └── index.rst │ └── tutorials │ ├── Config_System.md │ ├── Converters.md │ ├── Customize_Training.md │ ├── Download_Pretrained_Weights.md │ ├── FAQs.md │ ├── Getting_Started.md │ ├── Installation.md │ ├── Model_Zoo.md │ ├── Tools.md │ ├── Using_Pretrained_Backbone.md │ ├── assets │ ├── annotation_demo.jpg │ ├── cosine_lr_scheduler.png │ ├── demo_output.jpg │ ├── dino_prediction_demo.jpg │ ├── exponential_lr_scheduler.png │ ├── linear_lr_scheduler.png │ ├── multi_step_example.png │ ├── multi_step_lr_scheduler.png │ ├── step_lr_scheduler.png │ └── step_lr_with_fixed_gamma.png │ └── index.rst ├── projects ├── README.md ├── align_detr │ ├── README.md │ ├── configs │ │ ├── 
aligndetr_k=2_r50_4scale_12ep.py │ │ ├── aligndetr_k=2_r50_4scale_24ep.py │ │ ├── aligndetr_k=2_r50_4scale_36ep.py │ │ └── models │ │ │ └── aligndetr_r50.py │ └── modeling │ │ ├── __init__.py │ │ ├── aligndetr.py │ │ ├── criterions │ │ ├── __init__.py │ │ ├── aligndetr_dn_criterion.py │ │ ├── base_criterion.py │ │ ├── many_to_one_criterion.py │ │ └── two_stage_criterion.py │ │ ├── losses │ │ ├── __init__.py │ │ └── losses.py │ │ ├── matchers │ │ ├── __init__.py │ │ └── mixed_matcher.py │ │ └── transformer.py ├── anchor_detr │ ├── README.md │ ├── assets │ │ └── anchor_detr_arch.png │ ├── configs │ │ ├── anchor_detr_r101_50ep.py │ │ ├── anchor_detr_r101_dc5_50ep.py │ │ ├── anchor_detr_r50_50ep.py │ │ ├── anchor_detr_r50_dc5_50ep.py │ │ └── models │ │ │ └── anchor_detr_r50.py │ └── modeling │ │ ├── __init__.py │ │ ├── anchor_detr.py │ │ ├── anchor_detr_transformer.py │ │ ├── row_column_decoupled_attention.py │ │ └── utils.py ├── co_mot │ ├── README.md │ ├── configs │ │ ├── common │ │ │ ├── dancetrack_schedule.py │ │ │ └── data │ │ │ │ └── dancetrack_mot.py │ │ ├── mot_r50.py │ │ └── mot_r50_4scale_10ep.py │ ├── data │ │ ├── __init__.py │ │ ├── datasets │ │ │ ├── __init__.py │ │ │ └── register_dancetrack_mot.py │ │ ├── mot_build.py │ │ ├── mot_dataset_mapper.py │ │ └── transforms │ │ │ ├── __init__.py │ │ │ └── mot_transforms.py │ ├── evaluation │ │ ├── __init__.py │ │ └── dancetrack_evaluation.py │ ├── modeling │ │ ├── __init__.py │ │ ├── matcher.py │ │ ├── mot.py │ │ ├── mot_transformer.py │ │ └── qim.py │ ├── train_net.py │ └── util │ │ ├── __init__.py │ │ ├── checkpoint.py │ │ └── misc.py ├── conditional_detr │ ├── README.md │ ├── assets │ │ └── attention-maps.png │ ├── configs │ │ ├── conditional_detr_r101_50ep.py │ │ ├── conditional_detr_r101_dc5_50ep.py │ │ ├── conditional_detr_r50_50ep.py │ │ ├── conditional_detr_r50_dc5_50ep.py │ │ └── models │ │ │ ├── conditional_detr_r50.py │ │ │ └── conditional_detr_r50_dc5.py │ ├── converter.py │ └── modeling │ │ ├── 
__init__.py │ │ ├── conditional_detr.py │ │ └── conditional_transformer.py ├── dab_deformable_detr │ ├── README.md │ ├── assets │ │ └── dab_detr_overall.png │ ├── configs │ │ ├── dab_deformable_detr_r50_50ep.py │ │ ├── dab_deformable_detr_r50_two_stage_50ep.py │ │ └── models │ │ │ └── dab_deformable_detr_r50.py │ └── modeling │ │ ├── __init__.py │ │ ├── dab_deformable_detr.py │ │ ├── dab_deformable_transformer.py │ │ └── two_stage_criterion.py ├── dab_detr │ ├── README.md │ ├── assets │ │ ├── dab_detr_details.png │ │ └── dab_detr_overall.png │ ├── configs │ │ ├── dab_detr_r101_50ep.py │ │ ├── dab_detr_r101_dc5_50ep.py │ │ ├── dab_detr_r50_3patterns_50ep.py │ │ ├── dab_detr_r50_50ep.py │ │ ├── dab_detr_r50_dc5_3patterns_50ep.py │ │ ├── dab_detr_r50_dc5_50ep.py │ │ ├── dab_detr_swin_b_in21k_50ep.py │ │ ├── dab_detr_swin_t_in1k_50ep.py │ │ └── models │ │ │ ├── dab_detr_r50.py │ │ │ ├── dab_detr_r50_3patterns.py │ │ │ ├── dab_detr_r50_dc5.py │ │ │ ├── dab_detr_swin_base.py │ │ │ └── dab_detr_swin_tiny.py │ └── modeling │ │ ├── __init__.py │ │ ├── dab_detr.py │ │ └── dab_transformer.py ├── deformable_detr │ ├── README.md │ ├── assets │ │ └── deformable_detr.png │ ├── configs │ │ ├── deformable_detr_r50_50ep.py │ │ ├── deformable_detr_r50_two_stage_50ep.py │ │ ├── deformable_detr_r50_with_box_refinement_50ep.py │ │ └── models │ │ │ └── deformable_detr_r50.py │ ├── convert_two_stage.py │ ├── converter.py │ ├── modeling │ │ ├── __init__.py │ │ ├── deformable_criterion.py │ │ ├── deformable_detr.py │ │ └── deformable_transformer.py │ └── train_net.py ├── deta │ ├── README.md │ ├── assets │ │ └── deta.png │ ├── configs │ │ ├── data │ │ │ └── coco_detr_larger.py │ │ ├── deta_r50_5scale_12ep.py │ │ ├── deta_r50_5scale_12ep_bs8.py │ │ ├── deta_r50_5scale_no_frozen_backbone.py │ │ ├── deta_swin_large_finetune_24ep.py │ │ ├── improved_deformable_detr_baseline_50ep.py │ │ ├── models │ │ │ ├── deta_r50.py │ │ │ └── deta_swin.py │ │ └── scheduler │ │ │ └── coco_scheduler.py │ ├── 
modeling │ │ ├── __init__.py │ │ ├── assigner.py │ │ ├── deformable_detr.py │ │ ├── deformable_transformer.py │ │ └── deta_criterion.py │ └── train_net.py ├── detr │ ├── README.md │ ├── assets │ │ └── DETR.png │ ├── configs │ │ ├── detr_r101_300ep.py │ │ ├── detr_r101_dc5_300ep.py │ │ ├── detr_r50_300ep.py │ │ ├── detr_r50_dc5_300ep.py │ │ └── models │ │ │ ├── detr_r50.py │ │ │ └── detr_r50_dc5.py │ ├── converter.py │ └── modeling │ │ ├── __init__.py │ │ ├── detr.py │ │ └── transformer.py ├── dino │ ├── README.md │ ├── assets │ │ └── dino_arch.png │ ├── configs │ │ ├── dino-convnext │ │ │ ├── dino_convnext_base_384_4scale_12ep.py │ │ │ ├── dino_convnext_large_384_4scale_12ep.py │ │ │ ├── dino_convnext_small_384_4scale_12ep.py │ │ │ └── dino_convnext_tiny_384_4scale_12ep.py │ │ ├── dino-eva-01 │ │ │ ├── dino_eva_01_1536_4scale_12ep.py │ │ │ └── dino_eva_01_4scale_12ep.py │ │ ├── dino-focal │ │ │ ├── dino_focal_base_lrf_fl3_4scale_12ep.py │ │ │ ├── dino_focal_small_lrf_fl3_4scale_12ep.py │ │ │ ├── dino_focal_tiny_lrf_fl3_4scale_12ep.py │ │ │ ├── dino_focalnet_large_lrf_384_4scale_12ep.py │ │ │ ├── dino_focalnet_large_lrf_384_4scale_36ep.py │ │ │ ├── dino_focalnet_large_lrf_384_fl4_4scale_12ep.py │ │ │ ├── dino_focalnet_large_lrf_384_fl4_5scale_12ep.py │ │ │ ├── dino_focalnet_large_lrf_384_fl4_5scale_36ep.py │ │ │ └── focalnet.py │ │ ├── dino-internimage │ │ │ ├── dino_internimage_base_4scale_12ep.py │ │ │ ├── dino_internimage_large_4scale_12ep.py │ │ │ ├── dino_internimage_small_4scale_12ep.py │ │ │ └── dino_internimage_tiny_4scale_12ep.py │ │ ├── dino-resnet │ │ │ ├── dino_r101_4scale_12ep.py │ │ │ ├── dino_r50_4scale_12ep.py │ │ │ ├── dino_r50_4scale_12ep_300dn.py │ │ │ ├── dino_r50_4scale_12ep_better_hyper.py │ │ │ ├── dino_r50_4scale_12ep_no_frozen.py │ │ │ ├── dino_r50_4scale_24ep.py │ │ │ └── dino_r50_5scale_12ep.py │ │ ├── dino-swin │ │ │ ├── dino_swin_base_384_4scale_12ep.py │ │ │ ├── dino_swin_large_224_4scale_12ep.py │ │ │ ├── 
dino_swin_large_384_4scale_12ep.py │ │ │ ├── dino_swin_large_384_4scale_36ep.py │ │ │ ├── dino_swin_large_384_5scale_12ep.py │ │ │ ├── dino_swin_large_384_5scale_36ep.py │ │ │ ├── dino_swin_small_224_4scale_12ep.py │ │ │ └── dino_swin_tiny_224_4scale_12ep.py │ │ ├── dino-vitdet │ │ │ ├── dino_vitdet_base_4scale_12ep.py │ │ │ ├── dino_vitdet_base_4scale_50ep.py │ │ │ ├── dino_vitdet_large_4scale_12ep.py │ │ │ └── dino_vitdet_large_4scale_50ep.py │ │ ├── models │ │ │ ├── dino_convnext.py │ │ │ ├── dino_eva_01.py │ │ │ ├── dino_focalnet.py │ │ │ ├── dino_internimage.py │ │ │ ├── dino_r50.py │ │ │ ├── dino_swin_base_384.py │ │ │ ├── dino_swin_large_224.py │ │ │ ├── dino_swin_large_384.py │ │ │ ├── dino_swin_small_224.py │ │ │ ├── dino_swin_tiny_224.py │ │ │ └── dino_vitdet.py │ │ ├── timm_example.py │ │ └── torchvision_example.py │ ├── modeling │ │ ├── __init__.py │ │ ├── dino.py │ │ ├── dino_transformer.py │ │ ├── dn_criterion.py │ │ └── two_stage_criterion.py │ └── train_net.py ├── dino_eva │ ├── README.md │ ├── assets │ │ └── dino_arch.png │ ├── configs │ │ ├── common │ │ │ ├── coco_loader_lsj.py │ │ │ ├── coco_loader_lsj_1024.py │ │ │ ├── coco_loader_lsj_1280.py │ │ │ └── coco_loader_lsj_1536.py │ │ ├── dino-eva-01 │ │ │ ├── dino_eva_01_1280_4scale_12ep.py │ │ │ └── dino_eva_01_1536_4scale_12ep.py │ │ ├── dino-eva-02 │ │ │ ├── dino_eva_02_vitdet_b_4attn_1024_lrd0p7_4scale_12ep.py │ │ │ ├── dino_eva_02_vitdet_b_6attn_win32_1536_lrd0p7_4scale_12ep.py │ │ │ ├── dino_eva_02_vitdet_l_4attn_1024_lrd0p8_4scale_12ep.py │ │ │ ├── dino_eva_02_vitdet_l_4attn_1280_lrd0p8_4scale_12ep.py │ │ │ ├── dino_eva_02_vitdet_l_8attn_1536_lrd0p8_4scale_12ep.py │ │ │ └── dino_eva_02_vitdet_l_8attn_win32_1536_lrd0p8_4scale_12ep.py │ │ └── models │ │ │ ├── dino_eva_01.py │ │ │ ├── dino_eva_02.py │ │ │ └── dino_r50.py │ ├── modeling │ │ ├── __init__.py │ │ ├── dino.py │ │ ├── dino_transformer.py │ │ ├── dn_criterion.py │ │ └── two_stage_criterion.py │ └── train_net.py ├── dn_deformable_detr │ 
├── README.md │ ├── assets │ │ └── dn_detr_arch.png │ ├── configs │ │ ├── dn_deformable_detr_r50_12ep.py │ │ ├── dn_deformable_detr_r50_50ep.py │ │ └── models │ │ │ └── dn_deformable_detr_r50.py │ ├── converter.py │ └── modeling │ │ ├── __init__.py │ │ ├── dn_criterion.py │ │ ├── dn_deformable_detr.py │ │ └── dn_deformable_transformer.py ├── dn_detr │ ├── README.md │ ├── assets │ │ └── dn_detr_arch.png │ ├── configs │ │ ├── dn_detr_r50_50ep.py │ │ ├── dn_detr_r50_dc5_50ep.py │ │ └── models │ │ │ ├── dn_detr_r50.py │ │ │ └── dn_detr_r50_dc5.py │ └── modeling │ │ ├── __init__.py │ │ ├── dn_criterion.py │ │ ├── dn_detr.py │ │ └── dn_transformers.py ├── focus_detr │ ├── README.md │ ├── configs │ │ ├── focus_detr_resnet │ │ │ ├── focus_detr_r101_4scale_12ep.py │ │ │ ├── focus_detr_r101_4scale_24ep.py │ │ │ ├── focus_detr_r101_4scale_36ep.py │ │ │ ├── focus_detr_r50_4scale_12ep.py │ │ │ ├── focus_detr_r50_4scale_24ep.py │ │ │ └── focus_detr_r50_4scale_36ep.py │ │ ├── focus_detr_swin │ │ │ ├── focus_detr_swin_base_224_4scale_36ep.py │ │ │ ├── focus_detr_swin_base_384_4scale_36ep.py │ │ │ ├── focus_detr_swin_large_384_4scale_36ep.py │ │ │ ├── focus_detr_swin_tiny_224_4scale_12ep.py │ │ │ ├── focus_detr_swin_tiny_224_4scale_22k_12ep.py │ │ │ ├── focus_detr_swin_tiny_224_4scale_22k_36ep.py │ │ │ ├── focus_detr_swin_tiny_224_4scale_24ep.py │ │ │ └── focus_detr_swin_tiny_224_4scale_36ep.py │ │ └── models │ │ │ ├── focus_detr_r50.py │ │ │ ├── focus_detr_swin_base_384.py │ │ │ ├── focus_detr_swin_large_224.py │ │ │ ├── focus_detr_swin_large_384.py │ │ │ ├── focus_detr_swin_small_224.py │ │ │ └── focus_detr_swin_tiny_224.py │ └── modeling │ │ ├── __init__.py │ │ ├── dn_criterion.py │ │ ├── focus_detr.py │ │ ├── focus_detr_transformer.py │ │ ├── foreground_supervision.py │ │ ├── transformer_layer.py │ │ └── two_stage_criterion.py ├── group_detr │ ├── README.md │ ├── assets │ │ └── group_detr_arch.png │ ├── configs │ │ ├── group_detr_r50_50ep.py │ │ └── models │ │ │ └── 
group_detr_r50.py │ └── modeling │ │ ├── __init__.py │ │ ├── attention.py │ │ ├── group_criterion.py │ │ ├── group_detr.py │ │ ├── group_detr_transformer.py │ │ └── group_matcher.py ├── h_deformable_detr │ ├── README.md │ ├── assets │ │ └── h_detr_arch.png │ ├── configs │ │ ├── h_deformable_detr_r50_50ep.py │ │ ├── h_deformable_detr_r50_two_stage_12ep.py │ │ ├── h_deformable_detr_r50_two_stage_36ep.py │ │ ├── h_deformable_detr_swin_large_two_stage_12ep.py │ │ ├── h_deformable_detr_swin_large_two_stage_12ep_900queries.py │ │ ├── h_deformable_detr_swin_large_two_stage_36ep.py │ │ ├── h_deformable_detr_swin_large_two_stage_36ep_900queries.py │ │ ├── h_deformable_detr_swin_tiny_two_stage_12ep.py │ │ ├── h_deformable_detr_swin_tiny_two_stage_36ep.py │ │ └── models │ │ │ └── h_deformable_detr_r50.py │ ├── modeling │ │ ├── __init__.py │ │ ├── deformable_criterion.py │ │ ├── h_deformable_detr.py │ │ └── h_deformable_transformer.py │ └── train_net.py ├── maskdino │ ├── README.md │ ├── assets │ │ ├── dinosaur.png │ │ ├── framework.jpg │ │ ├── instance.png │ │ ├── panoptic.png │ │ ├── semantic.png │ │ └── sota.png │ ├── configs │ │ ├── data │ │ │ ├── ade20k_semantic_seg.py │ │ │ ├── coco_instance_seg.py │ │ │ └── coco_panoptic_seg.py │ │ ├── maskdino_r50_ade20k_semantic_seg_160k.py │ │ ├── maskdino_r50_coco_instance_seg_50ep.py │ │ ├── maskdino_r50_coco_panoptic_seg_50ep.py │ │ ├── maskdino_r50_instance_seg_50ep.py │ │ └── models │ │ │ └── maskdino_r50.py │ ├── data │ │ ├── __init__.py │ │ └── dataset_mappers │ │ │ ├── __init__.py │ │ │ └── coco_instance_lsj_aug_dataset_mapper.py │ ├── evaluation │ │ ├── __init__.py │ │ └── instance_evaluation.py │ ├── maskdino.py │ ├── modeling │ │ ├── __init__.py │ │ ├── backbone │ │ │ ├── __init__.py │ │ │ ├── focal.py │ │ │ └── swin.py │ │ ├── criterion.py │ │ ├── matcher.py │ │ ├── meta_arch │ │ │ ├── __init__.py │ │ │ └── maskdino_head.py │ │ ├── pixel_decoder │ │ │ ├── __init__.py │ │ │ ├── maskdino_encoder.py │ │ │ └── 
position_encoding.py │ │ └── transformer_decoder │ │ │ ├── __init__.py │ │ │ ├── dino_decoder.py │ │ │ ├── maskdino_decoder.py │ │ │ └── utils.py │ └── utils │ │ ├── __init__.py │ │ ├── box_ops.py │ │ ├── misc.py │ │ └── utils.py ├── pnp_detr │ ├── README.md │ ├── assets │ │ └── PnP-DETR.png │ ├── configs │ │ ├── models │ │ │ └── pnp_detr_r50.py │ │ ├── pnp_detr_r101_300ep.py │ │ └── pnp_detr_r50_300ep.py │ └── modeling │ │ ├── __init__.py │ │ ├── detr.py │ │ └── transformer.py └── sqr_detr │ ├── README.md │ ├── assets │ └── sqr_detr_overall.png │ ├── configs │ ├── dab_detr_r50_50ep_sqr.py │ └── models │ │ └── dab_detr_r50_sqr.py │ └── modeling │ ├── __init__.py │ └── dab_transformer_sqr.py ├── requirements.txt ├── setup.cfg ├── setup.py ├── tests ├── test_cond_attn.py ├── test_ffn.py ├── test_losses.py ├── test_ms_deform_attn.py ├── test_position_embedding.py ├── test_torchvision_backbone.py ├── test_transformer.py └── utils │ ├── __init__.py │ ├── attention.py │ ├── losses.py │ ├── mlp.py │ ├── potision_embedding.py │ └── transformer.py └── tools ├── README.md ├── __init__.py ├── analyze_model.py ├── benchmark.py ├── hydra_train_net.py ├── train_net.py ├── visualize_data.py └── visualize_json_results.py /.flake8: -------------------------------------------------------------------------------- 1 | # This is an example .flake8 config, used when developing *Black* itself. 2 | # Keep in sync with setup.cfg which is used for source packages. 
3 | 4 | [flake8] 5 | ignore = W503, E203, E221, C901, C408, E741, C407, B017 6 | max-line-length = 120 7 | max-complexity = 18 8 | select = B,C,E,F,W,T4,B9 9 | exclude = build, detectron2 10 | per-file-ignores = 11 | **/__init__.py:F401,F403,E402 12 | **/configs/**.py:F401,E402 13 | configs/**.py:F401,E402 14 | **/tests/config/**.py:F401,E402 15 | tests/config/**.py:F401,E402 16 | tests/**.py: E402 17 | tools/**.py: E402 18 | projects/**/configs/**.py:F401 19 | detectron2/**.py: F401,F403,E402,F811,W391 20 | detectron2/projects/**.py: F401,F403,E402,F811,W391 -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | 2 | [submodule "detectron2"] 3 | path = detectron2 4 | url = https://github.com/facebookresearch/detectron2.git 5 | -------------------------------------------------------------------------------- /.readthedocs.yaml: -------------------------------------------------------------------------------- 1 | # .readthedocs.yaml 2 | # Read the Docs configuration file 3 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details 4 | 5 | # Required 6 | version: 2 7 | 8 | # Set the version of Python and other tools you might need 9 | build: 10 | os: ubuntu-20.04 11 | tools: 12 | python: "3.7" 13 | # You can also specify other tool versions: 14 | # nodejs: "16" 15 | # rust: "1.55" 16 | # golang: "1.17" 17 | 18 | # Build documentation in the docs/ directory with Sphinx 19 | sphinx: 20 | configuration: docs/source/conf.py 21 | 22 | # If using Sphinx, optionally build your docs in additional formats such as PDF 23 | # formats: 24 | # - pdf 25 | 26 | # Optionally declare the Python requirements required to build your docs 27 | python: 28 | install: 29 | - requirements: requirements.txt 30 | - requirements: docs/requirements.txt -------------------------------------------------------------------------------- /CITATION.cff: 
-------------------------------------------------------------------------------- 1 | cff-version: 1.2.0 2 | message: "If you use this software, please cite it as below." 3 | authors: 4 | - name: "detrex Contributors" 5 | title: "IDEA-CVR Detection-Transformer Toolbox and Benchmark" 6 | date-released: 2022-09-21 7 | url: "https://github.com/IDEA-Research/detrex" 8 | license: Apache-2.0 9 | -------------------------------------------------------------------------------- /assets/detr_arch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IDEA-Research/detrex/c56d32e3d0262cff9835ebe80a0642965ae0cb3e/assets/detr_arch.png -------------------------------------------------------------------------------- /assets/detrex_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IDEA-Research/detrex/c56d32e3d0262cff9835ebe80a0642965ae0cb3e/assets/detrex_logo.png -------------------------------------------------------------------------------- /assets/logo_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IDEA-Research/detrex/c56d32e3d0262cff9835ebe80a0642965ae0cb3e/assets/logo_2.png -------------------------------------------------------------------------------- /configs/common/data/coco.py: -------------------------------------------------------------------------------- 1 | from omegaconf import OmegaConf 2 | 3 | import detectron2.data.transforms as T 4 | from detectron2.config import LazyCall as L 5 | from detectron2.data import ( 6 | DatasetMapper, 7 | build_detection_test_loader, 8 | build_detection_train_loader, 9 | get_detection_dataset_dicts, 10 | ) 11 | from detectron2.evaluation import COCOEvaluator 12 | 13 | dataloader = OmegaConf.create() 14 | 15 | dataloader.train = L(build_detection_train_loader)( 16 | 
dataset=L(get_detection_dataset_dicts)(names="coco_2017_train"), 17 | mapper=L(DatasetMapper)( 18 | is_train=True, 19 | augmentations=[ 20 | L(T.ResizeShortestEdge)( 21 | short_edge_length=(640, 672, 704, 736, 768, 800), 22 | sample_style="choice", 23 | max_size=1333, 24 | ), 25 | L(T.RandomFlip)(horizontal=True), 26 | ], 27 | image_format="BGR", 28 | use_instance_mask=True, 29 | ), 30 | total_batch_size=16, 31 | num_workers=4, 32 | ) 33 | 34 | dataloader.test = L(build_detection_test_loader)( 35 | dataset=L(get_detection_dataset_dicts)(names="coco_2017_val", filter_empty=False), 36 | mapper=L(DatasetMapper)( 37 | is_train=False, 38 | augmentations=[ 39 | L(T.ResizeShortestEdge)(short_edge_length=800, max_size=1333), 40 | ], 41 | image_format="${...train.mapper.image_format}", 42 | ), 43 | num_workers=4, 44 | ) 45 | 46 | dataloader.evaluator = L(COCOEvaluator)( 47 | dataset_name="${..test.dataset.names}", 48 | ) 49 | -------------------------------------------------------------------------------- /configs/common/data/constants.py: -------------------------------------------------------------------------------- 1 | constants = dict( 2 | imagenet_rgb256_mean=[123.675, 116.28, 103.53], 3 | imagenet_rgb256_std=[58.395, 57.12, 57.375], 4 | imagenet_bgr256_mean=[103.530, 116.280, 123.675], 5 | # When using pre-trained models in Detectron1 or any MSRA models, 6 | # std has been absorbed into its conv1 weights, so the std needs to be set 1. 
7 | # Otherwise, you can use [57.375, 57.120, 58.395] (ImageNet std) 8 | imagenet_bgr256_std=[1.0, 1.0, 1.0], 9 | ) -------------------------------------------------------------------------------- /configs/common/optim.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from detectron2.config import LazyCall as L 4 | from detectron2.solver.build import get_default_optimizer_params 5 | 6 | SGD = L(torch.optim.SGD)( 7 | params=L(get_default_optimizer_params)( 8 | # params.model is meant to be set to the model object, before instantiating 9 | # the optimizer. 10 | weight_decay_norm=0.0 11 | ), 12 | lr=0.02, 13 | momentum=0.9, 14 | weight_decay=1e-4, 15 | ) 16 | 17 | 18 | AdamW = L(torch.optim.AdamW)( 19 | params=L(get_default_optimizer_params)( 20 | # params.model is meant to be set to the model object, before instantiating 21 | # the optimizer. 22 | base_lr="${..lr}", 23 | weight_decay_norm=0.0, 24 | ), 25 | lr=1e-4, 26 | betas=(0.9, 0.999), 27 | weight_decay=0.1, 28 | ) 29 | -------------------------------------------------------------------------------- /configs/hydra/slurm/research.yaml: -------------------------------------------------------------------------------- 1 | 2 | partition: research # Partition where to submit 3 | ngpus: ${num_gpus} # Number of gpus to request on each node 4 | nodes: ${num_machines} # Number of nodes to request 5 | cpus_per_task: 5 # Number of cpus per task/gpu 6 | timeout: 240 # Duration of the job, in hours 7 | job_name: "detrex" # job_name to display with `squeue` 8 | job_dir: ~ # Job directory; leave empty for default (hydra.run.dir) 9 | exclude_node: ~ # The node(s) to be excluded for slurm assignment, e.g. SH-IDC1-10-198-3-[10,20] 10 | comment: ~ # Comment to pass to scheduler, e.g. priority message 11 | quotatype: ~ # Some clusters may set different quotatype with different priority, e.g. 
reserved/spot 12 | 13 | ddp_comm_mode: "tcp" # ddp communication mode, "file" or "tcp" 14 | share_root: /path/that/can/be/accessed/by/all/machines # for "file" mode only 15 | master_port: ~ # for "tcp" mode only, leave empty to find available port automatically 16 | -------------------------------------------------------------------------------- /configs/hydra/train_args.yaml: -------------------------------------------------------------------------------- 1 | ######### converted from default argparse args ########### 2 | # config_file: '' 3 | config_file: ${pycfg_dir}/${pycfg_file} 4 | resume: false 5 | eval_only: false 6 | num_gpus: 1 7 | num_machines: 1 8 | machine_rank: 0 9 | dist_url: tcp://127.0.0.1:24999 10 | opts: [] 11 | ############################################################ 12 | 13 | # aux params for easier management of overrides 14 | pycfg_dir: projects/detr/configs 15 | pycfg_file: detr_r50_300ep.py 16 | 17 | # use automatic experiment name / output dir 18 | auto_output_dir: True 19 | 20 | hydra: 21 | run: 22 | # https://hydra.cc/docs/configure_hydra/workdir/ 23 | dir: "outputs/${hydra.job.override_dirname}/${now:%Y%m%d-%H:%M:%S}" 24 | job: 25 | config: 26 | override_dirname: 27 | kv_sep: '.' 28 | item_sep: '-' 29 | exclude_keys: 30 | - config_file 31 | - pycfg_dir 32 | - slurm 33 | - slurm.quotatype 34 | - dist_url 35 | - auto_output_dir 36 | -------------------------------------------------------------------------------- /demo/README.md: -------------------------------------------------------------------------------- 1 | 2 | ## detrex demo 3 | 4 | We provide a command line tool to run a simple demo using pretrained weights. 5 | The usage is explained in [Getting Started with detrex](https://detrex.readthedocs.io/en/latest/tutorials/Getting_Started.html). 
6 | 7 | -------------------------------------------------------------------------------- /demo/__init__.py: -------------------------------------------------------------------------------- 1 | from .predictors import VisualizationDemo 2 | -------------------------------------------------------------------------------- /detrex/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2022 The IDEA Authors. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | from detrex import layers 17 | from detrex import modeling 18 | from detrex import utils 19 | from detrex import data 20 | from detrex import config 21 | -------------------------------------------------------------------------------- /detrex/checkpoint/__init__.py: -------------------------------------------------------------------------------- 1 | from .detection_checkpoint import DetectionCheckpointer -------------------------------------------------------------------------------- /detrex/config/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2022 The IDEA Authors. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 
6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | 17 | from .config import try_get_key, get_config 18 | -------------------------------------------------------------------------------- /detrex/config/config.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2022 The IDEA Authors. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | 17 | import os 18 | import pkg_resources 19 | from omegaconf import OmegaConf 20 | 21 | from detectron2.config import LazyConfig 22 | 23 | 24 | def try_get_key(cfg, *keys, default=None): 25 | """ 26 | Try select keys from lazy cfg until the first key that exists. Otherwise return default. 27 | """ 28 | for k in keys: 29 | none = object() 30 | p = OmegaConf.select(cfg, k, default=none) 31 | if p is not none: 32 | return p 33 | return default 34 | 35 | 36 | def get_config(config_path): 37 | """ 38 | Returns a config object from a config_path. 
39 | 40 | Args: 41 | config_path (str): config file name relative to detrex's "configs/" 42 | directory, e.g., "common/train.py" 43 | 44 | Returns: 45 | omegaconf.DictConfig: a config object 46 | """ 47 | cfg_file = pkg_resources.resource_filename( 48 | "detrex.config", os.path.join("configs", config_path) 49 | ) 50 | if not os.path.exists(cfg_file): 51 | raise RuntimeError("{} not available in detrex configs!".format(config_path)) 52 | cfg = LazyConfig.load(cfg_file) 53 | return cfg 54 | -------------------------------------------------------------------------------- /detrex/data/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2022 The IDEA Authors. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | from .detr_dataset_mapper import DetrDatasetMapper 17 | from .dataset_mappers import ( 18 | COCOInstanceNewBaselineDatasetMapper, 19 | COCOPanopticNewBaselineDatasetMapper, 20 | MaskFormerSemanticDatasetMapper, 21 | MaskFormerInstanceDatasetMapper, 22 | MaskFormerPanopticDatasetMapper, 23 | ) 24 | from . 
import datasets 25 | from .transforms import ColorAugSSDTransform 26 | -------------------------------------------------------------------------------- /detrex/data/dataset_mappers/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2022 The IDEA Authors. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | from .coco_instance_new_baseline_dataset_mapper import build_transform_gen as coco_instance_transform_gen 17 | from .coco_panoptic_new_baseline_dataset_mapper import build_transform_gen as coco_panoptic_transform_gen 18 | from .mask_former_semantic_dataset_mapper import build_transform_gen as maskformer_semantic_transform_gen 19 | from .coco_instance_new_baseline_dataset_mapper import COCOInstanceNewBaselineDatasetMapper 20 | from .coco_panoptic_new_baseline_dataset_mapper import COCOPanopticNewBaselineDatasetMapper 21 | from .mask_former_instance_dataset_mapper import MaskFormerInstanceDatasetMapper 22 | from .mask_former_panoptic_dataset_mapper import MaskFormerPanopticDatasetMapper 23 | from .mask_former_semantic_dataset_mapper import MaskFormerSemanticDatasetMapper 24 | -------------------------------------------------------------------------------- /detrex/data/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2022 The IDEA Authors. 
All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # ------------------------------------------------------------------------------------------------ 16 | # Copyright (c) Facebook, Inc. and its affiliates. 17 | # ------------------------------------------------------------------------------------------------ 18 | 19 | from . import ( 20 | register_ade20k_full, 21 | register_ade20k_panoptic, 22 | register_coco_stuff_10k, 23 | register_mapillary_vistas, 24 | register_coco_panoptic_annos_semseg, 25 | register_ade20k_instance, 26 | register_mapillary_vistas_panoptic, 27 | ) 28 | -------------------------------------------------------------------------------- /detrex/data/transforms/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2022 The IDEA Authors. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | from .color_augmentation import ColorAugSSDTransform -------------------------------------------------------------------------------- /detrex/layers/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2022 The IDEA Authors. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | from .multi_scale_deform_attn import ( 17 | MultiScaleDeformableAttention, 18 | multi_scale_deformable_attn_pytorch, 19 | ) 20 | from .dcn_v3 import ( 21 | DCNv3, 22 | DCNv3Function, 23 | dcnv3_core_pytorch, 24 | ) 25 | from .layer_norm import LayerNorm 26 | from .box_ops import ( 27 | box_cxcywh_to_xyxy, 28 | box_xyxy_to_cxcywh, 29 | box_iou, 30 | generalized_box_iou, 31 | masks_to_boxes, 32 | ) 33 | from .transformer import ( 34 | BaseTransformerLayer, 35 | TransformerLayerSequence, 36 | ) 37 | from .position_embedding import ( 38 | PositionEmbeddingLearned, 39 | PositionEmbeddingSine, 40 | get_sine_pos_embed, 41 | ) 42 | from .mlp import MLP, FFN 43 | from .attention import ( 44 | MultiheadAttention, 45 | ConditionalSelfAttention, 46 | ConditionalCrossAttention, 47 | ) 48 | from .conv import ( 49 | ConvNormAct, 50 | ConvNorm, 51 | ) 52 | from .denoising import ( 53 | apply_box_noise, 54 | apply_label_noise, 55 | GenerateDNQueries, 56 | ) 57 | from .shape_spec import ShapeSpec 58 | -------------------------------------------------------------------------------- /detrex/layers/csrc/DCNv3/dcnv3_cpu.h: -------------------------------------------------------------------------------- 1 | /*! 
2 | ************************************************************************************************** 3 | * InternImage 4 | * Copyright (c) 2022 OpenGVLab 5 | * Licensed under The MIT License [see LICENSE for details] 6 | ************************************************************************************************** 7 | * Modified from 8 | *https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 9 | ************************************************************************************************** 10 | */ 11 | 12 | #pragma once 13 | #include 14 | 15 | at::Tensor dcnv3_cpu_forward(const at::Tensor &input, const at::Tensor &offset, 16 | const at::Tensor &mask, const int kernel_h, 17 | const int kernel_w, const int stride_h, 18 | const int stride_w, const int pad_h, 19 | const int pad_w, const int dilation_h, 20 | const int dilation_w, const int group, 21 | const int group_channels, const float offset_scale, 22 | const int im2col_step); 23 | 24 | std::vector 25 | dcnv3_cpu_backward(const at::Tensor &input, const at::Tensor &offset, 26 | const at::Tensor &mask, const int kernel_h, 27 | const int kernel_w, const int stride_h, const int stride_w, 28 | const int pad_h, const int pad_w, const int dilation_h, 29 | const int dilation_w, const int group, 30 | const int group_channels, const float offset_scale, 31 | const at::Tensor &grad_output, const int im2col_step); -------------------------------------------------------------------------------- /detrex/layers/csrc/DCNv3/dcnv3_cuda.h: -------------------------------------------------------------------------------- 1 | /*! 
2 | ************************************************************************************************** 3 | * InternImage 4 | * Copyright (c) 2022 OpenGVLab 5 | * Licensed under The MIT License [see LICENSE for details] 6 | ************************************************************************************************** 7 | * Modified from 8 | *https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 9 | ************************************************************************************************** 10 | */ 11 | 12 | #pragma once 13 | #include 14 | 15 | at::Tensor dcnv3_cuda_forward(const at::Tensor &input, const at::Tensor &offset, 16 | const at::Tensor &mask, const int kernel_h, 17 | const int kernel_w, const int stride_h, 18 | const int stride_w, const int pad_h, 19 | const int pad_w, const int dilation_h, 20 | const int dilation_w, const int group, 21 | const int group_channels, 22 | const float offset_scale, const int im2col_step); 23 | 24 | std::vector 25 | dcnv3_cuda_backward(const at::Tensor &input, const at::Tensor &offset, 26 | const at::Tensor &mask, const int kernel_h, 27 | const int kernel_w, const int stride_h, const int stride_w, 28 | const int pad_h, const int pad_w, const int dilation_h, 29 | const int dilation_w, const int group, 30 | const int group_channels, const float offset_scale, 31 | const at::Tensor &grad_output, const int im2col_step); -------------------------------------------------------------------------------- /detrex/layers/csrc/MsDeformAttn/ms_deform_attn_cpu.cpp: -------------------------------------------------------------------------------- 1 | /*! 2 | ************************************************************************************************** 3 | * Deformable DETR 4 | * Copyright (c) 2020 SenseTime. All Rights Reserved. 
5 | * Licensed under the Apache License, Version 2.0 [see LICENSE for details] 6 | ************************************************************************************************** 7 | * Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 8 | ************************************************************************************************** 9 | */ 10 | 11 | #include 12 | 13 | #include 14 | #include 15 | 16 | namespace detrex { 17 | 18 | at::Tensor 19 | ms_deform_attn_cpu_forward( 20 | const at::Tensor &value, 21 | const at::Tensor &spatial_shapes, 22 | const at::Tensor &level_start_index, 23 | const at::Tensor &sampling_loc, 24 | const at::Tensor &attn_weight, 25 | const int im2col_step) 26 | { 27 | AT_ERROR("Not implement on cpu"); 28 | } 29 | 30 | std::vector 31 | ms_deform_attn_cpu_backward( 32 | const at::Tensor &value, 33 | const at::Tensor &spatial_shapes, 34 | const at::Tensor &level_start_index, 35 | const at::Tensor &sampling_loc, 36 | const at::Tensor &attn_weight, 37 | const at::Tensor &grad_output, 38 | const int im2col_step) 39 | { 40 | AT_ERROR("Not implement on cpu"); 41 | } 42 | 43 | } // namespace detrex 44 | -------------------------------------------------------------------------------- /detrex/layers/csrc/MsDeformAttn/ms_deform_attn_cpu.h: -------------------------------------------------------------------------------- 1 | /*! 2 | ************************************************************************************************** 3 | * Deformable DETR 4 | * Copyright (c) 2020 SenseTime. All Rights Reserved. 
5 | * Licensed under the Apache License, Version 2.0 [see LICENSE for details] 6 | ************************************************************************************************** 7 | * Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 8 | ************************************************************************************************** 9 | */ 10 | 11 | #pragma once 12 | #include 13 | 14 | namespace detrex { 15 | 16 | at::Tensor 17 | ms_deform_attn_cpu_forward( 18 | const at::Tensor &value, 19 | const at::Tensor &spatial_shapes, 20 | const at::Tensor &level_start_index, 21 | const at::Tensor &sampling_loc, 22 | const at::Tensor &attn_weight, 23 | const int im2col_step); 24 | 25 | std::vector 26 | ms_deform_attn_cpu_backward( 27 | const at::Tensor &value, 28 | const at::Tensor &spatial_shapes, 29 | const at::Tensor &level_start_index, 30 | const at::Tensor &sampling_loc, 31 | const at::Tensor &attn_weight, 32 | const at::Tensor &grad_output, 33 | const int im2col_step); 34 | 35 | } // namespace detrex 36 | -------------------------------------------------------------------------------- /detrex/layers/csrc/MsDeformAttn/ms_deform_attn_cuda.h: -------------------------------------------------------------------------------- 1 | /*! 2 | ************************************************************************************************** 3 | * Deformable DETR 4 | * Copyright (c) 2020 SenseTime. All Rights Reserved. 
5 | * Licensed under the Apache License, Version 2.0 [see LICENSE for details] 6 | ************************************************************************************************** 7 | * Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 8 | ************************************************************************************************** 9 | */ 10 | 11 | #pragma once 12 | #include 13 | 14 | namespace detrex { 15 | 16 | at::Tensor ms_deform_attn_cuda_forward( 17 | const at::Tensor &value, 18 | const at::Tensor &spatial_shapes, 19 | const at::Tensor &level_start_index, 20 | const at::Tensor &sampling_loc, 21 | const at::Tensor &attn_weight, 22 | const int im2col_step); 23 | 24 | std::vector ms_deform_attn_cuda_backward( 25 | const at::Tensor &value, 26 | const at::Tensor &spatial_shapes, 27 | const at::Tensor &level_start_index, 28 | const at::Tensor &sampling_loc, 29 | const at::Tensor &attn_weight, 30 | const at::Tensor &grad_output, 31 | const int im2col_step); 32 | 33 | } // namespace detrex -------------------------------------------------------------------------------- /detrex/layers/csrc/cuda_version.cu: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | namespace detrex { 4 | int get_cudart_version() { 5 | int runtimeVersion; 6 | cudaRuntimeGetVersion(&runtimeVersion); 7 | return runtimeVersion; 8 | } 9 | } // namespace detrex 10 | -------------------------------------------------------------------------------- /detrex/layers/csrc/vision.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved 2 | 3 | #include "MsDeformAttn/ms_deform_attn.h" 4 | #include "DCNv3/dcnv3.h" 5 | 6 | namespace detrex { 7 | 8 | #ifdef WITH_CUDA 9 | extern int get_cudart_version(); 10 | #endif 11 | 12 | std::string get_cuda_version() { 13 | #ifdef WITH_CUDA 14 | std::ostringstream oss; 15 | 16 | // copied from 17 | // https://github.com/pytorch/pytorch/blob/master/aten/src/ATen/cuda/detail/CUDAHooks.cpp#L231 18 | auto printCudaStyleVersion = [&](int v) { 19 | oss << (v / 1000) << "." << (v / 10 % 100); 20 | if (v % 10 != 0) { 21 | oss << "." << (v % 10); 22 | } 23 | }; 24 | printCudaStyleVersion(get_cudart_version()); 25 | return oss.str(); 26 | #else 27 | return std::string("not available"); 28 | #endif 29 | } 30 | 31 | // similar to 32 | // https://github.com/pytorch/pytorch/blob/master/aten/src/ATen/Version.cpp 33 | std::string get_compiler_version() { 34 | std::ostringstream ss; 35 | #if defined(__GNUC__) 36 | #ifndef __clang__ 37 | { ss << "GCC " << __GNUC__ << "." << __GNUC_MINOR__; } 38 | #endif 39 | #endif 40 | 41 | #if defined(__clang_major__) 42 | { 43 | ss << "clang " << __clang_major__ << "." << __clang_minor__ << "." 44 | << __clang_patchlevel__; 45 | } 46 | #endif 47 | 48 | #if defined(_MSC_VER) 49 | { ss << "MSVC " << _MSC_FULL_VER; } 50 | #endif 51 | return ss.str(); 52 | } 53 | 54 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 55 | m.def("ms_deform_attn_forward", &ms_deform_attn_forward, "ms_deform_attn_forward"); 56 | m.def("ms_deform_attn_backward", &ms_deform_attn_backward, "ms_deform_attn_backward"); 57 | m.def("dcnv3_forward", &dcnv3_forward, "dcnv3_forward"); 58 | m.def("dcnv3_backward", &dcnv3_backward, "dcnv3_backward"); 59 | } 60 | 61 | } // namespace detrex -------------------------------------------------------------------------------- /detrex/layers/shape_spec.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2022 The IDEA Authors. All rights reserved. 
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ------------------------------------------------------------------------------------------------
# # Copyright (c) Facebook, Inc. and its affiliates.
# ------------------------------------------------------------------------------------------------

from dataclasses import dataclass
from typing import Optional


@dataclass
class ShapeSpec:
    """
    A simple structure that contains basic shape specification about a tensor.
    It is often used as the auxiliary inputs/outputs of models,
    to complement the lack of shape inference ability among pytorch modules.
    """

    # Number of channels (feature dimension); None when unspecified.
    channels: Optional[int] = None
    # Spatial height; None when unspecified.
    height: Optional[int] = None
    # Spatial width; None when unspecified.
    width: Optional[int] = None
    # Downsampling stride of this feature map — presumably relative to the
    # model input, per the detectron2 convention; confirm with callers.
    stride: Optional[int] = None
--------------------------------------------------------------------------------
/detrex/modeling/__init__.py:
--------------------------------------------------------------------------------
# coding=utf-8
# Copyright 2022 The IDEA Authors. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | from .criterion import SetCriterion, BaseCriterion 17 | from .matcher import HungarianMatcher 18 | from .losses import ( 19 | cross_entropy, 20 | CrossEntropyLoss, 21 | sigmoid_focal_loss, 22 | FocalLoss, 23 | dice_loss, 24 | DiceLoss, 25 | smooth_l1_loss, 26 | l1_loss, 27 | L1Loss, 28 | giou_loss, 29 | GIoULoss, 30 | reduce_loss, 31 | weight_reduce_loss, 32 | ) 33 | from .neck import ChannelMapper 34 | from .backbone import ( 35 | BasicStem, 36 | ResNet, 37 | ResNetBlockBase, 38 | make_stage, 39 | BottleneckBlock, 40 | BasicBlock, 41 | ConvNeXt, 42 | FocalNet, 43 | TimmBackbone, 44 | ) 45 | -------------------------------------------------------------------------------- /detrex/modeling/backbone/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2022 The IDEA Authors. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | from .timm_backbone import TimmBackbone 17 | from .torchvision_backbone import TorchvisionBackbone 18 | from .resnet import ( 19 | BasicStem, 20 | ResNet, 21 | ResNetBlockBase, 22 | make_stage, 23 | BottleneckBlock, 24 | BasicBlock, 25 | DeformBottleneckBlock, 26 | ) 27 | from .convnext import ConvNeXt 28 | from .focalnet import FocalNet 29 | from .internimage import InternImage 30 | from .eva import EVAViT, SimpleFeaturePyramid, get_vit_lr_decay_rate 31 | from .eva_02 import EVA02_ViT 32 | -------------------------------------------------------------------------------- /detrex/modeling/criterion/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2022 The IDEA Authors. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | from .criterion import SetCriterion 17 | from .base_criterion import BaseCriterion 18 | -------------------------------------------------------------------------------- /detrex/modeling/losses/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2022 The IDEA Authors. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 
6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | from .cross_entropy_loss import cross_entropy, CrossEntropyLoss 17 | from .focal_loss import sigmoid_focal_loss, FocalLoss 18 | from .dice_loss import dice_loss, DiceLoss 19 | from .smooth_l1_loss import smooth_l1_loss, l1_loss, L1Loss 20 | from .giou_loss import giou_loss, GIoULoss 21 | from .utils import reduce_loss, weight_reduce_loss 22 | -------------------------------------------------------------------------------- /detrex/modeling/matcher/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2022 The IDEA Authors. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | from .matcher import HungarianMatcher 17 | from .match_cost import FocalLossCost, CrossEntropyCost, L1Cost, GIoUCost 18 | from .modified_matcher import HungarianMatcher as ModifedMatcher 19 | -------------------------------------------------------------------------------- /detrex/modeling/neck/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2022 The IDEA Authors. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | from .channel_mapper import ChannelMapper 17 | -------------------------------------------------------------------------------- /detrex/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2022 The IDEA Authors. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 


from .misc import (
    interpolate,
    inverse_sigmoid,
)
from .dist import (
    is_dist_avail_and_initialized,
    get_world_size,
    get_rank,
)
from .events import WandbWriter
--------------------------------------------------------------------------------
/dev/linter.sh:
--------------------------------------------------------------------------------
#!/bin/bash -e
# Copyright (c) Facebook, Inc. and its affiliates.

# cd to detrex project root
cd "$(dirname "${BASH_SOURCE[0]}")/.."

# Pin formatter major versions: different black/isort releases produce
# different output, which would churn the whole tree.
{
  black --version | grep -E "22\." > /dev/null
} || {
  echo "Linter requires 'black==22.*' !"
  exit 1
}

ISORT_VERSION=$(isort --version-number)
if [[ "$ISORT_VERSION" != 4.3* ]]; then
  echo "Linter requires isort==4.3.21 !"
  exit 1
fi

# Echo each command from here on, for CI log readability.
set -v

echo "Running autoflake ..."
# Exclude the vendored detectron2 checkout — it is formatted upstream.
autoflake --remove-unused-variables --in-place --recursive . --exclude=detectron2

echo "Running isort ..."
isort -y -sp . --atomic

echo "Running black ..."
black -l 100 . --exclude=detectron2

echo "Running flake8 ..."
# Fall back to module invocation when flake8 is not on PATH.
if [ -x "$(command -v flake8)" ]; then
  flake8 .
else
  python3 -m flake8 .
fi


echo "Running clang-format ..."
# Format all C/C++/CUDA/ObjC sources in place.
find . -regex ".*\.\(cpp\|c\|cc\|cu\|cxx\|h\|hh\|hpp\|hxx\|tcc\|mm\|m\)" -print0 | xargs -0 clang-format -i

# Run Phabricator's arc lint only when it is installed (non-fatal otherwise).
command -v arc > /dev/null && arc lint
--------------------------------------------------------------------------------
/dev/run_unittest.sh:
--------------------------------------------------------------------------------
#!/bin/bash -e

# cd to detrex project root
cd "$(dirname "${BASH_SOURCE[0]}")/.."
5 | 6 | pytest --disable-warnings ./tests -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | SOURCEDIR = source 8 | BUILDDIR = build 9 | 10 | # Put it first so that "make" without argument is like "make help". 11 | help: 12 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 13 | 14 | .PHONY: help Makefile 15 | 16 | html: Makefile 17 | @$(SPHINXBUILD) -M html "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 18 | 19 | clean: Makefile 20 | @rm -rf build 21 | -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- 1 | ## Read detrex Documentation 2 | The latest documentation built from this directory is available at [detrex.readthedocs.io](https://detrex.readthedocs.io/en/latest/). 3 | 4 | 5 | ## Build detrex Documentation 6 | 1. Install detrex according to [Installation](https://detrex.readthedocs.io/en/latest/tutorials/Installation.html). 7 | 2. 
Install additional libraries and run `make html` for building the docs: 8 | ```bash 9 | cd ${detrex-path}/docs 10 | pip install -r requirements.txt --user 11 | make html 12 | ``` 13 | -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | sphinx 2 | jinja2<3.1 3 | recommonmark==0.6.0 4 | sphinx-rtd-theme==1.0.0 5 | 6 | # Dependencies here are only those required by import 7 | termcolor 8 | numpy 9 | tqdm 10 | matplotlib 11 | tabulate 12 | Pillow 13 | future 14 | cloudpickle 15 | hydra-core 16 | omegaconf==2.1.0 17 | pybind11 18 | flake8==3.8.1 19 | isort==4.3.21 20 | black==22.3.0 21 | autoflake 22 | timm 23 | pytest 24 | scipy==1.7.3 25 | fvcore==0.1.5.post20220512 26 | # git+https://github.com/facebookresearch/fvcore.git 27 | git+https://github.com/facebookresearch/detectron2.git 28 | https://download.pytorch.org/whl/cpu/torch-1.8.1%2Bcpu-cp37-cp37m-linux_x86_64.whl 29 | https://download.pytorch.org/whl/cpu/torchvision-0.9.1%2Bcpu-cp37-cp37m-linux_x86_64.whl 30 | git+https://github.com/IDEA-Research/detrex.git 31 | 32 | 33 | -------------------------------------------------------------------------------- /docs/source/_static/css/line_space.css: -------------------------------------------------------------------------------- 1 | .rst-content .section ol li>*, .rst-content .section ul li>* { 2 | margin-top: 0px; 3 | margin-bottom: 0px; 4 | } 5 | 6 | .rst-content .section ol li>*, .rst-content .section li ul>* { 7 | margin-top: 0px; 8 | margin-bottom: 0px; 9 | } 10 | 11 | .rst-content .section ol li>*, .rst-content .section ul li ul { 12 | margin-top: 0px; 13 | margin-bottom: 0px; 14 | } 15 | -------------------------------------------------------------------------------- /docs/source/_templates/.gitkeep: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/IDEA-Research/detrex/c56d32e3d0262cff9835ebe80a0642965ae0cb3e/docs/source/_templates/.gitkeep -------------------------------------------------------------------------------- /docs/source/_templates/line_space.html: -------------------------------------------------------------------------------- 1 | {% extends "!line_space.html" %} 2 | {% set css_files = css_files + [ "_static/css/line_space.css" ] %} 3 | -------------------------------------------------------------------------------- /docs/source/index.rst: -------------------------------------------------------------------------------- 1 | .. detrex documentation master file, created by 2 | sphinx-quickstart on Mon Nov 29 10:26:07 2021. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | Welcome to detrex's documentation! 7 | ====================================== 8 | 9 | .. toctree:: 10 | :maxdepth: 2 11 | 12 | tutorials/index 13 | modules/index 14 | changelog.md -------------------------------------------------------------------------------- /docs/source/modules/detrex.config.rst: -------------------------------------------------------------------------------- 1 | detrex.config 2 | ############################## 3 | 4 | .. currentmodule:: detrex.config 5 | .. automodule:: detrex.config 6 | :members: 7 | try_get_key, 8 | get_config, 9 | 10 | -------------------------------------------------------------------------------- /docs/source/modules/detrex.data.rst: -------------------------------------------------------------------------------- 1 | detrex.data 2 | ############################## 3 | 4 | .. currentmodule:: detrex.data 5 | .. 
automodule:: detrex.data 6 | :members: 7 | DetrDatasetMapper, 8 | 9 | -------------------------------------------------------------------------------- /docs/source/modules/detrex.layers.rst: -------------------------------------------------------------------------------- 1 | detrex.layers 2 | ############################## 3 | 4 | .. currentmodule:: detrex.layers 5 | .. automodule:: detrex.layers 6 | :members: 7 | MultiheadAttention, 8 | MultiScaleDeformableAttention, 9 | ConditionalSelfAttention, 10 | ConditionalCrossAttention, 11 | GenerateDNQueries, 12 | apply_label_noise, 13 | apply_box_noise, 14 | FFN, 15 | MLP, 16 | PositionEmbeddingSine, 17 | PositionEmbeddingLearned, 18 | LayerNorm, 19 | get_sine_pos_embed, 20 | BaseTransformerLayer, 21 | TransformerLayerSequence, 22 | ConvNormAct, 23 | box_cxcywh_to_xyxy, 24 | box_xyxy_to_cxcywh, 25 | box_iou, 26 | generalized_box_iou, 27 | masks_to_boxes, -------------------------------------------------------------------------------- /docs/source/modules/detrex.modeling.rst: -------------------------------------------------------------------------------- 1 | detrex.modeling 2 | ############################## 3 | 4 | backbone 5 | ------------------------------ 6 | .. currentmodule:: detrex.modeling 7 | .. automodule:: detrex.modeling.backbone 8 | :member-order: bysource 9 | :members: 10 | ResNet, 11 | make_stage, 12 | ConvNeXt, 13 | FocalNet, 14 | TimmBackbone, 15 | TorchvisionBackbone, 16 | 17 | neck 18 | ------------------------------ 19 | .. currentmodule:: detrex.modeling 20 | .. automodule:: detrex.modeling.neck 21 | :member-order: bysource 22 | :members: 23 | ChannelMapper, 24 | 25 | 26 | matcher 27 | ------------------------------ 28 | .. currentmodule:: detrex.modeling 29 | .. automodule:: detrex.modeling.matcher 30 | :member-order: bysource 31 | :members: 32 | HungarianMatcher, 33 | 34 | 35 | losses 36 | ------------------------------ 37 | .. currentmodule:: detrex.modeling 38 | .. 
automodule:: detrex.modeling.losses 39 | :member-order: bysource 40 | :members: 41 | sigmoid_focal_loss, 42 | dice_loss, -------------------------------------------------------------------------------- /docs/source/modules/detrex.utils.rst: -------------------------------------------------------------------------------- 1 | detrex.utils 2 | ############################## 3 | 4 | .. currentmodule:: detrex.utils 5 | .. automodule:: detrex.utils 6 | :members: 7 | is_dist_avail_and_initialized, 8 | get_world_size, 9 | interpolate, 10 | inverse_sigmoid, -------------------------------------------------------------------------------- /docs/source/modules/index.rst: -------------------------------------------------------------------------------- 1 | API Documentation 2 | ================= 3 | 4 | .. toctree:: 5 | :maxdepth: 2 6 | 7 | detrex.config 8 | detrex.data 9 | detrex.layers 10 | detrex.modeling 11 | detrex.utils -------------------------------------------------------------------------------- /docs/source/tutorials/assets/annotation_demo.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IDEA-Research/detrex/c56d32e3d0262cff9835ebe80a0642965ae0cb3e/docs/source/tutorials/assets/annotation_demo.jpg -------------------------------------------------------------------------------- /docs/source/tutorials/assets/cosine_lr_scheduler.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IDEA-Research/detrex/c56d32e3d0262cff9835ebe80a0642965ae0cb3e/docs/source/tutorials/assets/cosine_lr_scheduler.png -------------------------------------------------------------------------------- /docs/source/tutorials/assets/demo_output.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IDEA-Research/detrex/c56d32e3d0262cff9835ebe80a0642965ae0cb3e/docs/source/tutorials/assets/demo_output.jpg 
-------------------------------------------------------------------------------- /docs/source/tutorials/assets/dino_prediction_demo.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IDEA-Research/detrex/c56d32e3d0262cff9835ebe80a0642965ae0cb3e/docs/source/tutorials/assets/dino_prediction_demo.jpg -------------------------------------------------------------------------------- /docs/source/tutorials/assets/exponential_lr_scheduler.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IDEA-Research/detrex/c56d32e3d0262cff9835ebe80a0642965ae0cb3e/docs/source/tutorials/assets/exponential_lr_scheduler.png -------------------------------------------------------------------------------- /docs/source/tutorials/assets/linear_lr_scheduler.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IDEA-Research/detrex/c56d32e3d0262cff9835ebe80a0642965ae0cb3e/docs/source/tutorials/assets/linear_lr_scheduler.png -------------------------------------------------------------------------------- /docs/source/tutorials/assets/multi_step_example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IDEA-Research/detrex/c56d32e3d0262cff9835ebe80a0642965ae0cb3e/docs/source/tutorials/assets/multi_step_example.png -------------------------------------------------------------------------------- /docs/source/tutorials/assets/multi_step_lr_scheduler.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IDEA-Research/detrex/c56d32e3d0262cff9835ebe80a0642965ae0cb3e/docs/source/tutorials/assets/multi_step_lr_scheduler.png -------------------------------------------------------------------------------- /docs/source/tutorials/assets/step_lr_scheduler.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/IDEA-Research/detrex/c56d32e3d0262cff9835ebe80a0642965ae0cb3e/docs/source/tutorials/assets/step_lr_scheduler.png -------------------------------------------------------------------------------- /docs/source/tutorials/assets/step_lr_with_fixed_gamma.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IDEA-Research/detrex/c56d32e3d0262cff9835ebe80a0642965ae0cb3e/docs/source/tutorials/assets/step_lr_with_fixed_gamma.png -------------------------------------------------------------------------------- /docs/source/tutorials/index.rst: -------------------------------------------------------------------------------- 1 | Tutorials 2 | ========= 3 | 4 | .. toctree:: 5 | :glob: 6 | :maxdepth: 2 7 | 8 | Installation.md 9 | Getting_Started.md 10 | Config_System.md 11 | Converters.md 12 | Download_Pretrained_Weights.md 13 | Using_Pretrained_Backbone.md 14 | Tools.md 15 | Customize_Training.md 16 | Model_Zoo.md 17 | FAQs.md 18 | 19 | -------------------------------------------------------------------------------- /projects/align_detr/configs/aligndetr_k=2_r50_4scale_12ep.py: -------------------------------------------------------------------------------- 1 | from detrex.config import get_config 2 | from .models.aligndetr_r50 import model 3 | 4 | # get default config 5 | dataloader = get_config("common/data/coco_detr.py").dataloader 6 | optimizer = get_config("common/optim.py").AdamW 7 | lr_multiplier = get_config("common/coco_schedule.py").lr_multiplier_12ep 8 | train = get_config("common/train.py").train 9 | 10 | model.criterion.match_num = [2,2,2,2,2,2,1] 11 | model.criterion.tau = 1.5 12 | # modify training config 13 | train.init_checkpoint = "detectron2://ImageNetPretrained/torchvision/R-50.pkl" 14 | train.output_dir = "./output/aligndetr_k2_12ep" 15 | 16 | # max training iterations 17 | 
22 | # log training information every 100 iters 23 | train.log_period = 100 24 | 25 | # save checkpoint every 10000 iters 26 | train.checkpointer.period = 10000
22 | # log training information every 100 iters 23 | train.log_period = 100 24 | 25 | # save checkpoint every 10000 iters 26 | train.checkpointer.period = 10000
22 | # log training information every 100 iters 23 | train.log_period = 100 24 | 25 | # save checkpoint every 10000 iters 26 | train.checkpointer.period = 10000
13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | 17 | from .transformer import ( 18 | TransformerEncoder, 19 | TransformerDecoder, 20 | Transformer, 21 | ) 22 | from .aligndetr import AlignDETR 23 | from .criterions import AlignDETRCriterion 24 | from .matchers import MixedMatcher -------------------------------------------------------------------------------- /projects/align_detr/modeling/criterions/__init__.py: -------------------------------------------------------------------------------- 1 | from .base_criterion import BaseCriterion 2 | from .aligndetr_dn_criterion import AlignDETRCriterion 3 | from .many_to_one_criterion import ManyToOneCriterion -------------------------------------------------------------------------------- /projects/align_detr/modeling/losses/__init__.py: -------------------------------------------------------------------------------- 1 | from .losses import * -------------------------------------------------------------------------------- /projects/align_detr/modeling/matchers/__init__.py: -------------------------------------------------------------------------------- 1 | from .mixed_matcher import MixedMatcher -------------------------------------------------------------------------------- /projects/anchor_detr/assets/anchor_detr_arch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IDEA-Research/detrex/c56d32e3d0262cff9835ebe80a0642965ae0cb3e/projects/anchor_detr/assets/anchor_detr_arch.png -------------------------------------------------------------------------------- /projects/anchor_detr/configs/anchor_detr_r101_50ep.py: -------------------------------------------------------------------------------- 1 | from .anchor_detr_r50_50ep import ( 2 | train, 3 | dataloader, 4 | optimizer, 5 | lr_multiplier, 6 | ) 7 | from .models.anchor_detr_r50 import model 8 | 9 | # modify training config 10 | 
19 | # log training information every 20 iters 20 | train.log_period = 20
44 | # suppose you're using 4 gpus for training and the batch size for
------------------------------------------------------------------------ 2 | # Copyright (c) 2021 megvii-model. All Rights Reserved. 3 | # ------------------------------------------------------------------------ 4 | 5 | import math 6 | 7 | import torch 8 | 9 | 10 | def pos2posemb2d(pos, num_pos_feats=128, temperature=10000): 11 | scale = 2 * math.pi 12 | pos = pos * scale 13 | dim_t = torch.arange(num_pos_feats, dtype=torch.float32, device=pos.device) 14 | dim_t = temperature ** ( 15 | 2 * torch.div(dim_t, 2, rounding_mode="floor") / num_pos_feats 16 | ) 17 | pos_x = pos[..., 0, None] / dim_t 18 | pos_y = pos[..., 1, None] / dim_t 19 | pos_x = torch.stack((pos_x[..., 0::2].sin(), pos_x[..., 1::2].cos()), dim=-1).flatten(-2) 20 | pos_y = torch.stack((pos_y[..., 0::2].sin(), pos_y[..., 1::2].cos()), dim=-1).flatten(-2) 21 | posemb = torch.cat((pos_y, pos_x), dim=-1) 22 | return posemb 23 | 24 | 25 | def pos2posemb1d(pos, num_pos_feats=256, temperature=10000): 26 | scale = 2 * math.pi 27 | pos = pos * scale 28 | dim_t = torch.arange(num_pos_feats, dtype=torch.float32, device=pos.device) 29 | dim_t = temperature ** ( 30 | 2 * torch.div(dim_t, 2, rounding_mode="floor") / num_pos_feats 31 | ) 32 | pos_x = pos[..., None] / dim_t 33 | posemb = torch.stack((pos_x[..., 0::2].sin(), pos_x[..., 1::2].cos()), dim=-1).flatten(-2) 34 | return posemb 35 | 36 | 37 | def mask2pos(mask): 38 | not_mask = ~mask 39 | y_embed = not_mask[:, :, 0].cumsum(1, dtype=torch.float32) 40 | x_embed = not_mask[:, 0, :].cumsum(1, dtype=torch.float32) 41 | y_embed = (y_embed - 0.5) / y_embed[:, -1:] 42 | x_embed = (x_embed - 0.5) / x_embed[:, -1:] 43 | return y_embed, x_embed 44 | -------------------------------------------------------------------------------- /projects/co_mot/configs/common/dancetrack_schedule.py: -------------------------------------------------------------------------------- 1 | from fvcore.common.param_scheduler import MultiStepParamScheduler 2 | 3 | from detectron2.config import 
LazyCall as L 4 | from detectron2.solver import WarmupParamScheduler 5 | 6 | 7 | def default_dancetrack_scheduler(epochs=50, decay_epochs=40, warmup_epochs=0, max_iter_epoch=5225): 8 | """ 9 | Returns the config for a default multi-step LR scheduler such as "50epochs", 10 | commonly referred to in papers, where every 1x has the total length of 1440k 11 | training images (~12 COCO epochs). LR is decayed once at the end of training. 12 | 13 | Args: 14 | epochs (int): total training epochs. 15 | decay_epochs (int): lr decay steps. 16 | warmup_epochs (int): warmup epochs. 17 | 18 | Returns: 19 | DictConfig: configs that define the multiplier for LR during training 20 | """ 21 | # total number of iterations assuming 8 batch size, using 41796/8=5225 22 | total_steps_16bs = epochs * max_iter_epoch 23 | decay_steps = decay_epochs * max_iter_epoch 24 | warmup_steps = warmup_epochs * max_iter_epoch 25 | scheduler = L(MultiStepParamScheduler)( 26 | values=[1.0, 0.1], 27 | milestones=[decay_steps, total_steps_16bs], 28 | ) 29 | return L(WarmupParamScheduler)( 30 | scheduler=scheduler, 31 | warmup_length=warmup_steps / total_steps_16bs, 32 | warmup_method="linear", 33 | warmup_factor=0.001, 34 | ) 35 | 36 | 37 | # default scheduler for detr 38 | lr_multiplier_12ep = default_dancetrack_scheduler(12, 11, 0, 5225) 39 | -------------------------------------------------------------------------------- /projects/co_mot/data/__init__.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Author: 颜峰 && bphengyan@163.com 3 | Date: 2023-05-31 09:24:33 4 | LastEditors: 颜峰 && bphengyan@163.com 5 | LastEditTime: 2023-05-31 09:24:33 6 | FilePath: /detrex/projects/co_mot/data/__init__.py 7 | Description: 8 | 9 | Copyright (c) 2023 by ${git_name_email}, All Rights Reserved. 10 | ''' 11 | # coding=utf-8 12 | # Copyright 2022 The IDEA Authors. All rights reserved. 
13 | # 14 | # Licensed under the Apache License, Version 2.0 (the "License"); 15 | # you may not use this file except in compliance with the License. 16 | # You may obtain a copy of the License at 17 | # 18 | # http://www.apache.org/licenses/LICENSE-2.0 19 | # 20 | # Unless required by applicable law or agreed to in writing, software 21 | # distributed under the License is distributed on an "AS IS" BASIS, 22 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 23 | # See the License for the specific language governing permissions and 24 | # limitations under the License. 25 | 26 | from .mot_dataset_mapper import MotDatasetMapper, MotDatasetInferenceMapper 27 | from . import datasets 28 | from .mot_build import build_mot_train_loader, build_mot_test_loader, mot_collate_fn -------------------------------------------------------------------------------- /projects/co_mot/data/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Author: 颜峰 && bphengyan@163.com 3 | Date: 2023-05-31 09:41:04 4 | LastEditors: 颜峰 && bphengyan@163.com 5 | LastEditTime: 2023-05-31 09:41:05 6 | FilePath: /detrex/projects/co_mot/data/datasets/__init__.py 7 | Description: 8 | 9 | Copyright (c) 2023 by ${git_name_email}, All Rights Reserved. 10 | ''' 11 | # coding=utf-8 12 | # Copyright 2022 The IDEA Authors. All rights reserved. 13 | # 14 | # Licensed under the Apache License, Version 2.0 (the "License"); 15 | # you may not use this file except in compliance with the License. 16 | # You may obtain a copy of the License at 17 | # 18 | # http://www.apache.org/licenses/LICENSE-2.0 19 | # 20 | # Unless required by applicable law or agreed to in writing, software 21 | # distributed under the License is distributed on an "AS IS" BASIS, 22 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
23 | # See the License for the specific language governing permissions and 24 | # limitations under the License. 25 | # ------------------------------------------------------------------------------------------------ 26 | # Copyright (c) Facebook, Inc. and its affiliates. 27 | # ------------------------------------------------------------------------------------------------ 28 | 29 | from . import ( 30 | register_dancetrack_mot, 31 | ) 32 | -------------------------------------------------------------------------------- /projects/co_mot/data/transforms/__init__.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Author: 颜峰 && bphengyan@163.com 3 | Date: 2023-05-31 09:41:55 4 | LastEditors: 颜峰 && bphengyan@163.com 5 | LastEditTime: 2023-05-31 09:41:56 6 | FilePath: /detrex/projects/co_mot/data/transforms/__init__.py 7 | Description: 8 | 9 | Copyright (c) 2023 by ${git_name_email}, All Rights Reserved. 10 | ''' 11 | # coding=utf-8 12 | # Copyright 2022 The IDEA Authors. All rights reserved. 13 | # 14 | # Licensed under the Apache License, Version 2.0 (the "License"); 15 | # you may not use this file except in compliance with the License. 16 | # You may obtain a copy of the License at 17 | # 18 | # http://www.apache.org/licenses/LICENSE-2.0 19 | # 20 | # Unless required by applicable law or agreed to in writing, software 21 | # distributed under the License is distributed on an "AS IS" BASIS, 22 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 23 | # See the License for the specific language governing permissions and 24 | # limitations under the License. 25 | 26 | from . import mot_transforms -------------------------------------------------------------------------------- /projects/co_mot/evaluation/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
2 | from .dancetrack_evaluation import DancetrackEvaluator 3 | 4 | __all__ = [k for k in globals().keys() if not k.startswith("_")] 5 | -------------------------------------------------------------------------------- /projects/co_mot/modeling/__init__.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Author: 颜峰 && bphengyan@163.com 3 | Date: 2023-05-26 10:06:20 4 | LastEditors: 颜峰 && bphengyan@163.com 5 | LastEditTime: 2023-05-30 16:03:02 6 | FilePath: /detrex/projects/co_mot/modeling/__init__.py 7 | Description: 8 | 9 | Copyright (c) 2023 by ${git_name_email}, All Rights Reserved. 10 | ''' 11 | # coding=utf-8 12 | # Copyright 2022 The IDEA Authors. All rights reserved. 13 | # 14 | # Licensed under the Apache License, Version 2.0 (the "License"); 15 | # you may not use this file except in compliance with the License. 16 | # You may obtain a copy of the License at 17 | # 18 | # http://www.apache.org/licenses/LICENSE-2.0 19 | # 20 | # Unless required by applicable law or agreed to in writing, software 21 | # distributed under the License is distributed on an "AS IS" BASIS, 22 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 23 | # See the License for the specific language governing permissions and 24 | # limitations under the License. 
25 | 26 | 27 | from .mot import MOT 28 | from .mot import ClipMatcher as MOTClipMatcher 29 | from .mot import TrackerPostProcess as MOTTrackerPostProcess 30 | from .mot import RuntimeTrackerBase as MOTRuntimeTrackerBase 31 | 32 | from .mot_transformer import DeformableTransformer as MOTDeformableTransformer 33 | 34 | from .qim import QueryInteractionModuleGroup as MOTQueryInteractionModuleGroup 35 | 36 | from .matcher import HungarianMatcherGroup as MOTHungarianMatcherGroup 37 | 38 | -------------------------------------------------------------------------------- /projects/co_mot/util/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------ 2 | # Copyright (c) 2022 megvii-research. All Rights Reserved. 3 | # ------------------------------------------------------------------------ 4 | # Modified from Deformable DETR (https://github.com/fundamentalvision/Deformable-DETR) 5 | # Copyright (c) 2020 SenseTime. All Rights Reserved. 6 | # ------------------------------------------------------------------------ 7 | # Modified from DETR (https://github.com/facebookresearch/detr) 8 | # Copyright (c) Facebook, Inc. and its affiliates. 
13 | train.init_checkpoint = "detectron2://ImageNetPretrained/torchvision/R-50.pkl"  # FIXME(review): backbone depth is set to 101 above, but this loads R-50 weights — point to an R-101 pretrained checkpoint
get_config 2 | from .models.conditional_detr_r50 import model 3 | 4 | dataloader = get_config("common/data/coco_detr.py").dataloader 5 | optimizer = get_config("common/optim.py").AdamW 6 | lr_multiplier = get_config("common/coco_schedule.py").lr_multiplier_50ep 7 | train = get_config("common/train.py").train 8 | 9 | # modify training config 10 | train.init_checkpoint = "detectron2://ImageNetPretrained/torchvision/R-50.pkl" 11 | train.output_dir = "./output/conditional_detr_r50_50ep" 12 | 13 | # max training iterations 14 | train.max_iter = 375000 15 | 16 | # run evaluation every 5000 iters 17 | train.eval_period = 5000 18 | 19 | # log training infomation every 20 iters 20 | train.log_period = 20 21 | 22 | # save checkpoint every 5000 iters 23 | train.checkpointer.period = 5000 24 | 25 | # gradient clipping for training 26 | train.clip_grad.enabled = True 27 | train.clip_grad.params.max_norm = 0.1 28 | train.clip_grad.params.norm_type = 2 29 | 30 | # set training devices 31 | train.device = "cuda" 32 | model.device = train.device 33 | 34 | # modify optimizer config 35 | optimizer.lr = 1e-4 36 | optimizer.betas = (0.9, 0.999) 37 | optimizer.weight_decay = 1e-4 38 | optimizer.params.lr_factor_func = lambda module_name: 0.1 if "backbone" in module_name else 1 39 | 40 | # modify dataloader config 41 | dataloader.train.num_workers = 16 42 | 43 | # please notice that this is total batch size. 
44 | # surpose you're using 4 gpus for training and the batch size for 45 | # each gpu is 16/4 = 4 46 | dataloader.train.total_batch_size = 16 47 | 48 | # dump the testing results into output_dir for visualization 49 | dataloader.evaluator.output_dir = train.output_dir 50 | -------------------------------------------------------------------------------- /projects/conditional_detr/configs/conditional_detr_r50_dc5_50ep.py: -------------------------------------------------------------------------------- 1 | from .conditional_detr_r50_50ep import ( 2 | train, 3 | dataloader, 4 | optimizer, 5 | lr_multiplier, 6 | ) 7 | from .models.conditional_detr_r50_dc5 import model 8 | 9 | # modify training config 10 | train.init_checkpoint = "https://download.pytorch.org/models/resnet50-0676ba61.pth" 11 | train.output_dir = "./output/conditional_detr_r50_dc5_50ep" 12 | -------------------------------------------------------------------------------- /projects/conditional_detr/configs/models/conditional_detr_r50_dc5.py: -------------------------------------------------------------------------------- 1 | from detectron2.config import LazyCall as L 2 | from detrex.modeling.backbone.torchvision_resnet import TorchvisionResNet 3 | 4 | from .conditional_detr_r50 import model 5 | 6 | 7 | model.backbone=L(TorchvisionResNet)( 8 | name="resnet50", 9 | train_backbone=True, 10 | dilation=True, 11 | return_layers={"layer4": "res5"} 12 | ) 13 | -------------------------------------------------------------------------------- /projects/conditional_detr/modeling/__init__.py: -------------------------------------------------------------------------------- 1 | from .conditional_detr import ConditionalDETR 2 | from .conditional_transformer import ( 3 | ConditionalDetrTransformerEncoder, 4 | ConditionalDetrTransformerDecoder, 5 | ConditionalDetrTransformer, 6 | ) 7 | -------------------------------------------------------------------------------- 
/projects/dab_deformable_detr/assets/dab_detr_overall.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IDEA-Research/detrex/c56d32e3d0262cff9835ebe80a0642965ae0cb3e/projects/dab_deformable_detr/assets/dab_detr_overall.png -------------------------------------------------------------------------------- /projects/dab_deformable_detr/configs/dab_deformable_detr_r50_50ep.py: -------------------------------------------------------------------------------- 1 | from detrex.config import get_config 2 | from .models.dab_deformable_detr_r50 import model 3 | 4 | dataloader = get_config("common/data/coco_detr.py").dataloader 5 | optimizer = get_config("common/optim.py").AdamW 6 | lr_multiplier = get_config("common/coco_schedule.py").lr_multiplier_50ep 7 | train = get_config("common/train.py").train 8 | 9 | # modify training config 10 | train.init_checkpoint = "detectron2://ImageNetPretrained/torchvision/R-50.pkl" 11 | train.output_dir = "./output/dab_deformable_detr_r50_50ep" 12 | 13 | # set training seed 14 | train.seed = 42 15 | 16 | # max training iterations 17 | train.max_iter = 375000 18 | 19 | # run evaluation every 5000 iters 20 | train.eval_period = 5000 21 | 22 | # log training infomation every 20 iters 23 | train.log_period = 20 24 | 25 | # save checkpoint every 5000 iters 26 | train.checkpointer.period = 5000 27 | 28 | # gradient clipping for training 29 | train.clip_grad.enabled = True 30 | train.clip_grad.params.max_norm = 0.1 31 | train.clip_grad.params.norm_type = 2 32 | 33 | # set training devices 34 | train.device = "cuda" 35 | model.device = train.device 36 | 37 | # modify optimizer config 38 | optimizer.lr = 1e-4 39 | optimizer.betas = (0.9, 0.999) 40 | optimizer.weight_decay = 1e-4 41 | optimizer.params.lr_factor_func = lambda module_name: 0.1 if "backbone" in module_name else 1 42 | 43 | # modify dataloader config 44 | dataloader.train.num_workers = 16 45 | 46 | # please notice that 
this is total batch size. 47 | # surpose you're using 4 gpus for training and the batch size for 48 | # each gpu is 16/4 = 4 49 | dataloader.train.total_batch_size = 16 50 | 51 | # dump the testing results into output_dir for visualization 52 | dataloader.evaluator.output_dir = train.output_dir 53 | -------------------------------------------------------------------------------- /projects/dab_deformable_detr/configs/dab_deformable_detr_r50_two_stage_50ep.py: -------------------------------------------------------------------------------- 1 | from .dab_deformable_detr_r50_50ep import ( 2 | train, 3 | dataloader, 4 | optimizer, 5 | lr_multiplier, 6 | model, 7 | ) 8 | 9 | # modify training config 10 | train.init_checkpoint = "detectron2://ImageNetPretrained/torchvision/R-50.pkl" 11 | train.output_dir = "./output/dab_deformable_detr_r50_two_stage_50ep" 12 | 13 | # modify model config 14 | model.as_two_stage = True 15 | 16 | # modify loss weight dict 17 | # this is an hack implementation which will be improved in the future 18 | aux_weight_dict = { 19 | "loss_class_enc": 1.0, 20 | "loss_bbox_enc": 5.0, 21 | "loss_giou_enc": 2.0, 22 | } 23 | model.criterion.weight_dict.update(aux_weight_dict) 24 | -------------------------------------------------------------------------------- /projects/dab_deformable_detr/modeling/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2022 The IDEA Authors. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | 17 | from .dab_deformable_transformer import ( 18 | DabDeformableDetrTransformerEncoder, 19 | DabDeformableDetrTransformerDecoder, 20 | DabDeformableDetrTransformer, 21 | ) 22 | from .dab_deformable_detr import DabDeformableDETR 23 | from .two_stage_criterion import TwoStageCriterion 24 | -------------------------------------------------------------------------------- /projects/dab_detr/assets/dab_detr_details.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IDEA-Research/detrex/c56d32e3d0262cff9835ebe80a0642965ae0cb3e/projects/dab_detr/assets/dab_detr_details.png -------------------------------------------------------------------------------- /projects/dab_detr/assets/dab_detr_overall.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IDEA-Research/detrex/c56d32e3d0262cff9835ebe80a0642965ae0cb3e/projects/dab_detr/assets/dab_detr_overall.png -------------------------------------------------------------------------------- /projects/dab_detr/configs/dab_detr_r101_50ep.py: -------------------------------------------------------------------------------- 1 | from .dab_detr_r50_50ep import ( 2 | train, 3 | dataloader, 4 | optimizer, 5 | lr_multiplier, 6 | model, 7 | ) 8 | 9 | # modify training config 10 | train.init_checkpoint = "path/to/R-101.pkl" 11 | train.output_dir = "./output/dab_detr_r101_50ep" 12 | 13 | # modify model config 14 | model.backbone.stages.depth = 101 15 | -------------------------------------------------------------------------------- /projects/dab_detr/configs/dab_detr_r101_dc5_50ep.py: -------------------------------------------------------------------------------- 1 | from .dab_detr_r50_50ep import ( 2 | train, 3 | dataloader, 4 | optimizer, 5 | lr_multiplier, 6 | ) 7 | from 
.models.dab_detr_r50_dc5 import model 8 | 9 | # modify training config 10 | train.init_checkpoint = "https://download.pytorch.org/models/resnet101-63fe2227.pth" 11 | train.output_dir = "./output/dab_detr_r101_dc5_50ep" 12 | 13 | # modify model 14 | model.backbone.name = "resnet101" -------------------------------------------------------------------------------- /projects/dab_detr/configs/dab_detr_r50_3patterns_50ep.py: -------------------------------------------------------------------------------- 1 | from .dab_detr_r50_50ep import ( 2 | train, 3 | dataloader, 4 | optimizer, 5 | lr_multiplier, 6 | model, 7 | ) 8 | 9 | # modify training config 10 | train.init_checkpoint = "detectron2://ImageNetPretrained/torchvision/R-50.pkl" 11 | train.output_dir = "./output/dab_detr_r50_3patterns_50ep" 12 | 13 | # using 3 pattern embeddings as in Anchor-DETR 14 | model.transformer.num_patterns = 3 -------------------------------------------------------------------------------- /projects/dab_detr/configs/dab_detr_r50_50ep.py: -------------------------------------------------------------------------------- 1 | from detrex.config import get_config 2 | from .models.dab_detr_r50 import model 3 | 4 | dataloader = get_config("common/data/coco_detr.py").dataloader 5 | optimizer = get_config("common/optim.py").AdamW 6 | lr_multiplier = get_config("common/coco_schedule.py").lr_multiplier_50ep 7 | train = get_config("common/train.py").train 8 | 9 | # initialize checkpoint to be loaded 10 | train.init_checkpoint = "detectron2://ImageNetPretrained/torchvision/R-50.pkl" 11 | train.output_dir = "./output/dab_detr_r50_50ep" 12 | 13 | # max training iterations 14 | train.max_iter = 375000 15 | 16 | # run evaluation every 5000 iters 17 | train.eval_period = 5000 18 | 19 | # log training infomation every 20 iters 20 | train.log_period = 20 21 | 22 | # save checkpoint every 5000 iters 23 | train.checkpointer.period = 5000 24 | 25 | # gradient clipping for training 26 | train.clip_grad.enabled = True 
27 | train.clip_grad.params.max_norm = 0.1 28 | train.clip_grad.params.norm_type = 2 29 | 30 | # set training devices 31 | train.device = "cuda" 32 | model.device = train.device 33 | 34 | # modify optimizer config 35 | optimizer.lr = 1e-4 36 | optimizer.betas = (0.9, 0.999) 37 | optimizer.weight_decay = 1e-4 38 | optimizer.params.lr_factor_func = lambda module_name: 0.1 if "backbone" in module_name else 1 39 | 40 | # modify dataloader config 41 | dataloader.train.num_workers = 16 42 | 43 | # please notice that this is total batch size. 44 | # surpose you're using 4 gpus for training and the batch size for 45 | # each gpu is 16/4 = 4 46 | dataloader.train.total_batch_size = 16 47 | 48 | # dump the testing results into output_dir for visualization 49 | dataloader.evaluator.output_dir = train.output_dir 50 | -------------------------------------------------------------------------------- /projects/dab_detr/configs/dab_detr_r50_dc5_3patterns_50ep.py: -------------------------------------------------------------------------------- 1 | from .dab_detr_r50_dc5_50ep import ( 2 | train, 3 | dataloader, 4 | optimizer, 5 | lr_multiplier, 6 | model, 7 | ) 8 | 9 | # modify training config 10 | train.init_checkpoint = "https://download.pytorch.org/models/resnet50-0676ba61.pth" 11 | train.output_dir = "./output/dab_detr_r50_dc5_3patterns_50ep" 12 | 13 | # using 3 pattern embeddings as in Anchor-DETR 14 | model.transformer.num_patterns = 3 15 | 16 | # modify model 17 | model.position_embedding.temperature = 20 -------------------------------------------------------------------------------- /projects/dab_detr/configs/dab_detr_r50_dc5_50ep.py: -------------------------------------------------------------------------------- 1 | from .dab_detr_r50_50ep import ( 2 | train, 3 | dataloader, 4 | optimizer, 5 | lr_multiplier, 6 | ) 7 | from .models.dab_detr_r50_dc5 import model 8 | 9 | # modify training config 10 | train.init_checkpoint = 
"https://download.pytorch.org/models/resnet50-0676ba61.pth" 11 | train.output_dir = "./output/dab_detr_r50_dc5_50ep" 12 | 13 | # modify model 14 | # DAB-DETR using 10 temperature for DC5 model 15 | model.position_embedding.temperature = 10 -------------------------------------------------------------------------------- /projects/dab_detr/configs/dab_detr_swin_b_in21k_50ep.py: -------------------------------------------------------------------------------- 1 | from .dab_detr_r50_50ep import ( 2 | train, 3 | dataloader, 4 | optimizer, 5 | lr_multiplier, 6 | ) 7 | from .models.dab_detr_swin_base import model 8 | 9 | # modify training config 10 | train.init_checkpoint = "detectron2://ImageNetPretrained/swin/swin_base_patch4_window7_224_22k.pth" 11 | train.output_dir = "./output/dab_detr_swin_b_in21k_50ep" 12 | -------------------------------------------------------------------------------- /projects/dab_detr/configs/dab_detr_swin_t_in1k_50ep.py: -------------------------------------------------------------------------------- 1 | from .dab_detr_r50_50ep import ( 2 | train, 3 | dataloader, 4 | optimizer, 5 | lr_multiplier, 6 | ) 7 | from .models.dab_detr_swin_tiny import model 8 | 9 | # modify training config 10 | train.init_checkpoint = "path/to/swin_tiny_patch4_window7_224.pth" 11 | train.output_dir = "./output/dab_detr_swin_tiny_in1k_50ep" 12 | -------------------------------------------------------------------------------- /projects/dab_detr/configs/models/dab_detr_r50_3patterns.py: -------------------------------------------------------------------------------- 1 | from .dab_detr_r50 import model 2 | 3 | 4 | # using 3 pattern embeddings as in Anchor-DETR 5 | model.transformer.num_patterns = 3 6 | -------------------------------------------------------------------------------- /projects/dab_detr/configs/models/dab_detr_r50_dc5.py: -------------------------------------------------------------------------------- 1 | from detectron2.config import LazyCall as L 2 | from 
detrex.modeling.backbone.torchvision_resnet import TorchvisionResNet 3 | 4 | from .dab_detr_r50 import model 5 | 6 | 7 | model.backbone=L(TorchvisionResNet)( 8 | name="resnet50", 9 | train_backbone=True, 10 | dilation=True, 11 | return_layers={"layer4": "res5"} 12 | ) 13 | -------------------------------------------------------------------------------- /projects/dab_detr/configs/models/dab_detr_swin_base.py: -------------------------------------------------------------------------------- 1 | from detectron2.config import LazyCall as L 2 | from detectron2.modeling.backbone import SwinTransformer 3 | 4 | from .dab_detr_r50 import model 5 | 6 | 7 | model.backbone = L(SwinTransformer)( 8 | embed_dim=128, 9 | depths=(2, 2, 18, 2), 10 | num_heads=(4, 8, 16, 32), 11 | drop_path_rate=0.4, 12 | out_indices=(3,), 13 | ) 14 | model.in_features = ["p3"] 15 | model.in_channels = 1024 16 | -------------------------------------------------------------------------------- /projects/dab_detr/configs/models/dab_detr_swin_tiny.py: -------------------------------------------------------------------------------- 1 | from detectron2.config import LazyCall as L 2 | from detectron2.modeling.backbone import SwinTransformer 3 | 4 | from .dab_detr_r50 import model 5 | 6 | 7 | model.backbone = L(SwinTransformer)( 8 | embed_dim=96, 9 | depths=(2, 2, 6, 2), 10 | num_heads=(3, 6, 12, 24), 11 | drop_path_rate=0.1, 12 | out_indices=(3,), 13 | ) 14 | model.in_features = ["p3"] 15 | model.in_channels = 768 16 | -------------------------------------------------------------------------------- /projects/dab_detr/modeling/__init__.py: -------------------------------------------------------------------------------- 1 | from .dab_detr import DABDETR 2 | from .dab_transformer import ( 3 | DabDetrTransformerEncoder, 4 | DabDetrTransformerDecoder, 5 | DabDetrTransformer, 6 | ) 7 | -------------------------------------------------------------------------------- 
/projects/deformable_detr/assets/deformable_detr.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IDEA-Research/detrex/c56d32e3d0262cff9835ebe80a0642965ae0cb3e/projects/deformable_detr/assets/deformable_detr.png -------------------------------------------------------------------------------- /projects/deformable_detr/configs/deformable_detr_r50_50ep.py: -------------------------------------------------------------------------------- 1 | from detrex.config import get_config 2 | from .models.deformable_detr_r50 import model 3 | 4 | dataloader = get_config("common/data/coco_detr.py").dataloader 5 | lr_multiplier = get_config("common/coco_schedule.py").lr_multiplier_50ep 6 | optimizer = get_config("common/optim.py").AdamW 7 | train = get_config("common/train.py").train 8 | 9 | # modify training config 10 | train.init_checkpoint = "detectron2://ImageNetPretrained/torchvision/R-50.pkl" 11 | train.output_dir = "./output/deformable_detr_r50_50ep" 12 | 13 | # max training iterations 14 | train.max_iter = 375000 15 | 16 | # run evaluation every 5000 iters 17 | train.eval_period = 5000 18 | 19 | # log training infomation every 20 iters 20 | train.log_period = 20 21 | 22 | # save checkpoint every 5000 iters 23 | train.checkpointer.period = 5000 24 | 25 | # gradient clipping for training 26 | train.clip_grad.enabled = True 27 | train.clip_grad.params.max_norm = 0.1 28 | train.clip_grad.params.norm_type = 2 29 | 30 | # set training devices 31 | train.device = "cuda" 32 | model.device = train.device 33 | 34 | # modify optimizer config 35 | optimizer.lr = 1e-4 36 | optimizer.betas = (0.9, 0.999) 37 | optimizer.weight_decay = 1e-4 38 | optimizer.params.lr_factor_func = lambda module_name: 0.1 if "backbone" in module_name else 1 39 | 40 | # modify dataloader config 41 | dataloader.train.num_workers = 16 42 | 43 | # please notice that this is total batch size. 
44 | # surpose you're using 4 gpus for training and the batch size for 45 | # each gpu is 16/4 = 4 46 | dataloader.train.total_batch_size = 16 47 | 48 | # dump the testing results into output_dir for visualization 49 | dataloader.evaluator.output_dir = train.output_dir 50 | -------------------------------------------------------------------------------- /projects/deformable_detr/configs/deformable_detr_r50_two_stage_50ep.py: -------------------------------------------------------------------------------- 1 | from .deformable_detr_r50_50ep import train, dataloader, optimizer, lr_multiplier, model 2 | 3 | # modify model config 4 | model.with_box_refine = True 5 | model.as_two_stage = True 6 | 7 | # modify training config 8 | train.init_checkpoint = "detectron2://ImageNetPretrained/torchvision/R-50.pkl" 9 | train.output_dir = "./output/deformable_detr_r50_two_stage_50ep" 10 | -------------------------------------------------------------------------------- /projects/deformable_detr/configs/deformable_detr_r50_with_box_refinement_50ep.py: -------------------------------------------------------------------------------- 1 | from .deformable_detr_r50_50ep import train, dataloader, optimizer, lr_multiplier, model 2 | 3 | # modify model config 4 | model.with_box_refine = True 5 | 6 | # modify training config 7 | train.init_checkpoint = "detectron2://ImageNetPretrained/torchvision/R-50.pkl" 8 | train.output_dir = "./output/deformable_detr_with_box_refinement_50ep" 9 | -------------------------------------------------------------------------------- /projects/deformable_detr/modeling/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2022 The IDEA Authors. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 
6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | from .deformable_transformer import ( 17 | DeformableDetrTransformerEncoder, 18 | DeformableDetrTransformerDecoder, 19 | DeformableDetrTransformer, 20 | ) 21 | from .deformable_detr import DeformableDETR 22 | from .deformable_criterion import DeformableCriterion 23 | -------------------------------------------------------------------------------- /projects/deta/assets/deta.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IDEA-Research/detrex/c56d32e3d0262cff9835ebe80a0642965ae0cb3e/projects/deta/assets/deta.png -------------------------------------------------------------------------------- /projects/deta/configs/deta_r50_5scale_12ep.py: -------------------------------------------------------------------------------- 1 | from detrex.config import get_config 2 | from .models.deta_r50 import model 3 | from .scheduler.coco_scheduler import lr_multiplier_12ep_10drop as lr_multiplier 4 | 5 | # using the default optimizer and dataloader 6 | dataloader = get_config("common/data/coco_detr.py").dataloader 7 | optimizer = get_config("common/optim.py").AdamW 8 | train = get_config("common/train.py").train 9 | 10 | # modify training config 11 | train.init_checkpoint = "detectron2://ImageNetPretrained/torchvision/R-50.pkl" 12 | train.output_dir = "./output/deta_r50_5scale_12ep" 13 | 14 | # max training iterations 15 | train.max_iter = 90000 16 | train.eval_period = 7500 17 | train.checkpointer.period = 7500 18 | 19 | # set training devices 20 | train.device = 
"cuda" 21 | model.device = train.device 22 | 23 | # modify dataloader config 24 | dataloader.train.num_workers = 16 25 | 26 | # please notice that this is total batch size. 27 | # surpose you're using 4 gpus for training and the batch size for 28 | # each gpu is 16/4 = 4 29 | dataloader.train.total_batch_size = 16 30 | 31 | -------------------------------------------------------------------------------- /projects/deta/configs/deta_r50_5scale_12ep_bs8.py: -------------------------------------------------------------------------------- 1 | from detrex.config import get_config 2 | from .models.deta_r50 import model 3 | from .scheduler.coco_scheduler import lr_multiplier_12ep_8bs_scheduler as lr_multiplier 4 | 5 | # using the default optimizer and dataloader 6 | dataloader = get_config("common/data/coco_detr.py").dataloader 7 | optimizer = get_config("common/optim.py").AdamW 8 | train = get_config("common/train.py").train 9 | 10 | # modify training config 11 | train.init_checkpoint = "detectron2://ImageNetPretrained/torchvision/R-50.pkl" 12 | train.output_dir = "./output/deta_r50_5scale_12ep_bs8" 13 | 14 | # max training iterations 15 | train.max_iter = 180000 16 | train.eval_period = 15000 17 | train.checkpointer.period = 15000 18 | 19 | 20 | # only freeze stem during training 21 | model.backbone.freeze_at = 1 22 | 23 | 24 | # modify optimizer config 25 | optimizer.lr = 1e-4 26 | optimizer.betas = (0.9, 0.999) 27 | optimizer.weight_decay = 1e-4 28 | optimizer.params.lr_factor_func = lambda module_name: 0.1 if "backbone" in module_name else 1 29 | 30 | # please notice that this is total batch size. 
31 | # surpose you're using 4 gpus for training and the batch size for 32 | # each gpu is 16/4 = 4 33 | dataloader.train.total_batch_size = 8 34 | 35 | -------------------------------------------------------------------------------- /projects/deta/configs/deta_r50_5scale_no_frozen_backbone.py: -------------------------------------------------------------------------------- 1 | from .deta_r50_5scale_12ep import ( 2 | model, 3 | train, 4 | dataloader, 5 | lr_multiplier 6 | ) 7 | 8 | model.backbone.freeze_at = 1 9 | -------------------------------------------------------------------------------- /projects/deta/configs/deta_swin_large_finetune_24ep.py: -------------------------------------------------------------------------------- 1 | from detrex.config import get_config 2 | from .deta_r50_5scale_12ep import ( 3 | train, 4 | optimizer, 5 | ) 6 | 7 | from .models.deta_swin import model 8 | from .data.coco_detr_larger import dataloader 9 | 10 | # 24ep for finetuning 11 | lr_multiplier = get_config("common/coco_schedule.py").lr_multiplier_24ep 12 | 13 | # modify learning rate 14 | optimizer.lr = 5e-5 15 | optimizer.params.lr_factor_func = lambda module_name: 0.1 if "backbone" in module_name else 1 16 | 17 | -------------------------------------------------------------------------------- /projects/deta/configs/improved_deformable_detr_baseline_50ep.py: -------------------------------------------------------------------------------- 1 | from .deta_r50_5scale_12ep import ( 2 | train, 3 | model, 4 | dataloader, 5 | lr_multiplier, 6 | optimizer, 7 | ) 8 | 9 | model.transformer.assign_first_stage = False 10 | model.criterion.assign_first_stage = False 11 | model.criterion.assign_second_stage = False 12 | -------------------------------------------------------------------------------- /projects/deta/configs/models/deta_swin.py: -------------------------------------------------------------------------------- 1 | from detectron2.config import LazyCall as L 2 | from 
detectron2.layers import ShapeSpec 3 | from detectron2.modeling.backbone import SwinTransformer 4 | 5 | from .deta_r50 import model 6 | 7 | 8 | # modify backbone config 9 | model.backbone = L(SwinTransformer)( 10 | pretrain_img_size=384, 11 | embed_dim=192, 12 | depths=(2, 2, 18, 2), 13 | num_heads=(6, 12, 24, 48), 14 | window_size=12, 15 | out_indices=(1, 2, 3), 16 | ) 17 | 18 | # modify neck config 19 | model.neck.input_shapes = { 20 | "p1": ShapeSpec(channels=384), 21 | "p2": ShapeSpec(channels=768), 22 | "p3": ShapeSpec(channels=1536), 23 | } 24 | model.neck.in_features = ["p1", "p2", "p3"] 25 | -------------------------------------------------------------------------------- /projects/deta/configs/scheduler/coco_scheduler.py: -------------------------------------------------------------------------------- 1 | from fvcore.common.param_scheduler import MultiStepParamScheduler 2 | 3 | from detectron2.config import LazyCall as L 4 | from detectron2.solver import WarmupParamScheduler 5 | 6 | 7 | def default_coco_scheduler(epochs=50, decay_epochs=40, warmup_epochs=0): 8 | """ 9 | Returns the config for a default multi-step LR scheduler such as "50epochs", 10 | commonly referred to in papers, where every 1x has the total length of 1440k 11 | training images (~12 COCO epochs). LR is decayed once at the end of training. 12 | 13 | Args: 14 | epochs (int): total training epochs. 15 | decay_epochs (int): lr decay steps. 16 | warmup_epochs (int): warmup epochs. 
17 | 18 | Returns: 19 | DictConfig: configs that define the multiplier for LR during training 20 | """ 21 | # total number of iterations assuming 16 batch size, using 1440000/16=90000 22 | total_steps_16bs = epochs * 7500 23 | decay_steps = decay_epochs * 7500 24 | warmup_steps = warmup_epochs * 7500 25 | scheduler = L(MultiStepParamScheduler)( 26 | values=[1.0, 0.1], 27 | milestones=[decay_steps, total_steps_16bs], 28 | ) 29 | return L(WarmupParamScheduler)( 30 | scheduler=scheduler, 31 | warmup_length=warmup_steps / total_steps_16bs, 32 | warmup_method="linear", 33 | warmup_factor=0.001, 34 | ) 35 | 36 | 37 | # default scheduler for detr 38 | lr_multiplier_12ep_10drop = default_coco_scheduler(12, 10, 0) 39 | lr_multiplier_12ep_8bs_scheduler = default_coco_scheduler(24, 20, 0) 40 | -------------------------------------------------------------------------------- /projects/deta/modeling/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2022 The IDEA Authors. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | from .deformable_transformer import ( 17 | DeformableDetrTransformerEncoder, 18 | DeformableDetrTransformerDecoder, 19 | DeformableDetrTransformer, 20 | ) 21 | from .deformable_detr import DeformableDETR 22 | from .deta_criterion import DETACriterion 23 | from .assigner import Stage1Assigner, Stage2Assigner -------------------------------------------------------------------------------- /projects/detr/assets/DETR.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IDEA-Research/detrex/c56d32e3d0262cff9835ebe80a0642965ae0cb3e/projects/detr/assets/DETR.png -------------------------------------------------------------------------------- /projects/detr/configs/detr_r101_300ep.py: -------------------------------------------------------------------------------- 1 | from .detr_r50_300ep import train, dataloader, optimizer, lr_multiplier, model 2 | 3 | # modify model config 4 | model.backbone.stages.depth = 101 5 | 6 | # modify training config 7 | train.init_checkpoint = "path/to/R-101.pkl" 8 | train.output_dir = "./output/detr_r101_300ep" 9 | -------------------------------------------------------------------------------- /projects/detr/configs/detr_r101_dc5_300ep.py: -------------------------------------------------------------------------------- 1 | from .detr_r50_300ep import dataloader, lr_multiplier, optimizer, train 2 | 3 | from .models.detr_r50_dc5 import model 4 | 5 | # modify training config 6 | # using torchvision official checkpoint 7 | # the urls can be found in: https://pytorch.org/vision/stable/models/resnet.html 8 | 9 | train.init_checkpoint = "https://download.pytorch.org/models/resnet101-63fe2227.pth" 10 | train.output_dir = "./output/detr_r101_dc5_300ep" 11 | 12 | # modify model 13 | model.backbone.name = "resnet101" -------------------------------------------------------------------------------- /projects/detr/configs/detr_r50_300ep.py:
-------------------------------------------------------------------------------- 1 | from detrex.config import get_config 2 | from .models.detr_r50 import model 3 | 4 | dataloader = get_config("common/data/coco_detr.py").dataloader 5 | lr_multiplier = get_config("common/coco_schedule.py").lr_multiplier_50ep 6 | optimizer = get_config("common/optim.py").AdamW 7 | train = get_config("common/train.py").train 8 | 9 | # modify training config 10 | train.init_checkpoint = "detectron2://ImageNetPretrained/torchvision/R-50.pkl" 11 | train.output_dir = "./output/detr_r50_300ep" 12 | train.max_iter = 554400 13 | 14 | # modify lr_multiplier 15 | lr_multiplier.scheduler.milestones = [369600, 554400] 16 | 17 | # modify optimizer config 18 | optimizer.weight_decay = 1e-4 19 | optimizer.params.lr_factor_func = lambda module_name: 0.1 if "backbone" in module_name else 1 20 | 21 | # modify dataloader config 22 | dataloader.train.num_workers = 16 23 | dataloader.train.total_batch_size = 64 24 | -------------------------------------------------------------------------------- /projects/detr/configs/detr_r50_dc5_300ep.py: -------------------------------------------------------------------------------- 1 | from .detr_r50_300ep import dataloader, lr_multiplier, optimizer, train 2 | 3 | from .models.detr_r50_dc5 import model 4 | 5 | # modify training config 6 | # using torchvision official checkpoint 7 | # the urls can be found in: https://pytorch.org/vision/stable/models/resnet.html 8 | 9 | train.init_checkpoint = "https://download.pytorch.org/models/resnet50-0676ba61.pth" 10 | train.output_dir = "./output/detr_r50_dc5_300ep" 11 | 12 | -------------------------------------------------------------------------------- /projects/detr/configs/models/detr_r50_dc5.py: -------------------------------------------------------------------------------- 1 | from detectron2.config import LazyCall as L 2 | 3 | from detrex.modeling.backbone.torchvision_resnet import TorchvisionResNet 4 | 5 | from 
.detr_r50 import model 6 | 7 | model.backbone=L(TorchvisionResNet)( 8 | name="resnet50", 9 | train_backbone=True, 10 | dilation=True, 11 | return_layers={"layer4": "res5"} 12 | ) 13 | -------------------------------------------------------------------------------- /projects/detr/modeling/__init__.py: -------------------------------------------------------------------------------- 1 | from .detr import DETR 2 | from .transformer import ( 3 | DetrTransformerEncoder, 4 | DetrTransformerDecoder, 5 | DetrTransformer, 6 | ) 7 | -------------------------------------------------------------------------------- /projects/dino/assets/dino_arch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IDEA-Research/detrex/c56d32e3d0262cff9835ebe80a0642965ae0cb3e/projects/dino/assets/dino_arch.png -------------------------------------------------------------------------------- /projects/dino/configs/dino-convnext/dino_convnext_base_384_4scale_12ep.py: -------------------------------------------------------------------------------- 1 | from detectron2.layers import ShapeSpec 2 | 3 | from .dino_convnext_large_384_4scale_12ep import ( 4 | train, 5 | dataloader, 6 | optimizer, 7 | lr_multiplier, 8 | model, 9 | ) 10 | 11 | # modify model to convnext-base version 12 | model.backbone.depths = [3, 3, 27, 3] 13 | model.backbone.dims = [128, 256, 512, 1024] 14 | 15 | # modify neck config 16 | model.neck.input_shapes = { 17 | "p1": ShapeSpec(channels=256), 18 | "p2": ShapeSpec(channels=512), 19 | "p3": ShapeSpec(channels=1024), 20 | } 21 | model.neck.in_features = ["p1", "p2", "p3"] 22 | 23 | # modify training config 24 | train.init_checkpoint = "/path/to/convnext_base_22k_1k_384.pth" 25 | train.output_dir = "./output/dino_convnext_base_384_4scale_12ep" 26 | -------------------------------------------------------------------------------- /projects/dino/configs/dino-convnext/dino_convnext_large_384_4scale_12ep.py: 
-------------------------------------------------------------------------------- 1 | from detrex.config import get_config 2 | from ..models.dino_convnext import model 3 | 4 | # get default config 5 | dataloader = get_config("common/data/coco_detr.py").dataloader 6 | optimizer = get_config("common/optim.py").AdamW 7 | lr_multiplier = get_config("common/coco_schedule.py").lr_multiplier_12ep 8 | train = get_config("common/train.py").train 9 | 10 | # modify training config 11 | # use convnext-large-384 as default 12 | train.init_checkpoint = "/path/to/convnext_large_22k_1k_384.pth" 13 | train.output_dir = "./output/dino_convnext_large_4scale_12ep" 14 | 15 | # max training iterations 16 | train.max_iter = 90000 17 | train.eval_period = 5000 18 | train.log_period = 20 19 | train.checkpointer.period = 5000 20 | 21 | # gradient clipping for training 22 | train.clip_grad.enabled = True 23 | train.clip_grad.params.max_norm = 0.1 24 | train.clip_grad.params.norm_type = 2 25 | 26 | # set training devices 27 | train.device = "cuda" 28 | model.device = train.device 29 | 30 | # modify optimizer config 31 | optimizer.lr = 1e-4 32 | optimizer.betas = (0.9, 0.999) 33 | optimizer.weight_decay = 1e-4 34 | optimizer.params.lr_factor_func = lambda module_name: 0.1 if "backbone" in module_name else 1 35 | 36 | # modify dataloader config 37 | dataloader.train.num_workers = 16 38 | 39 | # please notice that this is total batch size. 
40 | # suppose you're using 4 gpus for training and the batch size for 41 | # each gpu is 16/4 = 4 42 | dataloader.train.total_batch_size = 16 43 | -------------------------------------------------------------------------------- /projects/dino/configs/dino-convnext/dino_convnext_small_384_4scale_12ep.py: -------------------------------------------------------------------------------- 1 | from detectron2.layers import ShapeSpec 2 | 3 | from .dino_convnext_large_384_4scale_12ep import ( 4 | train, 5 | dataloader, 6 | optimizer, 7 | lr_multiplier, 8 | model, 9 | ) 10 | 11 | # modify model to convnext-small version 12 | model.backbone.depths = [3, 3, 27, 3] 13 | model.backbone.dims = [96, 192, 384, 768] 14 | 15 | # modify neck config 16 | model.neck.input_shapes = { 17 | "p1": ShapeSpec(channels=192), 18 | "p2": ShapeSpec(channels=384), 19 | "p3": ShapeSpec(channels=768), 20 | } 21 | model.neck.in_features = ["p1", "p2", "p3"] 22 | 23 | # modify training config 24 | train.init_checkpoint = "/path/to/convnext_small_22k_1k_384.pth" 25 | train.output_dir = "./output/dino_convnext_small_384_4scale_12ep" 26 | -------------------------------------------------------------------------------- /projects/dino/configs/dino-convnext/dino_convnext_tiny_384_4scale_12ep.py: -------------------------------------------------------------------------------- 1 | from detectron2.layers import ShapeSpec 2 | 3 | from .dino_convnext_large_384_4scale_12ep import ( 4 | train, 5 | dataloader, 6 | optimizer, 7 | lr_multiplier, 8 | model, 9 | ) 10 | 11 | # modify model to tiny version 12 | model.backbone.depths = [3, 3, 9, 3] 13 | model.backbone.dims = [96, 192, 384, 768] 14 | 15 | # modify neck config 16 | model.neck.input_shapes = { 17 | "p1": ShapeSpec(channels=192), 18 | "p2": ShapeSpec(channels=384), 19 | "p3": ShapeSpec(channels=768), 20 | } 21 | model.neck.in_features = ["p1", "p2", "p3"] 22 | 23 | # modify training config 24 | train.init_checkpoint = "/path/to/convnext_tiny_22k_1k_384.pth" 
25 | train.output_dir = "./output/dino_convnext_tiny_384_4scale_12ep" 26 | -------------------------------------------------------------------------------- /projects/dino/configs/dino-eva-01/dino_eva_01_1536_4scale_12ep.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IDEA-Research/detrex/c56d32e3d0262cff9835ebe80a0642965ae0cb3e/projects/dino/configs/dino-eva-01/dino_eva_01_1536_4scale_12ep.py -------------------------------------------------------------------------------- /projects/dino/configs/dino-focal/dino_focal_base_lrf_fl3_4scale_12ep.py: -------------------------------------------------------------------------------- 1 | from detectron2.layers import ShapeSpec 2 | from .dino_focalnet_large_lrf_384_4scale_12ep import ( 3 | train, 4 | dataloader, 5 | optimizer, 6 | lr_multiplier, 7 | model, 8 | ) 9 | from .focalnet import FocalNet 10 | from detectron2.config import LazyCall as L 11 | 12 | 13 | # modify training config 14 | train.init_checkpoint = "/path/to/focalnet_base_lrf.pth" 15 | train.output_dir = "./output/dino_focal_base_lrf_fl3_4scale_12ep" 16 | 17 | 18 | # convert to focal-base 3level 19 | # model.backbone.embed_dim = 128 20 | # model.backbone.depths = (2, 2, 18, 2) 21 | # model.backbone.focal_levels = (3, 3, 3, 3) 22 | # model.backbone.focal_windows = (3, 3, 3, 3) 23 | # model.backbone.drop_path_rate = 0.1 24 | # model.backbone.use_conv_embed = False 25 | # model.backbone.patch_norm = True 26 | # model.backbone.use_postln = False 27 | 28 | model.backbone = L(FocalNet)( 29 | embed_dim=128, 30 | depths=(2, 2, 18, 2), 31 | focal_levels=(3, 3, 3, 3), 32 | focal_windows=(3, 3, 3, 3), 33 | drop_path_rate=0.1, 34 | use_conv_embed=False, 35 | out_indices=(1, 2, 3), 36 | ) 37 | 38 | # modify neck config 39 | model.neck.input_shapes = { 40 | "p1": ShapeSpec(channels=256), 41 | "p2": ShapeSpec(channels=512), 42 | "p3": ShapeSpec(channels=1024), 43 | } 44 | model.neck.in_features = ["p1", 
"p2", "p3"] 45 | -------------------------------------------------------------------------------- /projects/dino/configs/dino-focal/dino_focal_small_lrf_fl3_4scale_12ep.py: -------------------------------------------------------------------------------- 1 | from detectron2.layers import ShapeSpec 2 | from .dino_focalnet_large_lrf_384_4scale_12ep import ( 3 | train, 4 | dataloader, 5 | optimizer, 6 | lr_multiplier, 7 | model, 8 | ) 9 | from .focalnet import FocalNet 10 | from detectron2.config import LazyCall as L 11 | 12 | 13 | # modify training config 14 | train.init_checkpoint = "/path/to/focalnet_small_lrf.pth" 15 | train.output_dir = "./output/dino_focal_small_lrf_fl3_4scale_12ep" 16 | 17 | 18 | # convert to focal-small 3level 19 | # model.backbone.embed_dim = 96 20 | # model.backbone.depths = (2, 2, 18, 2) 21 | # model.backbone.focal_levels = (3, 3, 3, 3) 22 | # model.backbone.focal_windows = (3, 3, 3, 3) 23 | # model.backbone.drop_path_rate = 0.1 24 | # model.backbone.use_conv_embed = False 25 | # model.backbone.patch_norm = True 26 | # model.backbone.use_postln = False 27 | 28 | model.backbone = L(FocalNet)( 29 | embed_dim=96, 30 | depths=(2, 2, 18, 2), 31 | focal_levels=(3, 3, 3, 3), 32 | focal_windows=(3, 3, 3, 3), 33 | drop_path_rate=0.1, 34 | use_conv_embed=False, 35 | out_indices=(1, 2, 3), 36 | ) 37 | 38 | # modify neck config 39 | model.neck.input_shapes = { 40 | "p1": ShapeSpec(channels=192), 41 | "p2": ShapeSpec(channels=384), 42 | "p3": ShapeSpec(channels=768), 43 | } 44 | model.neck.in_features = ["p1", "p2", "p3"] 45 | -------------------------------------------------------------------------------- /projects/dino/configs/dino-focal/dino_focal_tiny_lrf_fl3_4scale_12ep.py: -------------------------------------------------------------------------------- 1 | from detectron2.layers import ShapeSpec 2 | from .dino_focalnet_large_lrf_384_4scale_12ep import ( 3 | train, 4 | dataloader, 5 | optimizer, 6 | lr_multiplier, 7 | model, 8 | ) 9 | from .focalnet 
import FocalNet 10 | from detectron2.config import LazyCall as L 11 | 12 | 13 | # modify training config 14 | train.init_checkpoint = "/path/to/focalnet_tiny_lrf.pth" 15 | train.output_dir = "./output/dino_focal_tiny_lrf_fl3_4scale_12ep" 16 | 17 | 18 | # convert to focal-tiny 3level 19 | # model.backbone.embed_dim = 96 20 | # model.backbone.depths = (2, 2, 6, 2) 21 | # model.backbone.focal_levels = (3, 3, 3, 3) 22 | # model.backbone.focal_windows = (3, 3, 3, 3) 23 | # model.backbone.drop_path_rate = 0.1 24 | # model.backbone.use_conv_embed = False 25 | # model.backbone.patch_norm = True 26 | # model.backbone.use_postln = False 27 | 28 | model.backbone = L(FocalNet)( 29 | embed_dim=96, 30 | depths=(2, 2, 6, 2), 31 | focal_levels=(3, 3, 3, 3), 32 | focal_windows=(3, 3, 3, 3), 33 | drop_path_rate=0.1, 34 | use_conv_embed=False, 35 | out_indices=(1, 2, 3), 36 | ) 37 | 38 | # modify neck config 39 | model.neck.input_shapes = { 40 | "p1": ShapeSpec(channels=192), 41 | "p2": ShapeSpec(channels=384), 42 | "p3": ShapeSpec(channels=768), 43 | } 44 | model.neck.in_features = ["p1", "p2", "p3"] 45 | -------------------------------------------------------------------------------- /projects/dino/configs/dino-focal/dino_focalnet_large_lrf_384_4scale_12ep.py: -------------------------------------------------------------------------------- 1 | from detrex.config import get_config 2 | from ..models.dino_focalnet import model 3 | 4 | # get default config 5 | dataloader = get_config("common/data/coco_detr.py").dataloader 6 | optimizer = get_config("common/optim.py").AdamW 7 | lr_multiplier = get_config("common/coco_schedule.py").lr_multiplier_12ep 8 | train = get_config("common/train.py").train 9 | 10 | # modify training config 11 | train.init_checkpoint = "/path/to/focalnet_large_lrf_384.pth" 12 | train.output_dir = "./output/dino_focalnet_large_4scale_12ep" 13 | 14 | # max training iterations 15 | train.max_iter = 90000 16 | train.eval_period = 5000 17 | train.log_period = 20 18 | 
train.checkpointer.period = 5000 19 | 20 | # gradient clipping for training 21 | train.clip_grad.enabled = True 22 | train.clip_grad.params.max_norm = 0.1 23 | train.clip_grad.params.norm_type = 2 24 | 25 | # set training devices 26 | train.device = "cuda" 27 | model.device = train.device 28 | 29 | # modify optimizer config 30 | optimizer.lr = 1e-4 31 | optimizer.betas = (0.9, 0.999) 32 | optimizer.weight_decay = 1e-4 33 | optimizer.params.lr_factor_func = lambda module_name: 0.1 if "backbone" in module_name else 1 34 | 35 | # modify dataloader config 36 | dataloader.train.num_workers = 16 37 | 38 | # please notice that this is total batch size. 39 | # suppose you're using 4 gpus for training and the batch size for 40 | # each gpu is 16/4 = 4 41 | dataloader.train.total_batch_size = 16 42 | -------------------------------------------------------------------------------- /projects/dino/configs/dino-focal/dino_focalnet_large_lrf_384_4scale_36ep.py: -------------------------------------------------------------------------------- 1 | from .dino_focalnet_large_lrf_384_4scale_12ep import ( 2 | model, 3 | dataloader, 4 | train, 5 | lr_multiplier, 6 | optimizer 7 | ) 8 | 9 | from detrex.config import get_config 10 | 11 | # using 36ep scheduler 12 | lr_multiplier = get_config("common/coco_schedule.py").lr_multiplier_36ep 13 | 14 | # modify training config 15 | train.max_iter = 270000 16 | train.init_checkpoint = "/path/to/focalnet_large_lrf_384.pth" 17 | train.output_dir = "./output/dino_focalnet_large_4scale_36ep" 18 | -------------------------------------------------------------------------------- /projects/dino/configs/dino-focal/dino_focalnet_large_lrf_384_fl4_4scale_12ep.py: -------------------------------------------------------------------------------- 1 | from .dino_focalnet_large_lrf_384_4scale_12ep import ( 2 | train, 3 | dataloader, 4 | optimizer, 5 | lr_multiplier, 6 | model, 7 | ) 8 | 9 | 10 | # modify training config 11 | train.init_checkpoint = 
"/path/to/focalnet_large_lrf_384_fl4.pth" 12 | train.output_dir = "./output/dino_focalnet_large_fl4_4scale_12ep" 13 | 14 | 15 | # convert to 4 focal-level 16 | model.backbone.focal_levels = (4, 4, 4, 4) 17 | model.backbone.focal_windows = (3, 3, 3, 3) 18 | -------------------------------------------------------------------------------- /projects/dino/configs/dino-focal/dino_focalnet_large_lrf_384_fl4_5scale_12ep.py: -------------------------------------------------------------------------------- 1 | from .dino_focalnet_large_lrf_384_4scale_12ep import ( 2 | train, 3 | dataloader, 4 | optimizer, 5 | lr_multiplier, 6 | model, 7 | ) 8 | 9 | from detectron2.layers import ShapeSpec 10 | 11 | 12 | # modify training config 13 | train.init_checkpoint = "/path/to/focalnet_large_lrf_384_fl4.pth" 14 | train.output_dir = "./output/dino_focalnet_large_fl4_5scale_12ep" 15 | 16 | # convert to 4 focal-level 17 | model.backbone.focal_levels = (4, 4, 4, 4) 18 | model.backbone.focal_windows = (3, 3, 3, 3) 19 | 20 | # convert to 5 scale output features 21 | model.backbone.out_indices = (0, 1, 2, 3) 22 | model.neck.input_shapes = { 23 | "p0": ShapeSpec(channels=192), 24 | "p1": ShapeSpec(channels=384), 25 | "p2": ShapeSpec(channels=768), 26 | "p3": ShapeSpec(channels=1536), 27 | } 28 | model.neck.in_features = ["p0", "p1", "p2", "p3"] 29 | model.neck.num_outs = 5 30 | model.transformer.num_feature_levels = 5 31 | -------------------------------------------------------------------------------- /projects/dino/configs/dino-focal/dino_focalnet_large_lrf_384_fl4_5scale_36ep.py: -------------------------------------------------------------------------------- 1 | from detrex.config import get_config 2 | 3 | from .dino_focalnet_large_lrf_384_fl4_5scale_12ep import ( 4 | train, 5 | dataloader, 6 | optimizer, 7 | model, 8 | ) 9 | 10 | # using 36ep scheduler 11 | lr_multiplier = get_config("common/coco_schedule.py").lr_multiplier_36ep 12 | 13 | # modify training config 14 | train.max_iter = 
270000 15 | train.init_checkpoint = "/path/to/focalnet_large_lrf_384_fl4.pth" 16 | train.output_dir = "./output/dino_focalnet_large_fl4_5scale_36ep" 17 | 18 | # using larger drop-path rate for longer training times 19 | model.backbone.drop_path_rate = 0.4 20 | -------------------------------------------------------------------------------- /projects/dino/configs/dino-internimage/dino_internimage_base_4scale_12ep.py: -------------------------------------------------------------------------------- 1 | from detectron2.layers import ShapeSpec 2 | 3 | from .dino_internimage_large_4scale_12ep import ( 4 | train, 5 | dataloader, 6 | optimizer, 7 | lr_multiplier, 8 | model, 9 | ) 10 | 11 | # modify model to internimage-small version 12 | model.backbone.channels = 112 13 | model.backbone.depths = [4, 4, 21, 4] 14 | model.backbone.groups = [7, 14, 28, 56] 15 | model.backbone.offset_scale = 1.0 16 | model.backbone.drop_path_rate = 0.1 17 | model.backbone.post_norm = True 18 | 19 | # modify neck config 20 | model.neck.input_shapes = { 21 | "p1": ShapeSpec(channels=224), 22 | "p2": ShapeSpec(channels=448), 23 | "p3": ShapeSpec(channels=896), 24 | } 25 | model.neck.in_features = ["p1", "p2", "p3"] 26 | 27 | # modify training config 28 | train.init_checkpoint = "/path/to/internimage_b_1k_224.pth" 29 | train.output_dir = "./output/dino_internimage_base_4scale_12ep" -------------------------------------------------------------------------------- /projects/dino/configs/dino-internimage/dino_internimage_large_4scale_12ep.py: -------------------------------------------------------------------------------- 1 | from detrex.config import get_config 2 | from ..models.dino_internimage import model 3 | 4 | # get default config 5 | dataloader = get_config("common/data/coco_detr.py").dataloader 6 | optimizer = get_config("common/optim.py").AdamW 7 | lr_multiplier = get_config("common/coco_schedule.py").lr_multiplier_12ep 8 | train = get_config("common/train.py").train 9 | 10 | # modify 
training config 11 | train.init_checkpoint = "/path/to/internimage_l_22kto1k_384.pth" 12 | train.output_dir = "./output/dino_internimage_large_384_4scale_12ep" 13 | 14 | # max training iterations 15 | train.max_iter = 90000 16 | train.eval_period = 5000 17 | train.log_period = 20 18 | train.checkpointer.period = 5000 19 | 20 | # gradient clipping for training 21 | train.clip_grad.enabled = True 22 | train.clip_grad.params.max_norm = 0.1 23 | train.clip_grad.params.norm_type = 2 24 | 25 | # set training devices 26 | train.device = "cuda" 27 | model.device = train.device 28 | 29 | # modify optimizer config 30 | optimizer.lr = 1e-4 31 | optimizer.betas = (0.9, 0.999) 32 | optimizer.weight_decay = 1e-4 33 | optimizer.params.lr_factor_func = lambda module_name: 0.1 if "backbone" in module_name else 1 34 | 35 | # modify dataloader config 36 | dataloader.train.num_workers = 16 37 | 38 | # please notice that this is total batch size. 39 | # suppose you're using 4 gpus for training and the batch size for 40 | # each gpu is 16/4 = 4 41 | dataloader.train.total_batch_size = 16 42 | -------------------------------------------------------------------------------- /projects/dino/configs/dino-internimage/dino_internimage_small_4scale_12ep.py: -------------------------------------------------------------------------------- 1 | from detectron2.layers import ShapeSpec 2 | 3 | from .dino_internimage_large_4scale_12ep import ( 4 | train, 5 | dataloader, 6 | optimizer, 7 | lr_multiplier, 8 | model, 9 | ) 10 | 11 | # modify model to internimage-small version 12 | model.backbone.channels = 80 13 | model.backbone.depths = [4, 4, 21, 4] 14 | model.backbone.groups = [5, 10, 20, 40] 15 | model.backbone.offset_scale = 1.0 16 | model.backbone.drop_path_rate = 0.1 17 | model.backbone.post_norm = True 18 | 19 | # modify neck config 20 | model.neck.input_shapes = { 21 | "p1": ShapeSpec(channels=160), 22 | "p2": ShapeSpec(channels=320), 23 | "p3": ShapeSpec(channels=640), 24 | } 25 | 
model.neck.in_features = ["p1", "p2", "p3"] 26 | 27 | # modify training config 28 | train.init_checkpoint = "/path/to/internimage_s_1k_224.pth" 29 | train.output_dir = "./output/dino_internimage_small_4scale_12ep" -------------------------------------------------------------------------------- /projects/dino/configs/dino-internimage/dino_internimage_tiny_4scale_12ep.py: -------------------------------------------------------------------------------- 1 | from detectron2.layers import ShapeSpec 2 | 3 | from .dino_internimage_large_4scale_12ep import ( 4 | train, 5 | dataloader, 6 | optimizer, 7 | lr_multiplier, 8 | model, 9 | ) 10 | 11 | # modify model to internimage-tiny version 12 | model.backbone.channels = 64 13 | model.backbone.depths = [4, 4, 18, 4] 14 | model.backbone.groups = [4, 8, 16, 32] 15 | model.backbone.offset_scale = 1.0 16 | model.backbone.drop_path_rate = 0.1 17 | model.backbone.post_norm = False 18 | 19 | # modify neck config 20 | model.neck.input_shapes = { 21 | "p1": ShapeSpec(channels=128), 22 | "p2": ShapeSpec(channels=256), 23 | "p3": ShapeSpec(channels=512), 24 | } 25 | model.neck.in_features = ["p1", "p2", "p3"] 26 | 27 | # modify training config 28 | train.init_checkpoint = "/path/to/internimage_t_1k_224.pth" 29 | train.output_dir = "./output/dino_internimage_tiny_4scale_12ep" -------------------------------------------------------------------------------- /projects/dino/configs/dino-resnet/dino_r101_4scale_12ep.py: -------------------------------------------------------------------------------- 1 | from .dino_r50_4scale_12ep import ( 2 | train, 3 | dataloader, 4 | optimizer, 5 | lr_multiplier, 6 | model, 7 | ) 8 | 9 | # modify training config 10 | train.init_checkpoint = "/path/to/r101.pkl" 11 | train.output_dir = "./output/dino_r101_4scale_12ep" 12 | 13 | # modify model config 14 | model.backbone.stages.depth = 101 15 | -------------------------------------------------------------------------------- 
/projects/dino/configs/dino-resnet/dino_r50_4scale_12ep.py: -------------------------------------------------------------------------------- 1 | from detrex.config import get_config 2 | from ..models.dino_r50 import model 3 | 4 | # get default config 5 | dataloader = get_config("common/data/coco_detr.py").dataloader 6 | optimizer = get_config("common/optim.py").AdamW 7 | lr_multiplier = get_config("common/coco_schedule.py").lr_multiplier_12ep 8 | train = get_config("common/train.py").train 9 | 10 | # modify training config 11 | train.init_checkpoint = "detectron2://ImageNetPretrained/torchvision/R-50.pkl" 12 | train.output_dir = "./output/dino_r50_4scale_12ep" 13 | 14 | # max training iterations 15 | train.max_iter = 90000 16 | train.eval_period = 5000 17 | train.log_period = 20 18 | train.checkpointer.period = 5000 19 | 20 | # gradient clipping for training 21 | train.clip_grad.enabled = True 22 | train.clip_grad.params.max_norm = 0.1 23 | train.clip_grad.params.norm_type = 2 24 | 25 | # set training devices 26 | train.device = "cuda" 27 | model.device = train.device 28 | 29 | # modify optimizer config 30 | optimizer.lr = 1e-4 31 | optimizer.betas = (0.9, 0.999) 32 | optimizer.weight_decay = 1e-4 33 | optimizer.params.lr_factor_func = lambda module_name: 0.1 if "backbone" in module_name else 1 34 | 35 | # modify dataloader config 36 | dataloader.train.num_workers = 16 37 | 38 | # please notice that this is total batch size. 
39 | # suppose you're using 4 gpus for training and the batch size for 40 | # each gpu is 16/4 = 4 41 | dataloader.train.total_batch_size = 16 42 | 43 | # dump the testing results into output_dir for visualization 44 | dataloader.evaluator.output_dir = train.output_dir 45 | -------------------------------------------------------------------------------- /projects/dino/configs/dino-resnet/dino_r50_4scale_12ep_300dn.py: -------------------------------------------------------------------------------- 1 | from .dino_r50_4scale_12ep import ( 2 | train, 3 | dataloader, 4 | optimizer, 5 | lr_multiplier, 6 | model, 7 | ) 8 | 9 | # modify model config 10 | model.dn_number = 300 11 | -------------------------------------------------------------------------------- /projects/dino/configs/dino-resnet/dino_r50_4scale_12ep_better_hyper.py: -------------------------------------------------------------------------------- 1 | import copy 2 | from .dino_r50_4scale_12ep import ( 3 | train, 4 | dataloader, 5 | optimizer, 6 | lr_multiplier, 7 | model, 8 | ) 9 | 10 | # no frozen backbone get better results 11 | model.backbone.freeze_at = -1 12 | 13 | # more dn queries, set 300 here 14 | model.dn_number = 300 15 | 16 | # use 2.0 for class weight 17 | model.criterion.weight_dict = { 18 | "loss_class": 2.0, 19 | "loss_bbox": 5.0, 20 | "loss_giou": 2.0, 21 | "loss_class_dn": 1, 22 | "loss_bbox_dn": 5.0, 23 | "loss_giou_dn": 2.0, 24 | } 25 | 26 | # set aux loss weight dict 27 | base_weight_dict = copy.deepcopy(model.criterion.weight_dict) 28 | if model.aux_loss: 29 | weight_dict = model.criterion.weight_dict 30 | aux_weight_dict = {} 31 | aux_weight_dict.update({k + "_enc": v for k, v in base_weight_dict.items()}) 32 | for i in range(model.transformer.decoder.num_layers - 1): 33 | aux_weight_dict.update({k + f"_{i}": v for k, v in base_weight_dict.items()}) 34 | weight_dict.update(aux_weight_dict) 35 | model.criterion.weight_dict = weight_dict 36 | 37 | # output dir 38 | train.output_dir = 
"./output/dino_r50_4scale_12ep_better_hyper" -------------------------------------------------------------------------------- /projects/dino/configs/dino-resnet/dino_r50_4scale_12ep_no_frozen.py: -------------------------------------------------------------------------------- 1 | from .dino_r50_4scale_12ep import ( 2 | train, 3 | dataloader, 4 | optimizer, 5 | lr_multiplier, 6 | model, 7 | ) 8 | 9 | # no frozen backbone get better results 10 | model.backbone.freeze_at = -1 11 | 12 | train.output_dir = "./output/dino_r50_4scale_12ep_no_frozen_backbone" -------------------------------------------------------------------------------- /projects/dino/configs/dino-resnet/dino_r50_4scale_24ep.py: -------------------------------------------------------------------------------- 1 | from detrex.config import get_config 2 | from .dino_r50_4scale_12ep import ( 3 | train, 4 | dataloader, 5 | optimizer, 6 | model, 7 | ) 8 | 9 | # get default config 10 | lr_multiplier = get_config("common/coco_schedule.py").lr_multiplier_24ep 11 | 12 | # modify model config 13 | # use the original implementation of dab-detr position embedding in 24 epochs training. 
14 | model.position_embedding.temperature = 20 15 | model.position_embedding.offset = 0.0 16 | 17 | # modify training config 18 | train.init_checkpoint = "detectron2://ImageNetPretrained/torchvision/R-50.pkl" 19 | train.output_dir = "./output/dino_r50_4scale_24ep" 20 | 21 | # max training iterations 22 | train.max_iter = 180000 23 | 24 | # modify dataloader config 25 | # not filter empty annotations during training 26 | dataloader.train.dataset.filter_empty = False 27 | dataloader.train.num_workers = 16 28 | 29 | -------------------------------------------------------------------------------- /projects/dino/configs/dino-resnet/dino_r50_5scale_12ep.py: -------------------------------------------------------------------------------- 1 | from .dino_r50_4scale_12ep import ( 2 | train, 3 | dataloader, 4 | optimizer, 5 | lr_multiplier, 6 | model, 7 | ) 8 | 9 | from detectron2.layers import ShapeSpec 10 | 11 | # modify model config to generate 4 scale backbone features 12 | # and 5 scale input features 13 | model.backbone.out_features = ["res2", "res3", "res4", "res5"] 14 | 15 | model.neck.input_shapes = { 16 | "res2": ShapeSpec(channels=256), 17 | "res3": ShapeSpec(channels=512), 18 | "res4": ShapeSpec(channels=1024), 19 | "res5": ShapeSpec(channels=2048), 20 | } 21 | model.neck.in_features = ["res2", "res3", "res4", "res5"] 22 | model.neck.num_outs = 5 23 | model.transformer.num_feature_levels = 5 24 | 25 | # modify training config 26 | train.output_dir = "./output/dino_r50_5scale_12ep" -------------------------------------------------------------------------------- /projects/dino/configs/dino-swin/dino_swin_base_384_4scale_12ep.py: -------------------------------------------------------------------------------- 1 | from detrex.config import get_config 2 | from ..models.dino_swin_base_384 import model 3 | 4 | # get default config 5 | dataloader = get_config("common/data/coco_detr.py").dataloader 6 | optimizer = get_config("common/optim.py").AdamW 7 | lr_multiplier = 
get_config("common/coco_schedule.py").lr_multiplier_12ep 8 | train = get_config("common/train.py").train 9 | 10 | # modify training config 11 | train.init_checkpoint = "/path/to/swin_base_patch4_window12_384_22kto1k.pth" 12 | train.output_dir = "./output/dino_swin_base_384_4scale_12ep" 13 | 14 | # max training iterations 15 | train.max_iter = 90000 16 | train.eval_period = 5000 17 | train.log_period = 20 18 | train.checkpointer.period = 5000 19 | 20 | # gradient clipping for training 21 | train.clip_grad.enabled = True 22 | train.clip_grad.params.max_norm = 0.1 23 | train.clip_grad.params.norm_type = 2 24 | 25 | # set training devices 26 | train.device = "cuda" 27 | model.device = train.device 28 | 29 | # modify optimizer config 30 | optimizer.lr = 1e-4 31 | optimizer.betas = (0.9, 0.999) 32 | optimizer.weight_decay = 1e-4 33 | optimizer.params.lr_factor_func = lambda module_name: 0.1 if "backbone" in module_name else 1 34 | 35 | # modify dataloader config 36 | dataloader.train.num_workers = 16 37 | 38 | # please notice that this is total batch size. 
39 | # suppose you're using 4 gpus for training and the batch size for 40 | # each gpu is 16/4 = 4 41 | dataloader.train.total_batch_size = 16 42 | -------------------------------------------------------------------------------- /projects/dino/configs/dino-swin/dino_swin_large_224_4scale_12ep.py: -------------------------------------------------------------------------------- 1 | from detrex.config import get_config 2 | from ..models.dino_swin_large_224 import model 3 | 4 | # get default config 5 | dataloader = get_config("common/data/coco_detr.py").dataloader 6 | optimizer = get_config("common/optim.py").AdamW 7 | lr_multiplier = get_config("common/coco_schedule.py").lr_multiplier_12ep 8 | train = get_config("common/train.py").train 9 | 10 | # modify training config 11 | train.init_checkpoint = "/path/to/swin_large_patch4_window7_224_22kto1k.pth" 12 | train.output_dir = "./output/dino_swin_large_224_4scale_12ep" 13 | 14 | # max training iterations 15 | train.max_iter = 90000 16 | train.eval_period = 5000 17 | train.log_period = 20 18 | train.checkpointer.period = 5000 19 | 20 | # gradient clipping for training 21 | train.clip_grad.enabled = True 22 | train.clip_grad.params.max_norm = 0.1 23 | train.clip_grad.params.norm_type = 2 24 | 25 | # set training devices 26 | train.device = "cuda" 27 | model.device = train.device 28 | 29 | # modify optimizer config 30 | optimizer.lr = 1e-4 31 | optimizer.betas = (0.9, 0.999) 32 | optimizer.weight_decay = 1e-4 33 | optimizer.params.lr_factor_func = lambda module_name: 0.1 if "backbone" in module_name else 1 34 | 35 | # modify dataloader config 36 | dataloader.train.num_workers = 16 37 | 38 | # please notice that this is total batch size. 
39 | # suppose you're using 4 gpus for training and the batch size for 40 | # each gpu is 16/4 = 4 41 | dataloader.train.total_batch_size = 16 42 | -------------------------------------------------------------------------------- /projects/dino/configs/dino-swin/dino_swin_large_384_4scale_12ep.py: -------------------------------------------------------------------------------- 1 | from detrex.config import get_config 2 | from ..models.dino_swin_large_384 import model 3 | 4 | # get default config 5 | dataloader = get_config("common/data/coco_detr.py").dataloader 6 | optimizer = get_config("common/optim.py").AdamW 7 | lr_multiplier = get_config("common/coco_schedule.py").lr_multiplier_12ep 8 | train = get_config("common/train.py").train 9 | 10 | # modify training config 11 | train.init_checkpoint = "/path/to/swin_large_patch4_window12_384_22kto1k.pth" 12 | train.output_dir = "./output/dino_swin_large_384_4scale_12ep" 13 | 14 | # max training iterations 15 | train.max_iter = 90000 16 | train.eval_period = 5000 17 | train.log_period = 20 18 | train.checkpointer.period = 5000 19 | 20 | # gradient clipping for training 21 | train.clip_grad.enabled = True 22 | train.clip_grad.params.max_norm = 0.1 23 | train.clip_grad.params.norm_type = 2 24 | 25 | # set training devices 26 | train.device = "cuda" 27 | model.device = train.device 28 | 29 | # modify optimizer config 30 | optimizer.lr = 1e-4 31 | optimizer.betas = (0.9, 0.999) 32 | optimizer.weight_decay = 1e-4 33 | optimizer.params.lr_factor_func = lambda module_name: 0.1 if "backbone" in module_name else 1 34 | 35 | # modify dataloader config 36 | dataloader.train.num_workers = 16 37 | 38 | # please notice that this is total batch size. 
39 | # suppose you're using 4 gpus for training and the batch size for
38 | # suppose you're using 4 gpus for training and the batch size for
model.transformer.num_feature_levels = 5 24 | 25 | # modify training config 26 | train.output_dir = "./output/dino_swin_large_384_5scale_36ep" -------------------------------------------------------------------------------- /projects/dino/configs/dino-swin/dino_swin_small_224_4scale_12ep.py: -------------------------------------------------------------------------------- 1 | from detrex.config import get_config 2 | from ..models.dino_swin_small_224 import model 3 | 4 | # get default config 5 | dataloader = get_config("common/data/coco_detr.py").dataloader 6 | optimizer = get_config("common/optim.py").AdamW 7 | lr_multiplier = get_config("common/coco_schedule.py").lr_multiplier_12ep 8 | train = get_config("common/train.py").train 9 | 10 | # modify training config 11 | train.init_checkpoint = "/path/to/swin_small_patch4_window7_224.pth" 12 | train.output_dir = "./output/dino_swin_small_224_4scale_12ep" 13 | 14 | # max training iterations 15 | train.max_iter = 90000 16 | train.eval_period = 5000 17 | train.log_period = 20 18 | train.checkpointer.period = 5000 19 | 20 | # gradient clipping for training 21 | train.clip_grad.enabled = True 22 | train.clip_grad.params.max_norm = 0.1 23 | train.clip_grad.params.norm_type = 2 24 | 25 | # set training devices 26 | train.device = "cuda" 27 | model.device = train.device 28 | 29 | # modify optimizer config 30 | optimizer.lr = 1e-4 31 | optimizer.betas = (0.9, 0.999) 32 | optimizer.weight_decay = 1e-4 33 | optimizer.params.lr_factor_func = lambda module_name: 0.1 if "backbone" in module_name else 1 34 | 35 | # modify dataloader config 36 | dataloader.train.num_workers = 16 37 | 38 | # please notice that this is total batch size. 
39 | # suppose you're using 4 gpus for training and the batch size for
39 | # suppose you're using 4 gpus for training and the batch size for
46 | # suppose you're using 4 gpus for training and the batch size for
"detectron2://ImageNetPretrained/MAE/mae_pretrain_vit_large.pth" 25 | train.output_dir = "./output/dino_vitdet_large_12ep" 26 | 27 | 28 | # use warmup lr scheduler 29 | lr_multiplier = L(WarmupParamScheduler)( 30 | scheduler=L(MultiStepParamScheduler)( 31 | values=[1.0, 0.1], 32 | milestones=[300000, 375000], 33 | ), 34 | warmup_length=250 / train.max_iter, 35 | warmup_factor=0.001, 36 | ) -------------------------------------------------------------------------------- /projects/dino/configs/dino-vitdet/dino_vitdet_large_4scale_50ep.py: -------------------------------------------------------------------------------- 1 | from fvcore.common.param_scheduler import MultiStepParamScheduler 2 | from detectron2.config import LazyCall as L 3 | from detectron2.solver import WarmupParamScheduler 4 | 5 | from .dino_vitdet_large_4scale_12ep import ( 6 | train, 7 | dataloader, 8 | optimizer, 9 | model, 10 | ) 11 | 12 | 13 | # modify training config 14 | train.max_iter = 375000 15 | train.init_checkpoint = "detectron2://ImageNetPretrained/MAE/mae_pretrain_vit_large.pth" 16 | train.output_dir = "./output/dino_vitdet_large_50ep" 17 | 18 | # use warmup lr scheduler 19 | lr_multiplier = L(WarmupParamScheduler)( 20 | scheduler=L(MultiStepParamScheduler)( 21 | values=[1.0, 0.1], 22 | milestones=[300000, 375000], 23 | ), 24 | warmup_length=250 / train.max_iter, 25 | warmup_factor=0.001, 26 | ) -------------------------------------------------------------------------------- /projects/dino/configs/models/dino_convnext.py: -------------------------------------------------------------------------------- 1 | from detectron2.config import LazyCall as L 2 | from detectron2.layers import ShapeSpec 3 | from detrex.modeling.backbone import ConvNeXt 4 | 5 | from .dino_r50 import model 6 | 7 | 8 | # convnext-large-4scale baseline 9 | model.backbone = L(ConvNeXt)( 10 | in_chans=3, 11 | depths=[3, 3, 27, 3], 12 | dims=[192, 384, 768, 1536], 13 | drop_path_rate=0.0, 14 | layer_scale_init_value=1.0, 
15 | out_indices=[1, 2, 3], 16 | ) 17 | 18 | # modify neck config 19 | model.neck.input_shapes = { 20 | "p1": ShapeSpec(channels=384), 21 | "p2": ShapeSpec(channels=768), 22 | "p3": ShapeSpec(channels=1536), 23 | } 24 | model.neck.in_features = ["p1", "p2", "p3"] 25 | -------------------------------------------------------------------------------- /projects/dino/configs/models/dino_eva_01.py: -------------------------------------------------------------------------------- 1 | from functools import partial 2 | import torch.nn as nn 3 | from detectron2.config import LazyCall as L 4 | from detectron2.layers import ShapeSpec 5 | from detectron2.modeling.backbone.fpn import LastLevelMaxPool 6 | from detrex.modeling.backbone import EVAViT, SimpleFeaturePyramid 7 | 8 | from .dino_r50 import model 9 | 10 | 11 | # Base 12 | embed_dim, depth, num_heads, dp = 768, 12, 12, 0.1 13 | 14 | # EVA-01 15 | model.backbone = L(SimpleFeaturePyramid)( 16 | net=L(EVAViT)( 17 | img_size=1024, 18 | patch_size=16, 19 | embed_dim=embed_dim, 20 | depth=depth, 21 | num_heads=num_heads, 22 | drop_path_rate=dp, 23 | window_size=14, 24 | mlp_ratio=4, 25 | qkv_bias=True, 26 | norm_layer=partial(nn.LayerNorm, eps=1e-6), 27 | window_block_indexes=[ 28 | # 2, 5, 8 11 for global attention 29 | 0, 30 | 1, 31 | 3, 32 | 4, 33 | 6, 34 | 7, 35 | 9, 36 | 10, 37 | ], 38 | residual_block_indexes=[], 39 | use_rel_pos=True, 40 | out_feature="last_feat", 41 | ), 42 | in_feature="${.net.out_feature}", 43 | out_channels=256, 44 | scale_factors=(2.0, 1.0, 0.5), # (4.0, 2.0, 1.0, 0.5) in ViTDet 45 | top_block=L(LastLevelMaxPool)(), 46 | norm="LN", 47 | square_pad=1024, 48 | ) 49 | 50 | # modify neck config 51 | model.neck.input_shapes = { 52 | "p3": ShapeSpec(channels=256), 53 | "p4": ShapeSpec(channels=256), 54 | "p5": ShapeSpec(channels=256), 55 | "p6": ShapeSpec(channels=256), 56 | } 57 | model.neck.in_features = ["p3", "p4", "p5", "p6"] 58 | model.neck.num_outs = 4 59 | model.transformer.num_feature_levels = 4 
60 | -------------------------------------------------------------------------------- /projects/dino/configs/models/dino_focalnet.py: -------------------------------------------------------------------------------- 1 | from detectron2.config import LazyCall as L 2 | from detectron2.layers import ShapeSpec 3 | from detrex.modeling.backbone import FocalNet 4 | 5 | from .dino_r50 import model 6 | 7 | 8 | # focalnet-large-4scale baseline 9 | model.backbone = L(FocalNet)( 10 | embed_dim=192, 11 | depths=(2, 2, 18, 2), 12 | focal_levels=(3, 3, 3, 3), 13 | focal_windows=(5, 5, 5, 5), 14 | use_conv_embed=True, 15 | use_postln=True, 16 | use_postln_in_modulation=False, 17 | use_layerscale=True, 18 | normalize_modulator=False, 19 | out_indices=(1, 2, 3), 20 | ) 21 | 22 | # modify neck config 23 | model.neck.input_shapes = { 24 | "p1": ShapeSpec(channels=384), 25 | "p2": ShapeSpec(channels=768), 26 | "p3": ShapeSpec(channels=1536), 27 | } 28 | model.neck.in_features = ["p1", "p2", "p3"] 29 | -------------------------------------------------------------------------------- /projects/dino/configs/models/dino_internimage.py: -------------------------------------------------------------------------------- 1 | from detectron2.config import LazyCall as L 2 | from detectron2.layers import ShapeSpec 3 | from detrex.modeling.backbone import InternImage 4 | 5 | from .dino_r50 import model 6 | 7 | 8 | # internimage-large-4scale baseline 9 | model.backbone = L(InternImage)( 10 | core_op="DCNv3", 11 | channels=160, 12 | depths=[5, 5, 22, 5], 13 | groups=[10, 20, 40, 80], 14 | mlp_ratio=4., 15 | drop_path_rate=0.0, 16 | norm_layer="LN", 17 | layer_scale=1.0, 18 | offset_scale=2.0, 19 | post_norm=True, 20 | with_cp=False, 21 | out_indices=(1, 2, 3), 22 | ) 23 | 24 | # modify neck config 25 | model.neck.input_shapes = { 26 | "p1": ShapeSpec(channels=320), 27 | "p2": ShapeSpec(channels=640), 28 | "p3": ShapeSpec(channels=1280), 29 | } 30 | model.neck.in_features = ["p1", "p2", "p3"] 31 | 
-------------------------------------------------------------------------------- /projects/dino/configs/models/dino_swin_base_384.py: -------------------------------------------------------------------------------- 1 | from detectron2.config import LazyCall as L 2 | from detectron2.layers import ShapeSpec 3 | from detectron2.modeling.backbone import SwinTransformer 4 | 5 | from .dino_r50 import model 6 | 7 | 8 | # modify backbone config 9 | model.backbone = L(SwinTransformer)( 10 | pretrain_img_size=384, 11 | embed_dim=128, 12 | depths=(2, 2, 18, 2), 13 | num_heads=(4, 8, 16, 32), 14 | window_size=12, 15 | out_indices=(1, 2, 3), 16 | ) 17 | 18 | # modify neck config 19 | model.neck.input_shapes = { 20 | "p1": ShapeSpec(channels=256), 21 | "p2": ShapeSpec(channels=512), 22 | "p3": ShapeSpec(channels=1024), 23 | } 24 | model.neck.in_features = ["p1", "p2", "p3"] 25 | -------------------------------------------------------------------------------- /projects/dino/configs/models/dino_swin_large_224.py: -------------------------------------------------------------------------------- 1 | from detectron2.config import LazyCall as L 2 | from detectron2.layers import ShapeSpec 3 | from detectron2.modeling.backbone import SwinTransformer 4 | 5 | from .dino_r50 import model 6 | 7 | 8 | # modify backbone config 9 | model.backbone = L(SwinTransformer)( 10 | pretrain_img_size=224, 11 | embed_dim=192, 12 | depths=(2, 2, 18, 2), 13 | num_heads=(6, 12, 24, 48), 14 | window_size=7, 15 | out_indices=(1, 2, 3), 16 | ) 17 | 18 | # modify neck config 19 | model.neck.input_shapes = { 20 | "p1": ShapeSpec(channels=384), 21 | "p2": ShapeSpec(channels=768), 22 | "p3": ShapeSpec(channels=1536), 23 | } 24 | model.neck.in_features = ["p1", "p2", "p3"] 25 | -------------------------------------------------------------------------------- /projects/dino/configs/models/dino_swin_large_384.py: -------------------------------------------------------------------------------- 1 | from detectron2.config 
import LazyCall as L 2 | from detectron2.layers import ShapeSpec 3 | from detectron2.modeling.backbone import SwinTransformer 4 | 5 | from .dino_r50 import model 6 | 7 | 8 | # modify backbone config 9 | model.backbone = L(SwinTransformer)( 10 | pretrain_img_size=384, 11 | embed_dim=192, 12 | depths=(2, 2, 18, 2), 13 | num_heads=(6, 12, 24, 48), 14 | window_size=12, 15 | out_indices=(1, 2, 3), 16 | ) 17 | 18 | # modify neck config 19 | model.neck.input_shapes = { 20 | "p1": ShapeSpec(channels=384), 21 | "p2": ShapeSpec(channels=768), 22 | "p3": ShapeSpec(channels=1536), 23 | } 24 | model.neck.in_features = ["p1", "p2", "p3"] 25 | -------------------------------------------------------------------------------- /projects/dino/configs/models/dino_swin_small_224.py: -------------------------------------------------------------------------------- 1 | from detectron2.config import LazyCall as L 2 | from detectron2.layers import ShapeSpec 3 | from detectron2.modeling.backbone import SwinTransformer 4 | 5 | from .dino_r50 import model 6 | 7 | 8 | # modify backbone config 9 | model.backbone = L(SwinTransformer)( 10 | pretrain_img_size=224, 11 | embed_dim=96, 12 | depths=(2, 2, 18, 2), 13 | num_heads=(3, 6, 12, 24), 14 | drop_path_rate=0.2, 15 | window_size=7, 16 | out_indices=(1, 2, 3), 17 | ) 18 | 19 | # modify neck config 20 | model.neck.input_shapes = { 21 | "p1": ShapeSpec(channels=192), 22 | "p2": ShapeSpec(channels=384), 23 | "p3": ShapeSpec(channels=768), 24 | } 25 | model.neck.in_features = ["p1", "p2", "p3"] 26 | -------------------------------------------------------------------------------- /projects/dino/configs/models/dino_swin_tiny_224.py: -------------------------------------------------------------------------------- 1 | from detectron2.config import LazyCall as L 2 | from detectron2.layers import ShapeSpec 3 | from detectron2.modeling.backbone import SwinTransformer 4 | 5 | from .dino_r50 import model 6 | 7 | 8 | # modify backbone config 9 | model.backbone = 
L(SwinTransformer)( 10 | pretrain_img_size=224, 11 | embed_dim=96, 12 | depths=(2, 2, 6, 2), 13 | num_heads=(3, 6, 12, 24), 14 | drop_path_rate=0.1, 15 | window_size=7, 16 | out_indices=(1, 2, 3), 17 | ) 18 | 19 | # modify neck config 20 | model.neck.input_shapes = { 21 | "p1": ShapeSpec(channels=192), 22 | "p2": ShapeSpec(channels=384), 23 | "p3": ShapeSpec(channels=768), 24 | } 25 | model.neck.in_features = ["p1", "p2", "p3"] 26 | -------------------------------------------------------------------------------- /projects/dino/configs/timm_example.py: -------------------------------------------------------------------------------- 1 | from detectron2.config import LazyCall as L 2 | from detectron2.modeling import ShapeSpec 3 | from detectron2.layers import FrozenBatchNorm2d 4 | from .dino_r50_4scale_12ep import ( 5 | train, 6 | dataloader, 7 | optimizer, 8 | lr_multiplier, 9 | ) 10 | from .models.dino_r50 import model 11 | 12 | from detrex.modeling.backbone import TimmBackbone 13 | 14 | # modify backbone configs 15 | model.backbone = L(TimmBackbone)( 16 | model_name="resnet152d", # name in timm 17 | features_only=True, 18 | pretrained=True, 19 | in_channels=3, 20 | out_indices=(1, 2, 3), 21 | norm_layer=FrozenBatchNorm2d, 22 | ) 23 | 24 | # modify neck configs 25 | model.neck.input_shapes = { 26 | "p1": ShapeSpec(channels=256), 27 | "p2": ShapeSpec(channels=512), 28 | "p3": ShapeSpec(channels=1024), 29 | } 30 | model.neck.in_features = ["p1", "p2", "p3"] 31 | 32 | # modify training configs 33 | train.init_checkpoint = "" 34 | -------------------------------------------------------------------------------- /projects/dino/configs/torchvision_example.py: -------------------------------------------------------------------------------- 1 | from detectron2.config import LazyCall as L 2 | from detectron2.modeling import ShapeSpec 3 | from .dino_r50_4scale_12ep import ( 4 | train, 5 | dataloader, 6 | optimizer, 7 | lr_multiplier, 8 | ) 9 | from .models.dino_r50 import 
model 10 | 11 | from detrex.modeling.backbone import TorchvisionBackbone 12 | 13 | # modify backbone configs 14 | model.backbone = L(TorchvisionBackbone)( 15 | model_name="resnet50", 16 | pretrained=True, 17 | return_nodes={ 18 | "layer2": "res3", 19 | "layer3": "res4", 20 | "layer4": "res5", 21 | }, 22 | ) 23 | 24 | # modify neck configs 25 | model.neck.input_shapes = { 26 | "res3": ShapeSpec(channels=512), 27 | "res4": ShapeSpec(channels=1024), 28 | "res5": ShapeSpec(channels=2048), 29 | } 30 | model.neck.in_features = ["res3", "res4", "res5"] 31 | 32 | # modify training configs 33 | train.init_checkpoint = "" 34 | -------------------------------------------------------------------------------- /projects/dino/modeling/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2022 The IDEA Authors. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | 17 | from .dino_transformer import ( 18 | DINOTransformerEncoder, 19 | DINOTransformerDecoder, 20 | DINOTransformer, 21 | ) 22 | from .dino import DINO 23 | from .dn_criterion import DINOCriterion 24 | -------------------------------------------------------------------------------- /projects/dino_eva/assets/dino_arch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IDEA-Research/detrex/c56d32e3d0262cff9835ebe80a0642965ae0cb3e/projects/dino_eva/assets/dino_arch.png -------------------------------------------------------------------------------- /projects/dino_eva/configs/common/coco_loader_lsj.py: -------------------------------------------------------------------------------- 1 | import detectron2.data.transforms as T 2 | from detectron2 import model_zoo 3 | from detectron2.config import LazyCall as L 4 | 5 | # Data using LSJ 6 | image_size = 1024 7 | dataloader = model_zoo.get_config("common/data/coco.py").dataloader 8 | dataloader.train.mapper.augmentations = [ 9 | L(T.RandomFlip)(horizontal=True), # flip first 10 | L(T.ResizeScale)( 11 | min_scale=0.1, max_scale=2.0, target_height=image_size, target_width=image_size 12 | ), 13 | L(T.FixedSizeCrop)(crop_size=(image_size, image_size), pad=False), 14 | ] 15 | dataloader.train.mapper.image_format = "RGB" 16 | dataloader.train.total_batch_size = 64 17 | # recompute boxes due to cropping 18 | dataloader.train.mapper.recompute_boxes = True 19 | 20 | dataloader.test.mapper.augmentations = [ 21 | L(T.ResizeShortestEdge)(short_edge_length=image_size, max_size=image_size), 22 | ] -------------------------------------------------------------------------------- /projects/dino_eva/configs/common/coco_loader_lsj_1024.py: -------------------------------------------------------------------------------- 1 | import detectron2.data.transforms as T 2 | from detectron2 import model_zoo 3 | from detectron2.config import LazyCall as L 4 | 5 | # Data using 
LSJ 6 | image_size = 1024 7 | dataloader = model_zoo.get_config("common/data/coco.py").dataloader 8 | dataloader.train.mapper.augmentations = [ 9 | L(T.RandomFlip)(horizontal=True), # flip first 10 | L(T.ResizeScale)( 11 | min_scale=0.1, max_scale=2.0, target_height=image_size, target_width=image_size 12 | ), 13 | L(T.FixedSizeCrop)(crop_size=(image_size, image_size), pad=False), 14 | ] 15 | dataloader.train.mapper.image_format = "RGB" 16 | dataloader.train.total_batch_size = 64 17 | # recompute boxes due to cropping 18 | dataloader.train.mapper.recompute_boxes = True 19 | 20 | dataloader.test.mapper.augmentations = [ 21 | L(T.ResizeShortestEdge)(short_edge_length=image_size, max_size=image_size), 22 | ] -------------------------------------------------------------------------------- /projects/dino_eva/configs/common/coco_loader_lsj_1280.py: -------------------------------------------------------------------------------- 1 | import detectron2.data.transforms as T 2 | from detectron2 import model_zoo 3 | from detectron2.config import LazyCall as L 4 | 5 | # Data using LSJ 6 | image_size = 1280 7 | dataloader = model_zoo.get_config("common/data/coco.py").dataloader 8 | dataloader.train.mapper.augmentations = [ 9 | L(T.RandomFlip)(horizontal=True), # flip first 10 | L(T.ResizeScale)( 11 | min_scale=0.1, max_scale=2.0, target_height=image_size, target_width=image_size 12 | ), 13 | L(T.FixedSizeCrop)(crop_size=(image_size, image_size), pad=False), 14 | ] 15 | dataloader.train.mapper.image_format = "RGB" 16 | dataloader.train.total_batch_size = 64 17 | # recompute boxes due to cropping 18 | dataloader.train.mapper.recompute_boxes = True 19 | 20 | dataloader.test.mapper.augmentations = [ 21 | L(T.ResizeShortestEdge)(short_edge_length=image_size, max_size=image_size), 22 | ] -------------------------------------------------------------------------------- /projects/dino_eva/configs/common/coco_loader_lsj_1536.py: 
-------------------------------------------------------------------------------- 1 | import detectron2.data.transforms as T 2 | from detectron2 import model_zoo 3 | from detectron2.config import LazyCall as L 4 | 5 | # Data using LSJ 6 | image_size = 1536 7 | dataloader = model_zoo.get_config("common/data/coco.py").dataloader 8 | dataloader.train.mapper.augmentations = [ 9 | L(T.RandomFlip)(horizontal=True), # flip first 10 | L(T.ResizeScale)( 11 | min_scale=0.1, max_scale=2.0, target_height=image_size, target_width=image_size 12 | ), 13 | L(T.FixedSizeCrop)(crop_size=(image_size, image_size), pad=False), 14 | ] 15 | dataloader.train.mapper.image_format = "RGB" 16 | dataloader.train.total_batch_size = 64 17 | # recompute boxes due to cropping 18 | dataloader.train.mapper.recompute_boxes = True 19 | 20 | dataloader.test.mapper.augmentations = [ 21 | L(T.ResizeShortestEdge)(short_edge_length=image_size, max_size=image_size), 22 | ] -------------------------------------------------------------------------------- /projects/dino_eva/configs/models/dino_eva_01.py: -------------------------------------------------------------------------------- 1 | from functools import partial 2 | import torch.nn as nn 3 | from detectron2.config import LazyCall as L 4 | from detectron2.layers import ShapeSpec 5 | from detectron2.modeling.backbone.fpn import LastLevelMaxPool 6 | from detrex.modeling.backbone import EVAViT, SimpleFeaturePyramid 7 | 8 | from .dino_r50 import model 9 | 10 | 11 | # Base 12 | embed_dim, depth, num_heads, dp = 768, 12, 12, 0.1 13 | 14 | # EVA-01 15 | model.backbone = L(SimpleFeaturePyramid)( 16 | net=L(EVAViT)( 17 | img_size=1024, 18 | patch_size=16, 19 | embed_dim=embed_dim, 20 | depth=depth, 21 | num_heads=num_heads, 22 | drop_path_rate=dp, 23 | window_size=14, 24 | mlp_ratio=4, 25 | qkv_bias=True, 26 | norm_layer=partial(nn.LayerNorm, eps=1e-6), 27 | window_block_indexes=[ 28 | # 2, 5, 8 11 for global attention 29 | 0, 30 | 1, 31 | 3, 32 | 4, 33 | 6, 34 | 7, 
35 | 9, 36 | 10, 37 | ], 38 | residual_block_indexes=[], 39 | use_rel_pos=True, 40 | out_feature="last_feat", 41 | ), 42 | in_feature="${.net.out_feature}", 43 | out_channels=256, 44 | scale_factors=(2.0, 1.0, 0.5), # (4.0, 2.0, 1.0, 0.5) in ViTDet 45 | top_block=L(LastLevelMaxPool)(), 46 | norm="LN", 47 | square_pad=1024, 48 | ) 49 | 50 | # modify neck config 51 | model.neck.input_shapes = { 52 | "p3": ShapeSpec(channels=256), 53 | "p4": ShapeSpec(channels=256), 54 | "p5": ShapeSpec(channels=256), 55 | "p6": ShapeSpec(channels=256), 56 | } 57 | model.neck.in_features = ["p3", "p4", "p5", "p6"] 58 | model.neck.num_outs = 4 59 | model.transformer.num_feature_levels = 4 60 | -------------------------------------------------------------------------------- /projects/dino_eva/configs/models/dino_eva_02.py: -------------------------------------------------------------------------------- 1 | from functools import partial 2 | import torch.nn as nn 3 | from detectron2.config import LazyCall as L 4 | from detectron2.layers import ShapeSpec 5 | from detectron2.modeling.backbone.fpn import LastLevelMaxPool 6 | from detrex.modeling.backbone import EVA02_ViT, SimpleFeaturePyramid 7 | 8 | from .dino_r50 import model 9 | 10 | 11 | # Base 12 | embed_dim, depth, num_heads, dp = 768, 12, 12, 0.1 13 | 14 | # EVA-01 15 | model.backbone = L(SimpleFeaturePyramid)( 16 | net=L(EVA02_ViT)( 17 | img_size=1024, 18 | patch_size=16, 19 | embed_dim=embed_dim, 20 | depth=depth, 21 | num_heads=num_heads, 22 | drop_path_rate=dp, 23 | window_size=14, 24 | mlp_ratio=4, 25 | qkv_bias=True, 26 | norm_layer=partial(nn.LayerNorm, eps=1e-6), 27 | window_block_indexes=[ 28 | # 2, 5, 8 11 for global attention 29 | 0, 30 | 1, 31 | 3, 32 | 4, 33 | 6, 34 | 7, 35 | 9, 36 | 10, 37 | ], 38 | residual_block_indexes=[], 39 | use_rel_pos=True, 40 | out_feature="last_feat", 41 | ), 42 | in_feature="${.net.out_feature}", 43 | out_channels=256, 44 | scale_factors=(2.0, 1.0, 0.5), # (4.0, 2.0, 1.0, 0.5) in ViTDet 45 | 
top_block=L(LastLevelMaxPool)(), 46 | norm="LN", 47 | square_pad=1024, 48 | ) 49 | 50 | # modify neck config 51 | model.neck.input_shapes = { 52 | "p3": ShapeSpec(channels=256), 53 | "p4": ShapeSpec(channels=256), 54 | "p5": ShapeSpec(channels=256), 55 | "p6": ShapeSpec(channels=256), 56 | } 57 | model.neck.in_features = ["p3", "p4", "p5", "p6"] 58 | model.neck.num_outs = 4 59 | model.transformer.num_feature_levels = 4 60 | -------------------------------------------------------------------------------- /projects/dino_eva/modeling/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2022 The IDEA Authors. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | 17 | from .dino_transformer import ( 18 | DINOTransformerEncoder, 19 | DINOTransformerDecoder, 20 | DINOTransformer, 21 | ) 22 | from .dino import DINO 23 | from .dn_criterion import DINOCriterion 24 | -------------------------------------------------------------------------------- /projects/dn_deformable_detr/assets/dn_detr_arch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IDEA-Research/detrex/c56d32e3d0262cff9835ebe80a0642965ae0cb3e/projects/dn_deformable_detr/assets/dn_detr_arch.png -------------------------------------------------------------------------------- /projects/dn_deformable_detr/configs/dn_deformable_detr_r50_12ep.py: -------------------------------------------------------------------------------- 1 | from detrex.config import get_config 2 | from .models.dn_deformable_detr_r50 import model 3 | 4 | dataloader = get_config("common/data/coco_detr.py").dataloader 5 | optimizer = get_config("common/optim.py").AdamW 6 | lr_multiplier = get_config("common/coco_schedule.py").lr_multiplier_12ep 7 | train = get_config("common/train.py").train 8 | 9 | # modify training config 10 | train.init_checkpoint = "detectron2://ImageNetPretrained/torchvision/R-50.pkl" 11 | train.output_dir = "./output/dn_deformable_detr_r50_12ep" 12 | 13 | # max training iterations 14 | train.max_iter = 90000 15 | 16 | # run evaluation every 5000 iters 17 | train.eval_period = 5000 18 | 19 | # log training infomation every 20 iters 20 | train.log_period = 20 21 | 22 | # save checkpoint every 5000 iters 23 | train.checkpointer.period = 5000 24 | 25 | # gradient clipping for training 26 | train.clip_grad.enabled = True 27 | train.clip_grad.params.max_norm = 0.1 28 | train.clip_grad.params.norm_type = 2 29 | 30 | # set training devices 31 | train.device = "cuda" 32 | model.device = train.device 33 | 34 | # modify optimizer config 35 | optimizer.lr = 1e-4 36 | optimizer.betas = (0.9, 0.999) 37 | 
optimizer.weight_decay = 1e-4 38 | optimizer.params.lr_factor_func = lambda module_name: 0.1 if "backbone" in module_name else 1 39 | 40 | # modify dataloader config 41 | dataloader.train.num_workers = 16 42 | 43 | # please notice that this is total batch size. 44 | # surpose you're using 4 gpus for training and the batch size for 45 | # each gpu is 16/4 = 4 46 | dataloader.train.total_batch_size = 16 47 | 48 | # dump the testing results into output_dir for visualization 49 | dataloader.evaluator.output_dir = train.output_dir 50 | -------------------------------------------------------------------------------- /projects/dn_deformable_detr/configs/dn_deformable_detr_r50_50ep.py: -------------------------------------------------------------------------------- 1 | from detrex.config import get_config 2 | from .models.dn_deformable_detr_r50 import model 3 | 4 | dataloader = get_config("common/data/coco_detr.py").dataloader 5 | optimizer = get_config("common/optim.py").AdamW 6 | lr_multiplier = get_config("common/coco_schedule.py").lr_multiplier_50ep 7 | train = get_config("common/train.py").train 8 | 9 | # modify training config 10 | train.init_checkpoint = "detectron2://ImageNetPretrained/torchvision/R-50.pkl" 11 | train.output_dir = "./output/dn_deformable_detr_r50_50ep" 12 | 13 | # max training iterations 14 | train.max_iter = 375000 15 | 16 | # run evaluation every 5000 iters 17 | train.eval_period = 5000 18 | 19 | # log training infomation every 20 iters 20 | train.log_period = 20 21 | 22 | # save checkpoint every 5000 iters 23 | train.checkpointer.period = 5000 24 | 25 | # gradient clipping for training 26 | train.clip_grad.enabled = True 27 | train.clip_grad.params.max_norm = 0.1 28 | train.clip_grad.params.norm_type = 2 29 | 30 | # set training devices 31 | train.device = "cuda" 32 | model.device = train.device 33 | 34 | # modify optimizer config 35 | optimizer.lr = 1e-4 36 | optimizer.betas = (0.9, 0.999) 37 | optimizer.weight_decay = 1e-4 38 | 
optimizer.params.lr_factor_func = lambda module_name: 0.1 if "backbone" in module_name else 1 39 | 40 | # modify dataloader config 41 | dataloader.train.num_workers = 16 42 | 43 | # please notice that this is total batch size. 44 | # surpose you're using 4 gpus for training and the batch size for 45 | # each gpu is 16/4 = 4 46 | dataloader.train.total_batch_size = 16 47 | 48 | # dump the testing results into output_dir for visualization 49 | dataloader.evaluator.output_dir = train.output_dir 50 | -------------------------------------------------------------------------------- /projects/dn_deformable_detr/modeling/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2022 The IDEA Authors. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | 17 | from .dn_deformable_transformer import ( 18 | DNDeformableDetrTransformerEncoder, 19 | DNDeformableDetrTransformerDecoder, 20 | DNDeformableDetrTransformer, 21 | ) 22 | from .dn_deformable_detr import DNDeformableDETR 23 | from .dn_criterion import DNCriterion 24 | -------------------------------------------------------------------------------- /projects/dn_detr/assets/dn_detr_arch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IDEA-Research/detrex/c56d32e3d0262cff9835ebe80a0642965ae0cb3e/projects/dn_detr/assets/dn_detr_arch.png -------------------------------------------------------------------------------- /projects/dn_detr/configs/dn_detr_r50_50ep.py: -------------------------------------------------------------------------------- 1 | from detrex.config import get_config 2 | from .models.dn_detr_r50 import model 3 | 4 | dataloader = get_config("common/data/coco_detr.py").dataloader 5 | optimizer = get_config("common/optim.py").AdamW 6 | lr_multiplier = get_config("common/coco_schedule.py").lr_multiplier_50ep 7 | train = get_config("common/train.py").train 8 | 9 | # modify training config 10 | train.init_checkpoint = "detectron2://ImageNetPretrained/torchvision/R-50.pkl" 11 | train.output_dir = "./output/dn_detr_r50_50ep" 12 | 13 | # max training iterations 14 | train.max_iter = 375000 15 | 16 | # run evaluation every 5000 iters 17 | train.eval_period = 5000 18 | 19 | # log training infomation every 20 iters 20 | train.log_period = 20 21 | 22 | # save checkpoint every 5000 iters 23 | train.checkpointer.period = 5000 24 | 25 | # gradient clipping for training 26 | train.clip_grad.enabled = True 27 | train.clip_grad.params.max_norm = 0.1 28 | train.clip_grad.params.norm_type = 2 29 | 30 | # set training devices 31 | train.device = "cuda" 32 | model.device = train.device 33 | 34 | # modify optimizer config 35 | optimizer.lr = 1e-4 36 | optimizer.betas = (0.9, 0.999) 37 | 
optimizer.weight_decay = 1e-4 38 | optimizer.params.lr_factor_func = lambda module_name: 0.1 if "backbone" in module_name else 1 39 | 40 | # modify dataloader config 41 | dataloader.train.num_workers = 16 42 | 43 | # please notice that this is total batch size. 44 | # surpose you're using 4 gpus for training and the batch size for 45 | # each gpu is 16/4 = 4 46 | dataloader.train.total_batch_size = 16 47 | 48 | # dump the testing results into output_dir for visualization 49 | dataloader.evaluator.output_dir = train.output_dir 50 | -------------------------------------------------------------------------------- /projects/dn_detr/configs/dn_detr_r50_dc5_50ep.py: -------------------------------------------------------------------------------- 1 | from .dn_detr_r50_50ep import ( 2 | train, 3 | dataloader, 4 | optimizer, 5 | lr_multiplier, 6 | ) 7 | from .models.dn_detr_r50_dc5 import model 8 | 9 | # modify training config 10 | train.init_checkpoint = "https://download.pytorch.org/models/resnet50-0676ba61.pth" 11 | train.output_dir = "./output/dab_detr_r50_dc5_50ep" 12 | 13 | -------------------------------------------------------------------------------- /projects/dn_detr/configs/models/dn_detr_r50_dc5.py: -------------------------------------------------------------------------------- 1 | from detectron2.config import LazyCall as L 2 | from detrex.modeling.backbone.torchvision_resnet import TorchvisionResNet 3 | 4 | from .dn_detr_r50 import model 5 | 6 | 7 | model.backbone=L(TorchvisionResNet)( 8 | name="resnet50", 9 | train_backbone=True, 10 | dilation=True, 11 | return_layers={"layer4": "res5"} 12 | ) 13 | -------------------------------------------------------------------------------- /projects/dn_detr/modeling/__init__.py: -------------------------------------------------------------------------------- 1 | from .dn_detr import DNDETR 2 | from .dn_transformers import ( 3 | DNDetrTransformerEncoder, 4 | DNDetrTransformerDecoder, 5 | DNDetrTransformer, 6 | ) 7 | from 
.dn_criterion import DNCriterion 8 | -------------------------------------------------------------------------------- /projects/focus_detr/configs/focus_detr_resnet/focus_detr_r101_4scale_12ep.py: -------------------------------------------------------------------------------- 1 | from .focus_detr_r50_4scale_12ep import ( 2 | train, 3 | dataloader, 4 | optimizer, 5 | lr_multiplier, 6 | model, 7 | ) 8 | 9 | # modify training config 10 | train.init_checkpoint = "./pre-trained/resnet_torch/r101_v1.pkl" 11 | train.output_dir = "./output/focus_detr_r101_4scale_12ep" 12 | 13 | # modify model config 14 | model.backbone.stages.depth = 101 15 | -------------------------------------------------------------------------------- /projects/focus_detr/configs/focus_detr_resnet/focus_detr_r101_4scale_24ep.py: -------------------------------------------------------------------------------- 1 | from .focus_detr_r50_4scale_24ep import ( 2 | train, 3 | dataloader, 4 | optimizer, 5 | lr_multiplier, 6 | model, 7 | ) 8 | 9 | # modify training config 10 | train.init_checkpoint = "./pre-trained/resnet_torch/r101_v1.pkl" 11 | train.output_dir = "./output/focus_detr_r101_4scale_24ep" 12 | 13 | # modify model config 14 | model.backbone.stages.depth = 101 15 | -------------------------------------------------------------------------------- /projects/focus_detr/configs/focus_detr_resnet/focus_detr_r101_4scale_36ep.py: -------------------------------------------------------------------------------- 1 | # Use detectron2's lr_multiplier_3x schedule to adjust the learning-rate decay 2 | from detrex.config import get_config 3 | from .focus_detr_r50_4scale_12ep import ( 4 | train, 5 | dataloader, 6 | optimizer, 7 | model, 8 | ) 9 | 10 | # get default config 11 | lr_multiplier = get_config("common/coco_schedule.py").lr_multiplier_3x 12 | 13 | # modify model config 14 | # use the original implementation of dab-detr position embedding in 24 epochs training. 
15 | model.position_embedding.temperature = 20 16 | model.position_embedding.offset = 0.0 17 | 18 | # modify training config 19 | # NOTE(review): this "r101" config loads r50 weights and never sets model.backbone.stages.depth = 101, unlike the sibling r101 12ep/24ep configs — confirm whether this is intentional 20 | train.init_checkpoint = "./pre-trained/resnet_torch/r50_v1.pkl" 21 | train.output_dir = "./output/focus_detr_r50_4scale_36ep_v3" 22 | 23 | # max training iterations 24 | train.max_iter = 270000 25 | 26 | # modify dataloader config 27 | # filter out images with empty annotations during training (filter_empty=True) 28 | dataloader.train.dataset.filter_empty = True 29 | dataloader.train.num_workers = 16 -------------------------------------------------------------------------------- /projects/focus_detr/configs/focus_detr_resnet/focus_detr_r50_4scale_24ep.py: -------------------------------------------------------------------------------- 1 | #Copyright (C) 2023. Huawei Technologies Co., Ltd. All rights reserved. 2 | 3 | #This program is free software; you can redistribute it and/or modify it under the terms of the MIT License. 4 | 5 | #This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the MIT License for more details. 6 | 7 | from detrex.config import get_config 8 | from .focus_detr_r50_4scale_12ep import ( 9 | train, 10 | dataloader, 11 | optimizer, 12 | model, 13 | ) 14 | 15 | # get default config 16 | lr_multiplier = get_config("common/coco_schedule.py").lr_multiplier_24ep 17 | 18 | # modify model config 19 | # use the original implementation of dab-detr position embedding in 24 epochs training. 
20 | model.position_embedding.temperature = 20 21 | model.position_embedding.offset = 0.0 22 | 23 | # modify training config 24 | train.init_checkpoint = "./pre-trained/resnet_torch/r50_v1.pkl" 25 | train.output_dir = "./output/focus_detr_r50_4scale_24ep" 26 | 27 | # max training iterations 28 | train.max_iter = 180000 29 | 30 | # modify dataloader config 31 | # filter out images with empty annotations during training (filter_empty=True) 32 | dataloader.train.dataset.filter_empty = True 33 | dataloader.train.num_workers = 16 -------------------------------------------------------------------------------- /projects/focus_detr/configs/focus_detr_resnet/focus_detr_r50_4scale_36ep.py: -------------------------------------------------------------------------------- 1 | # Modified from the 24-epoch config to extend the training time 2 | from detrex.config import get_config 3 | from .focus_detr_r50_4scale_12ep import ( 4 | train, 5 | dataloader, 6 | optimizer, 7 | model, 8 | ) 9 | 10 | # get default config 11 | lr_multiplier = get_config("common/coco_schedule.py").lr_multiplier_24_36ep 12 | 13 | # modify model config 14 | # use the original implementation of dab-detr position embedding for this longer training schedule. 15 | model.position_embedding.temperature = 20 16 | model.position_embedding.offset = 0.0 17 | 18 | # modify training config 19 | train.init_checkpoint = "./pre-trained/resnet_torch/r50_v1.pkl" 20 | train.output_dir = "./output/focus_detr_r50_4scale_36ep_v2" 21 | 22 | # max training iterations 23 | train.max_iter = 270000 24 | 25 | # modify dataloader config 26 | # filter out images with empty annotations during training (filter_empty=True) 27 | dataloader.train.dataset.filter_empty = True 28 | dataloader.train.num_workers = 16 -------------------------------------------------------------------------------- /projects/focus_detr/configs/models/focus_detr_swin_base_384.py: -------------------------------------------------------------------------------- 1 | #Copyright (C) 2023. Huawei Technologies Co., Ltd. All rights reserved. 
2 | 3 | #This program is free software; you can redistribute it and/or modify it under the terms of the MIT License. 4 | 5 | #This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the MIT License for more details. 6 | 7 | from detectron2.config import LazyCall as L 8 | from detectron2.layers import ShapeSpec 9 | from detectron2.modeling.backbone import SwinTransformer 10 | 11 | from .focus_detr_r50 import model 12 | 13 | 14 | # modify backbone config 15 | model.backbone = L(SwinTransformer)( 16 | pretrain_img_size=384, 17 | embed_dim=128, 18 | depths=(2, 2, 18, 2), 19 | num_heads=(4, 8, 16, 32), 20 | window_size=12, 21 | out_indices=(1, 2, 3), 22 | ) 23 | 24 | # modify neck config 25 | model.neck.input_shapes = { 26 | "p1": ShapeSpec(channels=256), 27 | "p2": ShapeSpec(channels=512), 28 | "p3": ShapeSpec(channels=1024), 29 | } 30 | model.neck.in_features = ["p1", "p2", "p3"] 31 | -------------------------------------------------------------------------------- /projects/focus_detr/configs/models/focus_detr_swin_large_224.py: -------------------------------------------------------------------------------- 1 | #Copyright (C) 2023. Huawei Technologies Co., Ltd. All rights reserved. 2 | 3 | #This program is free software; you can redistribute it and/or modify it under the terms of the MIT License. 4 | 5 | #This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the MIT License for more details. 
6 | 7 | from detectron2.config import LazyCall as L 8 | from detectron2.layers import ShapeSpec 9 | from detectron2.modeling.backbone import SwinTransformer 10 | 11 | from .focus_detr_r50 import model 12 | 13 | 14 | # modify backbone config 15 | model.backbone = L(SwinTransformer)( 16 | pretrain_img_size=224, 17 | embed_dim=192, 18 | depths=(2, 2, 18, 2), 19 | num_heads=(6, 12, 24, 48), 20 | window_size=7, 21 | out_indices=(1, 2, 3), 22 | ) 23 | 24 | # modify neck config 25 | model.neck.input_shapes = { 26 | "p1": ShapeSpec(channels=384), 27 | "p2": ShapeSpec(channels=768), 28 | "p3": ShapeSpec(channels=1536), 29 | } 30 | model.neck.in_features = ["p1", "p2", "p3"] 31 | -------------------------------------------------------------------------------- /projects/focus_detr/configs/models/focus_detr_swin_large_384.py: -------------------------------------------------------------------------------- 1 | #Copyright (C) 2023. Huawei Technologies Co., Ltd. All rights reserved. 2 | 3 | #This program is free software; you can redistribute it and/or modify it under the terms of the MIT License. 4 | 5 | #This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the MIT License for more details. 
6 | 7 | from detectron2.config import LazyCall as L 8 | from detectron2.layers import ShapeSpec 9 | from detectron2.modeling.backbone import SwinTransformer 10 | 11 | from .focus_detr_r50 import model 12 | 13 | 14 | # modify backbone config 15 | model.backbone = L(SwinTransformer)( 16 | pretrain_img_size=384, 17 | embed_dim=192, 18 | depths=(2, 2, 18, 2), 19 | num_heads=(6, 12, 24, 48), 20 | window_size=12, 21 | out_indices=(1, 2, 3), 22 | ) 23 | 24 | # modify neck config 25 | model.neck.input_shapes = { 26 | "p1": ShapeSpec(channels=384), 27 | "p2": ShapeSpec(channels=768), 28 | "p3": ShapeSpec(channels=1536), 29 | } 30 | model.neck.in_features = ["p1", "p2", "p3"] 31 | -------------------------------------------------------------------------------- /projects/focus_detr/configs/models/focus_detr_swin_small_224.py: -------------------------------------------------------------------------------- 1 | #Copyright (C) 2023. Huawei Technologies Co., Ltd. All rights reserved. 2 | 3 | #This program is free software; you can redistribute it and/or modify it under the terms of the MIT License. 4 | 5 | #This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the MIT License for more details. 
6 | 7 | from detectron2.config import LazyCall as L 8 | from detectron2.layers import ShapeSpec 9 | from detectron2.modeling.backbone import SwinTransformer 10 | 11 | from .focus_detr_r50 import model 12 | 13 | 14 | # modify backbone config 15 | model.backbone = L(SwinTransformer)( 16 | pretrain_img_size=224, 17 | embed_dim=96, 18 | depths=(2, 2, 18, 2), 19 | num_heads=(3, 6, 12, 24), 20 | drop_path_rate=0.2, 21 | window_size=7, 22 | out_indices=(1, 2, 3), 23 | ) 24 | 25 | # modify neck config 26 | model.neck.input_shapes = { 27 | "p1": ShapeSpec(channels=192), 28 | "p2": ShapeSpec(channels=384), 29 | "p3": ShapeSpec(channels=768), 30 | } 31 | model.neck.in_features = ["p1", "p2", "p3"] 32 | -------------------------------------------------------------------------------- /projects/focus_detr/configs/models/focus_detr_swin_tiny_224.py: -------------------------------------------------------------------------------- 1 | #Copyright (C) 2023. Huawei Technologies Co., Ltd. All rights reserved. 2 | 3 | #This program is free software; you can redistribute it and/or modify it under the terms of the MIT License. 4 | 5 | #This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the MIT License for more details. 
6 | 7 | from detectron2.config import LazyCall as L 8 | from detectron2.layers import ShapeSpec 9 | from detectron2.modeling.backbone import SwinTransformer 10 | 11 | from .focus_detr_r50 import model 12 | 13 | 14 | # modify backbone config 15 | model.backbone = L(SwinTransformer)( 16 | pretrain_img_size=224, 17 | embed_dim=96, 18 | depths=(2, 2, 6, 2), 19 | num_heads=(3, 6, 12, 24), 20 | drop_path_rate=0.1, 21 | window_size=7, 22 | out_indices=(1, 2, 3), 23 | ) 24 | 25 | # modify neck config 26 | model.neck.input_shapes = { 27 | "p1": ShapeSpec(channels=192), 28 | "p2": ShapeSpec(channels=384), 29 | "p3": ShapeSpec(channels=768), 30 | } 31 | model.neck.in_features = ["p1", "p2", "p3"] 32 | -------------------------------------------------------------------------------- /projects/focus_detr/modeling/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2022 The IDEA Authors. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | 17 | from .focus_detr_transformer import ( 18 | FOCUS_DETRTransformerEncoder, 19 | FOCUS_DETRTransformerDecoder, 20 | FOCUS_DETRTransformer, 21 | MaskPredictor, 22 | ) 23 | from .focus_detr import FOCUS_DETR 24 | from .dn_criterion import FOCUS_DETRCriterion 25 | -------------------------------------------------------------------------------- /projects/group_detr/README.md: -------------------------------------------------------------------------------- 1 | ## Group DETR: Fast DETR Training with Group-Wise One-to-Many Assignment 2 | 3 | Chen, Qiang and Chen, Xiaokang and Wang, Jian and Feng, Haocheng and Han, Junyu and Ding, Errui and Zeng, Gang and Wang, Jingdong 4 | 5 | [[`arXiv`](https://arxiv.org/abs/2207.13085)] [[`BibTeX`](#citing-group-detr)] 6 | 7 |
8 | 9 |

10 | 11 | **Note**: This is the implementation of `Conditional DETR + Group DETR` 12 | 13 | ## Training 14 | All configs can be trained with: 15 | ```bash 16 | cd detrex 17 | python tools/train_net.py --config-file projects/group_detr/configs/path/to/config.py --num-gpus 8 18 | ``` 19 | By default, we use 8 GPUs with total batch size as 16 for training. 20 | 21 | ## Evaluation 22 | Model evaluation can be done as follows: 23 | ```bash 24 | cd detrex 25 | python tools/train_net.py --config-file projects/group_detr/configs/path/to/config.py --eval-only train.init_checkpoint=/path/to/model_checkpoint 26 | ``` 27 | 28 | ## Citing Group-DETR 29 | If you find our work helpful for your research, please consider citing the following BibTeX entry. 30 | 31 | ```BibTex 32 | @article{chen2022group, 33 | title={Group DETR: Fast DETR Training with Group-Wise One-to-Many Assignment}, 34 | author={Chen, Qiang and Chen, Xiaokang and Wang, Jian and Feng, Haocheng and Han, Junyu and Ding, Errui and Zeng, Gang and Wang, Jingdong}, 35 | journal={arXiv preprint arXiv:2207.13085}, 36 | year={2022} 37 | } 38 | ``` 39 | -------------------------------------------------------------------------------- /projects/group_detr/assets/group_detr_arch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IDEA-Research/detrex/c56d32e3d0262cff9835ebe80a0642965ae0cb3e/projects/group_detr/assets/group_detr_arch.png -------------------------------------------------------------------------------- /projects/group_detr/configs/group_detr_r50_50ep.py: -------------------------------------------------------------------------------- 1 | from detrex.config import get_config 2 | from .models.group_detr_r50 import model 3 | 4 | dataloader = get_config("common/data/coco_detr.py").dataloader 5 | optimizer = get_config("common/optim.py").AdamW 6 | lr_multiplier = get_config("common/coco_schedule.py").lr_multiplier_50ep 7 | train = 
get_config("common/train.py").train 8 | 9 | # modify training config 10 | train.init_checkpoint = "detectron2://ImageNetPretrained/torchvision/R-50.pkl" 11 | train.output_dir = "./output/group_detr_r50_50ep" 12 | 13 | # max training iterations 14 | train.max_iter = 375000 15 | 16 | # run evaluation every 5000 iters 17 | train.eval_period = 5000 18 | 19 | # log training infomation every 20 iters 20 | train.log_period = 20 21 | 22 | # save checkpoint every 5000 iters 23 | train.checkpointer.period = 5000 24 | 25 | # gradient clipping for training 26 | train.clip_grad.enabled = True 27 | train.clip_grad.params.max_norm = 0.1 28 | train.clip_grad.params.norm_type = 2 29 | 30 | # set training devices 31 | train.device = "cuda" 32 | model.device = train.device 33 | 34 | # modify optimizer config 35 | optimizer.lr = 1e-4 36 | optimizer.betas = (0.9, 0.999) 37 | optimizer.weight_decay = 1e-4 38 | optimizer.params.lr_factor_func = lambda module_name: 0.1 if "backbone" in module_name else 1 39 | 40 | # modify dataloader config 41 | dataloader.train.num_workers = 16 42 | 43 | # please notice that this is total batch size. 
44 | # surpose you're using 4 gpus for training and the batch size for 45 | # each gpu is 16/4 = 4 46 | dataloader.train.total_batch_size = 16 47 | 48 | # dump the testing results into output_dir for visualization 49 | dataloader.evaluator.output_dir = train.output_dir 50 | -------------------------------------------------------------------------------- /projects/group_detr/modeling/__init__.py: -------------------------------------------------------------------------------- 1 | from .group_detr import GroupDETR 2 | from .group_detr_transformer import ( 3 | GroupDetrTransformerEncoder, 4 | GroupDetrTransformerDecoder, 5 | GroupDetrTransformer, 6 | ) 7 | from .attention import GroupConditionalSelfAttention 8 | from .group_criterion import GroupSetCriterion 9 | from .group_matcher import GroupHungarianMatcher 10 | -------------------------------------------------------------------------------- /projects/h_deformable_detr/assets/h_detr_arch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IDEA-Research/detrex/c56d32e3d0262cff9835ebe80a0642965ae0cb3e/projects/h_deformable_detr/assets/h_detr_arch.png -------------------------------------------------------------------------------- /projects/h_deformable_detr/configs/h_deformable_detr_r50_50ep.py: -------------------------------------------------------------------------------- 1 | from detrex.config import get_config 2 | from .models.h_deformable_detr_r50 import model 3 | 4 | dataloader = get_config("common/data/coco_detr.py").dataloader 5 | lr_multiplier = get_config("common/coco_schedule.py").lr_multiplier_50ep 6 | optimizer = get_config("common/optim.py").AdamW 7 | train = get_config("common/train.py").train 8 | 9 | # modify training config 10 | train.init_checkpoint = "detectron2://ImageNetPretrained/torchvision/R-50.pkl" 11 | train.output_dir = "./output/h_deformable_detr_r50_50ep" 12 | 13 | # max training iterations 14 | train.max_iter = 375000 
15 | 16 | # run evaluation every 5000 iters 17 | train.eval_period = 5000 18 | 19 | # log training infomation every 20 iters 20 | train.log_period = 20 21 | 22 | # save checkpoint every 5000 iters 23 | train.checkpointer.period = 5000 24 | 25 | # gradient clipping for training 26 | train.clip_grad.enabled = True 27 | train.clip_grad.params.max_norm = 0.1 28 | train.clip_grad.params.norm_type = 2 29 | 30 | # set training devices 31 | train.device = "cuda" 32 | model.device = train.device 33 | 34 | # modify optimizer config 35 | optimizer.lr = 1e-4 36 | optimizer.betas = (0.9, 0.999) 37 | optimizer.weight_decay = 1e-4 38 | optimizer.params.lr_factor_func = lambda module_name: 0.1 if "backbone" in module_name else 1 39 | 40 | # modify dataloader config 41 | dataloader.train.num_workers = 16 42 | 43 | # please notice that this is total batch size. 44 | # surpose you're using 4 gpus for training and the batch size for 45 | # each gpu is 16/4 = 4 46 | dataloader.train.total_batch_size = 16 47 | 48 | # dump the testing results into output_dir for visualization 49 | dataloader.evaluator.output_dir = train.output_dir 50 | -------------------------------------------------------------------------------- /projects/h_deformable_detr/configs/h_deformable_detr_r50_two_stage_12ep.py: -------------------------------------------------------------------------------- 1 | from .h_deformable_detr_r50_50ep import train, dataloader, optimizer, model 2 | from detrex.config import get_config 3 | 4 | lr_multiplier = get_config("common/coco_schedule.py").lr_multiplier_12ep 5 | # modify model config 6 | model.with_box_refine = True 7 | model.as_two_stage = True 8 | 9 | # modify training config 10 | train.init_checkpoint = "detectron2://ImageNetPretrained/torchvision/R-50.pkl" 11 | train.output_dir = "./output/h_deformable_detr_r50_two_stage_12ep" 12 | train.max_iter = 90000 13 | -------------------------------------------------------------------------------- 
/projects/h_deformable_detr/configs/h_deformable_detr_r50_two_stage_36ep.py: -------------------------------------------------------------------------------- 1 | from .h_deformable_detr_r50_50ep import train, dataloader, optimizer, model 2 | from detrex.config import get_config 3 | 4 | lr_multiplier = get_config("common/coco_schedule.py").lr_multiplier_36ep 5 | # modify model config 6 | model.with_box_refine = True 7 | model.as_two_stage = True 8 | 9 | # modify training config 10 | train.init_checkpoint = "detectron2://ImageNetPretrained/torchvision/R-50.pkl" 11 | train.output_dir = "./output/h_deformable_detr_r50_two_stage_36ep" 12 | train.max_iter = 270000 13 | -------------------------------------------------------------------------------- /projects/h_deformable_detr/configs/h_deformable_detr_swin_large_two_stage_12ep.py: -------------------------------------------------------------------------------- 1 | from .h_deformable_detr_r50_50ep import train, dataloader, optimizer, model 2 | from detrex.config import get_config 3 | from detectron2.config import LazyCall as L 4 | from detectron2.layers import ShapeSpec 5 | from detectron2.modeling.backbone import SwinTransformer 6 | 7 | lr_multiplier = get_config("common/coco_schedule.py").lr_multiplier_12ep 8 | # modify model config 9 | model.backbone = L(SwinTransformer)( 10 | pretrain_img_size=224, 11 | embed_dim=192, 12 | depths=(2, 2, 18, 2), 13 | num_heads=(6, 12, 24, 48), 14 | drop_path_rate=0.2, 15 | window_size=7, 16 | out_indices=(1, 2, 3), 17 | ) 18 | 19 | # modify neck config 20 | model.neck.input_shapes = { 21 | "p1": ShapeSpec(channels=384), 22 | "p2": ShapeSpec(channels=768), 23 | "p3": ShapeSpec(channels=1536), 24 | } 25 | model.neck.in_features = ["p1", "p2", "p3"] 26 | model.with_box_refine = True 27 | model.as_two_stage = True 28 | 29 | # modify training config 30 | train.init_checkpoint = "/mnt/pretrained_backbone/swin_large_patch4_window7_224_22k.pth" 31 | train.output_dir = 
"./output/h_deformable_detr_swin_large_two_stage_12ep" 32 | train.max_iter = 90000 33 | -------------------------------------------------------------------------------- /projects/h_deformable_detr/configs/h_deformable_detr_swin_large_two_stage_12ep_900queries.py: -------------------------------------------------------------------------------- 1 | from .h_deformable_detr_r50_50ep import train, dataloader, optimizer, model 2 | from detrex.config import get_config 3 | from detectron2.config import LazyCall as L 4 | from detectron2.layers import ShapeSpec 5 | from detectron2.modeling.backbone import SwinTransformer 6 | 7 | lr_multiplier = get_config("common/coco_schedule.py").lr_multiplier_12ep 8 | # modify model config 9 | model.backbone = L(SwinTransformer)( 10 | pretrain_img_size=224, 11 | embed_dim=192, 12 | depths=(2, 2, 18, 2), 13 | num_heads=(6, 12, 24, 48), 14 | drop_path_rate=0.2, 15 | window_size=7, 16 | out_indices=(1, 2, 3), 17 | ) 18 | 19 | # modify neck config 20 | model.neck.input_shapes = { 21 | "p1": ShapeSpec(channels=384), 22 | "p2": ShapeSpec(channels=768), 23 | "p3": ShapeSpec(channels=1536), 24 | } 25 | model.neck.in_features = ["p1", "p2", "p3"] 26 | model.with_box_refine = True 27 | model.as_two_stage = True 28 | 29 | model.num_queries_one2one = 900 30 | model.transformer.two_stage_num_proposals = 2400 31 | 32 | # modify training config 33 | train.init_checkpoint = "/mnt/pretrained_backbone/swin_large_patch4_window7_224_22k.pth" 34 | train.output_dir = "./output/h_deformable_detr_swin_large_two_stage_12ep_900queries" 35 | train.max_iter = 90000 36 | -------------------------------------------------------------------------------- /projects/h_deformable_detr/configs/h_deformable_detr_swin_large_two_stage_36ep.py: -------------------------------------------------------------------------------- 1 | from .h_deformable_detr_r50_50ep import train, dataloader, optimizer, model 2 | from detrex.config import get_config 3 | from detectron2.config import 
LazyCall as L 4 | from detectron2.layers import ShapeSpec 5 | from detectron2.modeling.backbone import SwinTransformer 6 | 7 | lr_multiplier = get_config("common/coco_schedule.py").lr_multiplier_36ep 8 | # modify model config 9 | model.backbone = L(SwinTransformer)( 10 | pretrain_img_size=224, 11 | embed_dim=192, 12 | depths=(2, 2, 18, 2), 13 | num_heads=(6, 12, 24, 48), 14 | drop_path_rate=0.5, 15 | window_size=7, 16 | out_indices=(1, 2, 3), 17 | ) 18 | 19 | # modify neck config 20 | model.neck.input_shapes = { 21 | "p1": ShapeSpec(channels=384), 22 | "p2": ShapeSpec(channels=768), 23 | "p3": ShapeSpec(channels=1536), 24 | } 25 | model.neck.in_features = ["p1", "p2", "p3"] 26 | model.with_box_refine = True 27 | model.as_two_stage = True 28 | 29 | # modify training config 30 | train.init_checkpoint = "/mnt/pretrained_backbone/swin_large_patch4_window7_224_22k.pth" 31 | train.output_dir = "./output/h_deformable_detr_swin_large_two_stage_36ep" 32 | train.max_iter = 270000 33 | -------------------------------------------------------------------------------- /projects/h_deformable_detr/configs/h_deformable_detr_swin_large_two_stage_36ep_900queries.py: -------------------------------------------------------------------------------- 1 | from .h_deformable_detr_r50_50ep import train, dataloader, optimizer, model 2 | from detrex.config import get_config 3 | from detectron2.config import LazyCall as L 4 | from detectron2.layers import ShapeSpec 5 | from detectron2.modeling.backbone import SwinTransformer 6 | 7 | lr_multiplier = get_config("common/coco_schedule.py").lr_multiplier_36ep 8 | # modify model config 9 | model.backbone = L(SwinTransformer)( 10 | pretrain_img_size=224, 11 | embed_dim=192, 12 | depths=(2, 2, 18, 2), 13 | num_heads=(6, 12, 24, 48), 14 | drop_path_rate=0.5, 15 | window_size=7, 16 | out_indices=(1, 2, 3), 17 | ) 18 | 19 | # modify neck config 20 | model.neck.input_shapes = { 21 | "p1": ShapeSpec(channels=384), 22 | "p2": ShapeSpec(channels=768), 23 | 
"p3": ShapeSpec(channels=1536), 24 | } 25 | model.neck.in_features = ["p1", "p2", "p3"] 26 | model.with_box_refine = True 27 | model.as_two_stage = True 28 | 29 | model.num_queries_one2one = 900 30 | model.transformer.two_stage_num_proposals = 2400 31 | 32 | # modify training config 33 | train.init_checkpoint = "/mnt/pretrained_backbone/swin_large_patch4_window7_224_22k.pth" 34 | train.output_dir = "./output/h_deformable_detr_swin_large_two_stage_36ep_900queries" 35 | train.max_iter = 270000 36 | -------------------------------------------------------------------------------- /projects/h_deformable_detr/configs/h_deformable_detr_swin_tiny_two_stage_12ep.py: -------------------------------------------------------------------------------- 1 | from .h_deformable_detr_r50_50ep import train, dataloader, optimizer, model 2 | from detrex.config import get_config 3 | from detectron2.config import LazyCall as L 4 | from detectron2.layers import ShapeSpec 5 | from detectron2.modeling.backbone import SwinTransformer 6 | 7 | lr_multiplier = get_config("common/coco_schedule.py").lr_multiplier_12ep 8 | # modify model config 9 | model.backbone = L(SwinTransformer)( 10 | pretrain_img_size=224, 11 | embed_dim=96, 12 | depths=(2, 2, 6, 2), 13 | num_heads=(3, 6, 12, 24), 14 | drop_path_rate=0.2, 15 | window_size=7, 16 | out_indices=(1, 2, 3), 17 | ) 18 | 19 | # modify neck config 20 | model.neck.input_shapes = { 21 | "p1": ShapeSpec(channels=192), 22 | "p2": ShapeSpec(channels=384), 23 | "p3": ShapeSpec(channels=768), 24 | } 25 | model.neck.in_features = ["p1", "p2", "p3"] 26 | model.with_box_refine = True 27 | model.as_two_stage = True 28 | 29 | # modify training config 30 | train.init_checkpoint = "/mnt/pretrained_backbone/swin_tiny_patch4_window7_224.pth" 31 | train.output_dir = "./output/h_deformable_detr_swin_tiny_two_stage_12ep" 32 | train.max_iter = 90000 33 | -------------------------------------------------------------------------------- 
/projects/h_deformable_detr/configs/h_deformable_detr_swin_tiny_two_stage_36ep.py: -------------------------------------------------------------------------------- 1 | from .h_deformable_detr_r50_50ep import train, dataloader, optimizer, model 2 | from detrex.config import get_config 3 | from detectron2.config import LazyCall as L 4 | from detectron2.layers import ShapeSpec 5 | from detectron2.modeling.backbone import SwinTransformer 6 | 7 | lr_multiplier = get_config("common/coco_schedule.py").lr_multiplier_36ep 8 | # modify model config 9 | model.backbone = L(SwinTransformer)( 10 | pretrain_img_size=224, 11 | embed_dim=96, 12 | depths=(2, 2, 6, 2), 13 | num_heads=(3, 6, 12, 24), 14 | drop_path_rate=0.2, 15 | window_size=7, 16 | out_indices=(1, 2, 3), 17 | ) 18 | 19 | # modify neck config 20 | model.neck.input_shapes = { 21 | "p1": ShapeSpec(channels=192), 22 | "p2": ShapeSpec(channels=384), 23 | "p3": ShapeSpec(channels=768), 24 | } 25 | model.neck.in_features = ["p1", "p2", "p3"] 26 | model.with_box_refine = True 27 | model.as_two_stage = True 28 | 29 | # modify training config 30 | train.init_checkpoint = "/mnt/pretrained_backbone/swin_tiny_patch4_window7_224.pth" 31 | train.output_dir = "./output/h_deformable_detr_swin_tiny_two_stage_36ep" 32 | train.max_iter = 270000 33 | -------------------------------------------------------------------------------- /projects/h_deformable_detr/modeling/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2022 The IDEA Authors. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 
6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | from .h_deformable_transformer import ( 17 | HDeformableDetrTransformerEncoder, 18 | HDeformableDetrTransformerDecoder, 19 | HDeformableDetrTransformer, 20 | ) 21 | from .h_deformable_detr import HDeformableDETR 22 | from .deformable_criterion import DeformableCriterion 23 | -------------------------------------------------------------------------------- /projects/maskdino/assets/dinosaur.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IDEA-Research/detrex/c56d32e3d0262cff9835ebe80a0642965ae0cb3e/projects/maskdino/assets/dinosaur.png -------------------------------------------------------------------------------- /projects/maskdino/assets/framework.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IDEA-Research/detrex/c56d32e3d0262cff9835ebe80a0642965ae0cb3e/projects/maskdino/assets/framework.jpg -------------------------------------------------------------------------------- /projects/maskdino/assets/instance.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IDEA-Research/detrex/c56d32e3d0262cff9835ebe80a0642965ae0cb3e/projects/maskdino/assets/instance.png -------------------------------------------------------------------------------- /projects/maskdino/assets/panoptic.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/IDEA-Research/detrex/c56d32e3d0262cff9835ebe80a0642965ae0cb3e/projects/maskdino/assets/panoptic.png -------------------------------------------------------------------------------- /projects/maskdino/assets/semantic.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IDEA-Research/detrex/c56d32e3d0262cff9835ebe80a0642965ae0cb3e/projects/maskdino/assets/semantic.png -------------------------------------------------------------------------------- /projects/maskdino/assets/sota.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IDEA-Research/detrex/c56d32e3d0262cff9835ebe80a0642965ae0cb3e/projects/maskdino/assets/sota.png -------------------------------------------------------------------------------- /projects/maskdino/configs/data/coco_instance_seg.py: -------------------------------------------------------------------------------- 1 | from omegaconf import OmegaConf 2 | 3 | import detectron2.data.transforms as T 4 | from detectron2.config import LazyCall as L 5 | from detectron2.data import ( 6 | build_detection_test_loader, 7 | build_detection_train_loader, 8 | get_detection_dataset_dicts, 9 | ) 10 | from detectron2.evaluation import COCOEvaluator 11 | 12 | # from detrex.data import DetrDatasetMapper 13 | # from projects.maskDINO.data.dataset_mappers.coco_instance_lsj_aug_dataset_mapper import COCOInstanceLSJDatasetMapper, build_transform_gen 14 | from detrex.data.dataset_mappers import COCOInstanceNewBaselineDatasetMapper,coco_instance_transform_gen 15 | dataloader = OmegaConf.create() 16 | 17 | dataloader.train = L(build_detection_train_loader)( 18 | dataset=L(get_detection_dataset_dicts)(names="coco_2017_train"), 19 | mapper=L(COCOInstanceNewBaselineDatasetMapper)( 20 | augmentation=L(coco_instance_transform_gen)( 21 | image_size=1024, 22 | min_scale=0.1, 23 | max_scale=2.0, 24 | 
random_flip="horizontal" 25 | ), 26 | is_train=True, 27 | image_format="RGB", 28 | ), 29 | total_batch_size=16, 30 | num_workers=4, 31 | ) 32 | 33 | dataloader.test = L(build_detection_test_loader)( 34 | dataset=L(get_detection_dataset_dicts)(names="coco_2017_val", filter_empty=False), 35 | mapper=L(COCOInstanceNewBaselineDatasetMapper)( 36 | augmentation=[ 37 | L(T.ResizeShortestEdge)( 38 | short_edge_length=800, 39 | max_size=1333, 40 | ), 41 | ], 42 | is_train=False, 43 | image_format="RGB", 44 | ), 45 | num_workers=4, 46 | ) 47 | 48 | dataloader.evaluator = L(COCOEvaluator)( 49 | dataset_name="${..test.dataset.names}", 50 | ) 51 | -------------------------------------------------------------------------------- /projects/maskdino/data/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | from . import datasets 3 | # from . import datasets_detr 4 | -------------------------------------------------------------------------------- /projects/maskdino/data/dataset_mappers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | -------------------------------------------------------------------------------- /projects/maskdino/evaluation/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IDEA-Research/detrex/c56d32e3d0262cff9835ebe80a0642965ae0cb3e/projects/maskdino/evaluation/__init__.py -------------------------------------------------------------------------------- /projects/maskdino/modeling/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) IDEA, Inc. and its affiliates. 
2 | from .backbone.swin import D2SwinTransformer 3 | from .pixel_decoder.maskdino_encoder import MaskDINOEncoder 4 | from .meta_arch.maskdino_head import MaskDINOHead 5 | 6 | -------------------------------------------------------------------------------- /projects/maskdino/modeling/backbone/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | -------------------------------------------------------------------------------- /projects/maskdino/modeling/meta_arch/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) IDEA, Inc. and its affiliates. 2 | 3 | -------------------------------------------------------------------------------- /projects/maskdino/modeling/pixel_decoder/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) IDEA, Inc. and its affiliates. 2 | -------------------------------------------------------------------------------- /projects/maskdino/modeling/transformer_decoder/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) IDEA, Inc. and its affiliates. 2 | from .maskdino_decoder import MaskDINODecoder 3 | 4 | -------------------------------------------------------------------------------- /projects/maskdino/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
2 | # import misc -------------------------------------------------------------------------------- /projects/pnp_detr/README.md: -------------------------------------------------------------------------------- 1 | ## PnP-DETR: Towards Efficient Visual Analysis with Transformers 2 | 3 | Tao Wang, Li Yuan, Yunpeng Chen, Jiashi Feng, Shuicheng Yan 4 | 5 | [[`arXiv`](https://arxiv.org/abs/2109.07036)] [[`BibTeX`](#citing-pnp-detr)] 6 | 7 |
8 | 9 |

10 | 11 | 12 | ## Training 13 | Training PnP-DETR model for 300 epochs: 14 | ```bash 15 | cd detrex 16 | python tools/train_net.py --config-file projects/pnp_detr/configs/pnp_detr_r50_300ep.py --num-gpus 8 17 | ``` 18 | By default, we use 8 GPUs with total batch size as 64 for training. 19 | 20 | ## Evaluation 21 | Model evaluation can be done as follows: 22 | ```bash 23 | cd detrex 24 | python tools/train_net.py --config-file projects/pnp_detr/configs/path/to/config.py \ 25 | --eval-only train.init_checkpoint=/path/to/model_checkpoint 26 | ``` 27 | 28 | 29 | ## Citing PnP-DETR 30 | ```BibTex 31 | @inproceedings{wang2021pnp, 32 | title={PnP-DETR: Towards Efficient Visual Analysis with Transformers}, 33 | author={Wang, Tao and Yuan, Li and Chen, Yunpeng and Feng, Jiashi and Yan, Shuicheng}, 34 | booktitle={Proceedings of the IEEE/CVF International Conference on Computer Vision}, 35 | pages={4661--4670}, 36 | year={2021} 37 | } 38 | ``` -------------------------------------------------------------------------------- /projects/pnp_detr/assets/PnP-DETR.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IDEA-Research/detrex/c56d32e3d0262cff9835ebe80a0642965ae0cb3e/projects/pnp_detr/assets/PnP-DETR.png -------------------------------------------------------------------------------- /projects/pnp_detr/configs/pnp_detr_r101_300ep.py: -------------------------------------------------------------------------------- 1 | from .pnp_detr_r50_300ep import train, dataloader, optimizer, lr_multiplier, model 2 | 3 | # modify model config 4 | model.backbone.stages.depth = 101 5 | 6 | # modify training config 7 | train.init_checkpoint = "detectron2://ImageNetPretrained/torchvision/R-50.pkl" 8 | train.output_dir = "./output/detr_r101_300ep" 9 | -------------------------------------------------------------------------------- /projects/pnp_detr/configs/pnp_detr_r50_300ep.py: 
-------------------------------------------------------------------------------- 1 | from detrex.config import get_config 2 | from .models.pnp_detr_r50 import model 3 | 4 | dataloader = get_config("common/data/coco_detr.py").dataloader 5 | lr_multiplier = get_config("common/coco_schedule.py").lr_multiplier_50ep 6 | optimizer = get_config("common/optim.py").AdamW 7 | train = get_config("common/train.py").train 8 | 9 | # modify training config 10 | train.init_checkpoint = "detectron2://ImageNetPretrained/torchvision/R-50.pkl" 11 | train.output_dir = "./output/detr_r50_300ep" 12 | train.max_iter = 554400 13 | 14 | # modify lr_multiplier 15 | lr_multiplier.scheduler.milestones = [369600, 554400] 16 | 17 | # modify optimizer config 18 | optimizer.weight_decay = 1e-4 19 | optimizer.params.lr_factor_func = lambda module_name: 0.1 if "backbone" in module_name else 1 20 | 21 | # modify dataloader config 22 | dataloader.train.num_workers = 16 23 | dataloader.train.total_batch_size = 64 24 | -------------------------------------------------------------------------------- /projects/pnp_detr/modeling/__init__.py: -------------------------------------------------------------------------------- 1 | from .detr import PnPDETR 2 | from .transformer import ( 3 | PnPDetrTransformerEncoder, 4 | PnPDetrTransformerDecoder, 5 | PnPDetrTransformer, 6 | ) 7 | -------------------------------------------------------------------------------- /projects/sqr_detr/assets/sqr_detr_overall.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IDEA-Research/detrex/c56d32e3d0262cff9835ebe80a0642965ae0cb3e/projects/sqr_detr/assets/sqr_detr_overall.png -------------------------------------------------------------------------------- /projects/sqr_detr/configs/dab_detr_r50_50ep_sqr.py: -------------------------------------------------------------------------------- 1 | from detrex.config import get_config 2 | from .models.dab_detr_r50_sqr 
import model 3 | 4 | dataloader = get_config("common/data/coco_detr.py").dataloader 5 | optimizer = get_config("common/optim.py").AdamW 6 | lr_multiplier = get_config("common/coco_schedule.py").lr_multiplier_50ep 7 | train = get_config("common/train.py").train 8 | 9 | # initialize checkpoint to be loaded 10 | train.init_checkpoint = "detectron2://ImageNetPretrained/torchvision/R-50.pkl" 11 | train.output_dir = "./output/dab_detr_r50_50ep_sqr" 12 | 13 | # max training iterations 14 | train.max_iter = 375000 15 | 16 | # run evaluation every 5000 iters 17 | train.eval_period = 5000 18 | 19 | # log training information every 20 iters 20 | train.log_period = 20 21 | 22 | # save checkpoint every 5000 iters 23 | train.checkpointer.period = 5000 24 | 25 | # gradient clipping for training 26 | train.clip_grad.enabled = True 27 | train.clip_grad.params.max_norm = 0.1 28 | train.clip_grad.params.norm_type = 2 29 | 30 | # set training devices 31 | train.device = "cuda" 32 | model.device = train.device 33 | 34 | # modify optimizer config 35 | optimizer.lr = 1e-4 36 | optimizer.betas = (0.9, 0.999) 37 | optimizer.weight_decay = 1e-4 38 | optimizer.params.lr_factor_func = lambda module_name: 0.1 if "backbone" in module_name else 1 39 | 40 | # modify dataloader config 41 | dataloader.train.num_workers = 16 42 | 43 | # please notice that this is total batch size. 
44 | # surpose you're using 4 gpus for training and the batch size for 45 | # each gpu is 16/4 = 4 46 | dataloader.train.total_batch_size = 16 47 | 48 | # dump the testing results into output_dir for visualization 49 | dataloader.evaluator.output_dir = train.output_dir 50 | -------------------------------------------------------------------------------- /projects/sqr_detr/modeling/__init__.py: -------------------------------------------------------------------------------- 1 | from .dab_transformer_sqr import ( 2 | DabDetrTransformerDecoder_qr, 3 | ) 4 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | cloudpickle 2 | hydra-core 3 | omegaconf 4 | pybind11 5 | flake8 6 | isort 7 | black 8 | autoflake 9 | timm 10 | pytest 11 | scipy 12 | psutil 13 | opencv-python 14 | wandb 15 | submitit 16 | einops 17 | fairscale -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | 2 | [isort] 3 | line_length=100 4 | multi_line_output=3 5 | include_trailing_comma=True 6 | known_standard_library=numpy,setuptools,mock 7 | skip=./datasets,docs,detectron2 8 | skip_glob=*/__init__.py,**/configs/**,**/tests/config/**, detectron2/*/__init__.py 9 | known_myself=detrex 10 | known_third_party=fvcore,matplotlib,cv2,torch,torchvision,PIL,pycocotools,yacs,termcolor,cityscapesscripts,tabulate,tqdm,scipy,lvis,psutil,pkg_resources,caffe2,onnx,panopticapi,black,isort,av,iopath,omegaconf,hydra,yaml,pydoc,submitit,cloudpickle,packaging 11 | no_lines_before=STDLIB,THIRDPARTY 12 | sections=FUTURE,STDLIB,THIRDPARTY,myself,FIRSTPARTY,LOCALFOLDER 13 | default_section=FIRSTPARTY 14 | 15 | [mypy] 16 | python_version=3.7 17 | ignore_missing_imports = True 18 | warn_unused_configs = True 19 | disallow_untyped_defs = True 20 | check_untyped_defs = True 21 
| warn_unused_ignores = True 22 | warn_redundant_casts = True 23 | show_column_numbers = True 24 | follow_imports = silent 25 | allow_redefinition = True 26 | ; Require all functions to be annotated 27 | disallow_incomplete_defs = True -------------------------------------------------------------------------------- /tests/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2022 The IDEA Authors. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | 17 | from .attention import MultiheadAttention 18 | from .transformer import ( 19 | OriginalConditionalAttentionEncoder, 20 | OriginalConditionalAttentionDecoder, 21 | ) 22 | from .potision_embedding import ( 23 | DeformablePositionEmbeddingSine, 24 | DABPositionEmbeddingSine, 25 | DABPositionEmbeddingLearned, 26 | ) 27 | from .mlp import MLP 28 | from .losses import ( 29 | sigmoid_focal_loss, 30 | dice_loss, 31 | ) 32 | -------------------------------------------------------------------------------- /tests/utils/mlp.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2022 The IDEA Authors. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 
6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | import torch.nn as nn 17 | import torch.nn.functional as F 18 | 19 | 20 | class MLP(nn.Module): 21 | """Very simple multi-layer perceptron (also called FFN)""" 22 | 23 | def __init__(self, input_dim, hidden_dim, output_dim, num_layers): 24 | super().__init__() 25 | self.num_layers = num_layers 26 | h = [hidden_dim] * (num_layers - 1) 27 | self.layers = nn.ModuleList( 28 | nn.Linear(n, k) for n, k in zip([input_dim] + h, h + [output_dim]) 29 | ) 30 | 31 | def forward(self, x): 32 | for i, layer in enumerate(self.layers): 33 | x = F.relu(layer(x)) if i < self.num_layers - 1 else layer(x) 34 | return x 35 | -------------------------------------------------------------------------------- /tools/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IDEA-Research/detrex/c56d32e3d0262cff9835ebe80a0642965ae0cb3e/tools/__init__.py --------------------------------------------------------------------------------