├── requirements.txt
├── mmcv_custom
│   ├── __init__.py
│   └── runner
│       ├── __init__.py
│       ├── checkpoint.py
│       └── epoch_based_runner.py
├── datasets
│   ├── torchvision_datasets
│   │   ├── __init__.py
│   │   └── coco.py
│   ├── __init__.py
│   ├── panoptic_eval.py
│   ├── data_prefetcher.py
│   ├── coco_panoptic.py
│   ├── samplers.py
│   ├── coco.py
│   ├── transforms.py
│   └── coco_eval.py
├── configs
│   ├── one_stage
│   │   └── deformable-detr-baseline
│   │       └── 50eps
│   │           ├── r50_deformable_detr.sh
│   │           ├── r50_deformable_detr_single_scale.sh
│   │           ├── r50_deformable_detr_single_scale_dc5.sh
│   │           └── r50_deformable_detr_plus_iterative_bbox_refinement.sh
│   └── two_stage
│       ├── deformable-detr-baseline
│       │   ├── 12eps
│       │   │   ├── r50_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage.sh
│       │   │   ├── r50_n1800_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage.sh
│       │   │   ├── r50_dp0_mqs_lft_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage.sh
│       │   │   └── swin
│       │   │       ├── swin_tiny_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage.sh
│       │   │       ├── swin_large_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage.sh
│       │   │       ├── swin_tiny_dp0_mqs_lft_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage.sh
│       │   │       └── swin_large_dp0_mqs_lft_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage.sh
│       │   ├── 24eps
│       │   │   ├── r50_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage.sh
│       │   │   ├── r50_n1800_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage.sh
│       │   │   └── r50_dp0_mqs_lft_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage.sh
│       │   ├── 36eps
│       │   │   ├── r50_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage.sh
│       │   │   ├── r50_n1800_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage.sh
│       │   │   ├── r50_dp0_mqs_lft_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage.sh
│       │   │   └── swin
│       │   │       ├── swin_tiny_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage.sh
│       │   │       ├── drop_path0.5_swin_large_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage.sh
│       │   │       ├── swin_tiny_dp0_mqs_lft_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage.sh
│       │   │       └── drop_path0.5_swin_large_dp0_mqs_lft_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage.sh
│       │   └── 50eps
│       │       ├── r50_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage.sh
│       │       ├── r50_n1800_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage.sh
│       │       └── r50_dp0_mqs_lft_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage.sh
│       └── deformable-detr-hybrid-branch
│           ├── 12eps
│           │   ├── r50_hybrid_branch_lambda1_group6_t1500_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage.sh
│           │   ├── r50_hybrid_branch_lambda1_group1_t300_dp0_mqs_lft_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage.sh
│           │   ├── r50_hybrid_branch_lambda1_group2_t600_dp0_mqs_lft_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage.sh
│           │   ├── r50_hybrid_branch_lambda1_group3_t900_dp0_mqs_lft_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage.sh
│           │   ├── r50_hybrid_branch_lambda1_group6_t300_dp0_mqs_lft_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage.sh
│           │   ├── r50_hybrid_branch_lambda1_group6_t600_dp0_mqs_lft_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage.sh
│           │   ├── r50_hybrid_branch_lambda1_group6_t900_dp0_mqs_lft_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage.sh
│           │   ├── r50_hybrid_branch_lambda1_group4_t1200_dp0_mqs_lft_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage.sh
│           │   ├── r50_hybrid_branch_lambda1_group5_t1500_dp0_mqs_lft_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage.sh
│           │   ├── r50_hybrid_branch_lambda1_group6_t1200_dp0_mqs_lft_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage.sh
│           │   ├── r50_hybrid_branch_lambda1_group6_t1500_dp0_mqs_lft_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage.sh
│           │   ├── r50_hybrid_branch_lambda1_group6_t1800_dp0_mqs_lft_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage.sh
│           │   ├── r50_hybrid_branch_lambda2_group6_t1500_dp0_mqs_lft_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage.sh
│           │   ├── r50_hybrid_branch_lambda5_group6_t1500_dp0_mqs_lft_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage.sh
│           │   ├── r50_hybrid_branch_lambda0.1_group6_t1500_dp0_mqs_lft_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage.sh
│           │   ├── r50_hybrid_branch_lambda0.2_group6_t1500_dp0_mqs_lft_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage.sh
│           │   ├── r50_hybrid_branch_lambda0.5_group6_t1500_dp0_mqs_lft_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage.sh
│           │   ├── r101_hybrid_branch_lambda1_group6_t1500_dp0_mqs_lft_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage.sh
│           │   └── swin
│           │       ├── swin_small_hybrid_branch_lambda1_group6_t1500_dp0_mqs_lft_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage.sh
│           │       ├── swin_tiny_hybrid_branch_lambda1_group6_t1500_dp0_mqs_lft_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage.sh
│           │       ├── swin_large_hybrid_branch_lambda1_group6_t1500_dp0_mqs_lft_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage.sh
│           │       ├── swin_small_22k_hybrid_branch_lambda1_group6_t1500_dp0_mqs_lft_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage.sh
│           │       ├── swin_large_hybrid_branch_lambda1_group6_t1500_n900_dp0_mqs_lft_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage.sh
│           │       └── decay0.05_swin_tiny_hybrid_branch_lambda1_group6_t1500_dp0_mqs_lft_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage.sh
│           ├── 24eps
│           │   ├── r50_hybrid_branch_lambda1_group6_t1500_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage.sh
│           │   └── r50_hybrid_branch_lambda1_group6_t1500_dp0_mqs_lft_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage.sh
│           └── 36eps
│               ├── r50_hybrid_branch_lambda1_group6_t1500_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage.sh
│               ├── r50_hybrid_branch_lambda1_group6_t1500_dp0_mqs_lft_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage.sh
│               ├── r101_hybrid_branch_lambda1_group6_t1500_dp0_mqs_lft_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage.sh
│               └── swin
│                   ├── swin_small_hybrid_branch_lambda1_group6_t1500_dp0_mqs_lft_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage.sh
│                   ├── swin_tiny_hybrid_branch_lambda1_group6_t1500_dp0_mqs_lft_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage.sh
│                   ├── swin_small_22k_hybrid_branch_lambda1_group6_t1500_dp0_mqs_lft_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage.sh
│                   ├── decay0.05_swin_tiny_hybrid_branch_lambda1_group6_t1500_dp0_mqs_lft_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage.sh
│                   ├── drop_path0.5_swin_large_hybrid_branch_lambda1_group6_t1500_dp0_mqs_lft_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage.sh
│                   ├── drop_path0.5_swin_large_hybrid_branch_lambda1_group6_t1500_n900_dp0_mqs_lft_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage.sh
│                   └── decay0.05_drop_path0.5_swin_large_hybrid_branch_lambda1_group6_t1500_n900_dp0_mqs_lft_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage.sh
├── util
│   ├── __init__.py
│   ├── box_ops.py
│   └── plot_utils.py
├── models
│   ├── ops
│   │   ├── make.sh
│   │   ├── modules
│   │   │   ├── __init__.py
│   │   │   └── ms_deform_attn.py
│   │   ├── functions
│   │   │   ├── __init__.py
│   │   │   └── ms_deform_attn_func.py
│   │   ├── src
│   │   │   ├── vision.cpp
│   │   │   ├── cuda
│   │   │   │   ├── ms_deform_attn_cuda.h
│   │   │   │   └── ms_deform_attn_cuda.cu
│   │   │   ├── cpu
│   │   │   │   ├── ms_deform_attn_cpu.h
│   │   │   │   └── ms_deform_attn_cpu.cpp
│   │   │   └── ms_deform_attn.h
│   │   ├── setup.py
│   │   └── test.py
│   ├── __init__.py
│   ├── position_encoding.py
│   ├── matcher.py
│   ├── backbone.py
│   └── segmentation.py
├── tools
│   ├── run_dist_launch.sh
│   ├── run_dist_slurm.sh
│   └── launch.py
├── LICENSE
├── benchmark.py
└── engine.py

/requirements.txt:
--------------------------------------------------------------------------------
pycocotools
tqdm
cython
scipy
wandb
timm
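
requirements.txt pins only the auxiliary Python packages; PyTorch and
torchvision are not listed and are assumed to be installed separately. A
typical setup sketch:

# Install the auxiliary dependencies (PyTorch/torchvision assumed present).
pip install -r requirements.txt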
--------------------------------------------------------------------------------
/mmcv_custom/__init__.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-

from .checkpoint import load_checkpoint

__all__ = ["load_checkpoint"]
--------------------------------------------------------------------------------
/mmcv_custom/runner/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) Open-MMLab. All rights reserved.
from .checkpoint import save_checkpoint
from .epoch_based_runner import EpochBasedRunnerAmp


__all__ = ["EpochBasedRunnerAmp", "save_checkpoint"]
--------------------------------------------------------------------------------
/datasets/torchvision_datasets/__init__.py:
--------------------------------------------------------------------------------
# ------------------------------------------------------------------------
# Deformable DETR
# Copyright (c) 2020 SenseTime. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 [see LICENSE for details]
# ------------------------------------------------------------------------

from .coco import CocoDetection
--------------------------------------------------------------------------------
/configs/one_stage/deformable-detr-baseline/50eps/r50_deformable_detr.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash

set -x

EXP_DIR=exps/one_stage/deformable-detr-baseline/50eps/r50_deformable_detr
PY_ARGS=${@:1}

python -u main.py \
    --output_dir ${EXP_DIR} \
    --num_queries_one2one 300 \
    --num_queries_one2many 0 \
    --k_one2many 0 \
    --epochs 50 \
    --lr_drop 40 \
    ${PY_ARGS}
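
Each config invokes main.py directly, and PY_ARGS=${@:1} forwards any extra
command-line flags to it. A sketch of a typical single-node, 8-GPU launch
that wraps the config with tools/run_dist_launch.sh (included later in this
listing); the --coco_path flag is an assumption carried over from the
Deformable DETR main.py this code base derives from:

# Single-node launch; trailing flags reach main.py through ${PY_ARGS}.
GPUS_PER_NODE=8 ./tools/run_dist_launch.sh 8 \
    ./configs/one_stage/deformable-detr-baseline/50eps/r50_deformable_detr.sh \
    --coco_path ./data/coco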
--------------------------------------------------------------------------------
/configs/one_stage/deformable-detr-baseline/50eps/r50_deformable_detr_single_scale.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash

set -x

EXP_DIR=exps/one_stage/deformable-detr-baseline/50eps/r50_deformable_detr_single_scale
PY_ARGS=${@:1}

python -u main.py \
    --num_feature_levels 1 \
    --output_dir ${EXP_DIR} \
    --num_queries_one2one 300 \
    --num_queries_one2many 0 \
    --k_one2many 0 \
    --epochs 50 \
    --lr_drop 40 \
    ${PY_ARGS}
--------------------------------------------------------------------------------
/configs/one_stage/deformable-detr-baseline/50eps/r50_deformable_detr_single_scale_dc5.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash

set -x

EXP_DIR=exps/one_stage/deformable-detr-baseline/50eps/r50_deformable_detr_single_scale_dc5
PY_ARGS=${@:1}

python -u main.py \
    --num_feature_levels 1 \
    --dilation \
    --output_dir ${EXP_DIR} \
    --num_queries_one2one 300 \
    --num_queries_one2many 0 \
    --k_one2many 0 \
    --epochs 50 \
    --lr_drop 40 \
    ${PY_ARGS}
--------------------------------------------------------------------------------
/util/__init__.py:
--------------------------------------------------------------------------------
# ------------------------------------------------------------------------
# Deformable DETR
# Copyright (c) 2020 SenseTime. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 [see LICENSE for details]
# ------------------------------------------------------------------------
# Modified from DETR (https://github.com/facebookresearch/detr)
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
# ------------------------------------------------------------------------
--------------------------------------------------------------------------------
/configs/one_stage/deformable-detr-baseline/50eps/r50_deformable_detr_plus_iterative_bbox_refinement.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash

set -x

EXP_DIR=exps/one_stage/deformable-detr-baseline/50eps/r50_deformable_detr_plus_iterative_bbox_refinement
PY_ARGS=${@:1}

python -u main.py \
    --output_dir ${EXP_DIR} \
    --with_box_refine \
    --dim_feedforward 2048 \
    --num_queries_one2one 300 \
    --num_queries_one2many 0 \
    --k_one2many 0 \
    --epochs 50 \
    --lr_drop 40 \
    ${PY_ARGS}
--------------------------------------------------------------------------------
/configs/two_stage/deformable-detr-baseline/12eps/r50_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash

set -x

EXP_DIR=exps/two_stage/deformable-detr-baseline/12eps/r50_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage
PY_ARGS=${@:1}

python -u main.py \
    --output_dir ${EXP_DIR} \
    --with_box_refine \
    --two_stage \
    --dim_feedforward 2048 \
    --num_queries_one2one 300 \
    --num_queries_one2many 0 \
    --k_one2many 0 \
    --epochs 12 \
    --lr_drop 11 \
    ${PY_ARGS}
--------------------------------------------------------------------------------
/configs/two_stage/deformable-detr-baseline/24eps/r50_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash

set -x

EXP_DIR=exps/two_stage/deformable-detr-baseline/24eps/r50_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage
PY_ARGS=${@:1}

python -u main.py \
    --output_dir ${EXP_DIR} \
    --with_box_refine \
    --two_stage \
    --dim_feedforward 2048 \
    --num_queries_one2one 300 \
    --num_queries_one2many 0 \
    --k_one2many 0 \
    --epochs 24 \
    --lr_drop 20 \
    ${PY_ARGS}
--------------------------------------------------------------------------------
/configs/two_stage/deformable-detr-baseline/36eps/r50_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash

set -x

EXP_DIR=exps/two_stage/deformable-detr-baseline/36eps/r50_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage
PY_ARGS=${@:1}

python -u main.py \
    --output_dir ${EXP_DIR} \
    --with_box_refine \
    --two_stage \
    --dim_feedforward 2048 \
    --num_queries_one2one 300 \
    --num_queries_one2many 0 \
    --k_one2many 0 \
    --epochs 36 \
    --lr_drop 30 \
    ${PY_ARGS}
--------------------------------------------------------------------------------
/configs/two_stage/deformable-detr-baseline/50eps/r50_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash

set -x

EXP_DIR=exps/two_stage/deformable-detr-baseline/50eps/r50_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage
PY_ARGS=${@:1}

python -u main.py \
    --output_dir ${EXP_DIR} \
    --with_box_refine \
    --two_stage \
    --dim_feedforward 2048 \
    --num_queries_one2one 300 \
    --num_queries_one2many 0 \
    --k_one2many 0 \
    --epochs 50 \
    --lr_drop 40 \
    ${PY_ARGS}
--------------------------------------------------------------------------------
/configs/two_stage/deformable-detr-baseline/12eps/r50_n1800_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash

set -x

EXP_DIR=exps/two_stage/deformable-detr-baseline/12eps/r50_n1800_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage
PY_ARGS=${@:1}

python -u main.py \
    --output_dir ${EXP_DIR} \
    --with_box_refine \
    --two_stage \
    --dim_feedforward 2048 \
    --num_queries_one2one 1800 \
    --num_queries_one2many 0 \
    --k_one2many 0 \
    --epochs 12 \
    --lr_drop 11 \
    ${PY_ARGS}
--------------------------------------------------------------------------------
/configs/two_stage/deformable-detr-baseline/24eps/r50_n1800_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash

set -x

EXP_DIR=exps/two_stage/deformable-detr-baseline/24eps/r50_n1800_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage
PY_ARGS=${@:1}

python -u main.py \
    --output_dir ${EXP_DIR} \
    --with_box_refine \
    --two_stage \
    --dim_feedforward 2048 \
    --num_queries_one2one 1800 \
    --num_queries_one2many 0 \
    --k_one2many 0 \
    --epochs 24 \
    --lr_drop 20 \
    ${PY_ARGS}
--------------------------------------------------------------------------------
/configs/two_stage/deformable-detr-baseline/36eps/r50_n1800_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash

set -x

EXP_DIR=exps/two_stage/deformable-detr-baseline/36eps/r50_n1800_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage
PY_ARGS=${@:1}

python -u main.py \
    --output_dir ${EXP_DIR} \
    --with_box_refine \
    --two_stage \
    --dim_feedforward 2048 \
    --num_queries_one2one 1800 \
    --num_queries_one2many 0 \
    --k_one2many 0 \
    --epochs 36 \
    --lr_drop 30 \
    ${PY_ARGS}
--------------------------------------------------------------------------------
/configs/two_stage/deformable-detr-baseline/50eps/r50_n1800_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash

set -x

EXP_DIR=exps/two_stage/deformable-detr-baseline/50eps/r50_n1800_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage
PY_ARGS=${@:1}

python -u main.py \
    --output_dir ${EXP_DIR} \
    --with_box_refine \
    --two_stage \
    --dim_feedforward 2048 \
    --num_queries_one2one 1800 \
    --num_queries_one2many 0 \
    --k_one2many 0 \
    --epochs 50 \
    --lr_drop 40 \
    ${PY_ARGS}
--------------------------------------------------------------------------------
/models/ops/make.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash
# ------------------------------------------------------------------------------------------------
# Deformable DETR
# Copyright (c) 2020 SenseTime. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 [see LICENSE for details]
# ------------------------------------------------------------------------------------------------
# Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0
# ------------------------------------------------------------------------------------------------

python setup.py build install
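
make.sh builds the multi-scale deformable attention extension defined under
models/ops/src. A minimal build-and-verify sequence, assuming a CUDA
toolchain that matches the installed PyTorch; test.py (listed in the tree
above) exercises the compiled op:

# Build and install the MSDeformAttn extension, then sanity-check it.
cd models/ops
bash make.sh        # runs: python setup.py build install
python test.py
cd ../..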
--------------------------------------------------------------------------------
/models/ops/modules/__init__.py:
--------------------------------------------------------------------------------
# ------------------------------------------------------------------------------------------------
# Deformable DETR
# Copyright (c) 2020 SenseTime. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 [see LICENSE for details]
# ------------------------------------------------------------------------------------------------
# Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0
# ------------------------------------------------------------------------------------------------

from .ms_deform_attn import MSDeformAttn
--------------------------------------------------------------------------------
/models/ops/functions/__init__.py:
--------------------------------------------------------------------------------
# ------------------------------------------------------------------------------------------------
# Deformable DETR
# Copyright (c) 2020 SenseTime. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 [see LICENSE for details]
# ------------------------------------------------------------------------------------------------
# Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0
# ------------------------------------------------------------------------------------------------

from .ms_deform_attn_func import MSDeformAttnFunction
--------------------------------------------------------------------------------
/models/__init__.py:
--------------------------------------------------------------------------------
# ------------------------------------------------------------------------
# Deformable DETR
# Copyright (c) 2020 SenseTime. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 [see LICENSE for details]
# ------------------------------------------------------------------------
# Modified from DETR (https://github.com/facebookresearch/detr)
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
# ------------------------------------------------------------------------

from .deformable_detr import build


def build_model(args):
    return build(args)
--------------------------------------------------------------------------------
/configs/two_stage/deformable-detr-baseline/12eps/r50_dp0_mqs_lft_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash

set -x

EXP_DIR=exps/two_stage/deformable-detr-baseline/12eps/r50_dp0_mqs_lft_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage
PY_ARGS=${@:1}

python -u main.py \
    --output_dir ${EXP_DIR} \
    --with_box_refine \
    --two_stage \
    --dim_feedforward 2048 \
    --num_queries_one2one 300 \
    --num_queries_one2many 0 \
    --k_one2many 0 \
    --epochs 12 \
    --lr_drop 11 \
    --dropout 0.0 \
    --mixed_selection \
    --look_forward_twice \
    ${PY_ARGS}
--------------------------------------------------------------------------------
/configs/two_stage/deformable-detr-baseline/24eps/r50_dp0_mqs_lft_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash

set -x

EXP_DIR=exps/two_stage/deformable-detr-baseline/24eps/r50_dp0_mqs_lft_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage
PY_ARGS=${@:1}

python -u main.py \
    --output_dir ${EXP_DIR} \
    --with_box_refine \
    --two_stage \
    --dim_feedforward 2048 \
    --num_queries_one2one 300 \
    --num_queries_one2many 0 \
    --k_one2many 0 \
    --epochs 24 \
    --lr_drop 20 \
    --dropout 0.0 \
    --mixed_selection \
    --look_forward_twice \
    ${PY_ARGS}
--------------------------------------------------------------------------------
/configs/two_stage/deformable-detr-baseline/36eps/r50_dp0_mqs_lft_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash

set -x

EXP_DIR=exps/two_stage/deformable-detr-baseline/36eps/r50_dp0_mqs_lft_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage
PY_ARGS=${@:1}

python -u main.py \
    --output_dir ${EXP_DIR} \
    --with_box_refine \
    --two_stage \
    --dim_feedforward 2048 \
    --num_queries_one2one 300 \
    --num_queries_one2many 0 \
    --k_one2many 0 \
    --epochs 36 \
    --lr_drop 30 \
    --dropout 0.0 \
    --mixed_selection \
    --look_forward_twice \
    ${PY_ARGS}
--------------------------------------------------------------------------------
/configs/two_stage/deformable-detr-baseline/50eps/r50_dp0_mqs_lft_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash

set -x

EXP_DIR=exps/two_stage/deformable-detr-baseline/50eps/r50_dp0_mqs_lft_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage
PY_ARGS=${@:1}

python -u main.py \
    --output_dir ${EXP_DIR} \
    --with_box_refine \
    --two_stage \
    --dim_feedforward 2048 \
    --num_queries_one2one 300 \
    --num_queries_one2many 0 \
    --k_one2many 0 \
    --epochs 50 \
    --lr_drop 40 \
    --dropout 0.0 \
    --mixed_selection \
    --look_forward_twice \
    ${PY_ARGS}
--------------------------------------------------------------------------------
/configs/two_stage/deformable-detr-hybrid-branch/12eps/r50_hybrid_branch_lambda1_group6_t1500_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash

set -x

EXP_DIR=exps/two_stage/deformable-detr-hybrid-branch/12eps/r50_hybrid_branch_lambda1_group6_t1500_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage
PY_ARGS=${@:1}

python -u main.py \
    --output_dir ${EXP_DIR} \
    --with_box_refine \
    --two_stage \
    --dim_feedforward 2048 \
    --epochs 12 \
    --lr_drop 11 \
    --num_queries_one2one 300 \
    --num_queries_one2many 1500 \
    --k_one2many 6 \
    --lambda_one2many 1.0 \
    ${PY_ARGS}
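
As the flag names suggest, the hybrid-branch configs keep the 300
one-to-one queries and add a one-to-many branch: the file-name pattern
lambda<L>_group<K>_t<T> maps onto --lambda_one2many, --k_one2many and
--num_queries_one2many. A small illustrative sketch of that mapping; the
helper is hypothetical and not part of this repository:

# Derive the hybrid-branch flags from a config-name fragment such as
# "lambda1_group6_t1500" (the convention used by the file names above).
name=lambda1_group6_t1500
L=$(echo ${name} | sed -E 's/^lambda([0-9.]+)_.*/\1/')
K=$(echo ${name} | sed -E 's/.*group([0-9]+)_.*/\1/')
T=$(echo ${name} | sed -E 's/.*_t([0-9]+)$/\1/')
echo "--lambda_one2many ${L} --k_one2many ${K} --num_queries_one2many ${T}"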
--------------------------------------------------------------------------------
/configs/two_stage/deformable-detr-hybrid-branch/24eps/r50_hybrid_branch_lambda1_group6_t1500_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash

set -x

EXP_DIR=exps/two_stage/deformable-detr-hybrid-branch/24eps/r50_hybrid_branch_lambda1_group6_t1500_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage
PY_ARGS=${@:1}

python -u main.py \
    --output_dir ${EXP_DIR} \
    --with_box_refine \
    --two_stage \
    --dim_feedforward 2048 \
    --epochs 24 \
    --lr_drop 20 \
    --num_queries_one2one 300 \
    --num_queries_one2many 1500 \
    --k_one2many 6 \
    --lambda_one2many 1.0 \
    ${PY_ARGS}
--------------------------------------------------------------------------------
/configs/two_stage/deformable-detr-hybrid-branch/36eps/r50_hybrid_branch_lambda1_group6_t1500_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash

set -x

EXP_DIR=exps/two_stage/deformable-detr-hybrid-branch/36eps/r50_hybrid_branch_lambda1_group6_t1500_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage
PY_ARGS=${@:1}

python -u main.py \
    --output_dir ${EXP_DIR} \
    --with_box_refine \
    --two_stage \
    --dim_feedforward 2048 \
    --epochs 36 \
    --lr_drop 30 \
    --num_queries_one2one 300 \
    --num_queries_one2many 1500 \
    --k_one2many 6 \
    --lambda_one2many 1.0 \
    ${PY_ARGS}
--------------------------------------------------------------------------------
/configs/two_stage/deformable-detr-baseline/12eps/swin/swin_tiny_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash

set -x

EXP_DIR=exps/two_stage/deformable-detr-baseline/12eps/swin/swin_tiny_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage
PY_ARGS=${@:1}

python -u main.py \
    --output_dir ${EXP_DIR} \
    --with_box_refine \
    --two_stage \
    --dim_feedforward 2048 \
    --num_queries_one2one 300 \
    --num_queries_one2many 0 \
    --k_one2many 0 \
    --epochs 12 \
    --lr_drop 11 \
    --backbone swin_tiny \
    --pretrained_backbone_path /mnt/pretrained_backbone/swin_tiny_patch4_window7_224.pth \
    ${PY_ARGS}
--------------------------------------------------------------------------------
/configs/two_stage/deformable-detr-baseline/36eps/swin/swin_tiny_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash

set -x

EXP_DIR=exps/two_stage/deformable-detr-baseline/36eps/swin/swin_tiny_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage
PY_ARGS=${@:1}

python -u main.py \
    --output_dir ${EXP_DIR} \
    --with_box_refine \
    --two_stage \
    --dim_feedforward 2048 \
    --num_queries_one2one 300 \
    --num_queries_one2many 0 \
    --k_one2many 0 \
    --epochs 36 \
    --lr_drop 30 \
    --backbone swin_tiny \
    --pretrained_backbone_path /mnt/pretrained_backbone/swin_tiny_patch4_window7_224.pth \
    ${PY_ARGS}
--------------------------------------------------------------------------------
/configs/two_stage/deformable-detr-baseline/12eps/swin/swin_large_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash

set -x

EXP_DIR=exps/two_stage/deformable-detr-baseline/12eps/swin/swin_large_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage
PY_ARGS=${@:1}

python -u main.py \
    --output_dir ${EXP_DIR} \
    --with_box_refine \
    --two_stage \
    --dim_feedforward 2048 \
    --num_queries_one2one 300 \
    --num_queries_one2many 0 \
    --k_one2many 0 \
    --epochs 12 \
    --lr_drop 11 \
    --backbone swin_large \
    --pretrained_backbone_path /mnt/pretrained_backbone/swin_large_patch4_window7_224_22k.pth \
    ${PY_ARGS}
--------------------------------------------------------------------------------
/configs/two_stage/deformable-detr-baseline/36eps/swin/drop_path0.5_swin_large_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash

set -x

EXP_DIR=exps/two_stage/deformable-detr-baseline/36eps/swin/drop_path0.5_swin_large_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage
PY_ARGS=${@:1}

python -u main.py \
    --output_dir ${EXP_DIR} \
    --with_box_refine \
    --two_stage \
    --dim_feedforward 2048 \
    --num_queries_one2one 300 \
    --num_queries_one2many 0 \
    --k_one2many 0 \
    --epochs 36 \
    --lr_drop 30 \
    --backbone swin_large \
    --pretrained_backbone_path /mnt/pretrained_backbone/swin_large_patch4_window7_224_22k.pth \
    --drop_path_rate 0.5 \
    ${PY_ARGS}
--------------------------------------------------------------------------------
/configs/two_stage/deformable-detr-hybrid-branch/12eps/r50_hybrid_branch_lambda1_group1_t300_dp0_mqs_lft_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash

set -x

EXP_DIR=exps/two_stage/deformable-detr-hybrid-branch/12eps/r50_hybrid_branch_lambda1_group1_t300_dp0_mqs_lft_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage
PY_ARGS=${@:1}

python -u main.py \
    --output_dir ${EXP_DIR} \
    --with_box_refine \
    --two_stage \
    --dim_feedforward 2048 \
    --epochs 12 \
    --lr_drop 11 \
    --num_queries_one2one 300 \
    --num_queries_one2many 300 \
    --k_one2many 1 \
    --lambda_one2many 1.0 \
    --dropout 0.0 \
    --mixed_selection \
    --look_forward_twice \
    ${PY_ARGS}
--------------------------------------------------------------------------------
/configs/two_stage/deformable-detr-hybrid-branch/12eps/r50_hybrid_branch_lambda1_group2_t600_dp0_mqs_lft_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash

set -x

EXP_DIR=exps/two_stage/deformable-detr-hybrid-branch/12eps/r50_hybrid_branch_lambda1_group2_t600_dp0_mqs_lft_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage
PY_ARGS=${@:1}

python -u main.py \
    --output_dir ${EXP_DIR} \
    --with_box_refine \
    --two_stage \
    --dim_feedforward 2048 \
    --epochs 12 \
    --lr_drop 11 \
    --num_queries_one2one 300 \
    --num_queries_one2many 600 \
    --k_one2many 2 \
    --lambda_one2many 1.0 \
    --dropout 0.0 \
    --mixed_selection \
    --look_forward_twice \
    ${PY_ARGS}
--------------------------------------------------------------------------------
/configs/two_stage/deformable-detr-hybrid-branch/12eps/r50_hybrid_branch_lambda1_group3_t900_dp0_mqs_lft_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash

set -x

EXP_DIR=exps/two_stage/deformable-detr-hybrid-branch/12eps/r50_hybrid_branch_lambda1_group3_t900_dp0_mqs_lft_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage
PY_ARGS=${@:1}

python -u main.py \
    --output_dir ${EXP_DIR} \
    --with_box_refine \
    --two_stage \
    --dim_feedforward 2048 \
    --epochs 12 \
    --lr_drop 11 \
    --num_queries_one2one 300 \
    --num_queries_one2many 900 \
    --k_one2many 3 \
    --lambda_one2many 1.0 \
    --dropout 0.0 \
    --mixed_selection \
    --look_forward_twice \
    ${PY_ARGS}
--------------------------------------------------------------------------------
/configs/two_stage/deformable-detr-hybrid-branch/12eps/r50_hybrid_branch_lambda1_group6_t300_dp0_mqs_lft_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash

set -x

EXP_DIR=exps/two_stage/deformable-detr-hybrid-branch/12eps/r50_hybrid_branch_lambda1_group6_t300_dp0_mqs_lft_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage
PY_ARGS=${@:1}

python -u main.py \
    --output_dir ${EXP_DIR} \
    --with_box_refine \
    --two_stage \
    --dim_feedforward 2048 \
    --epochs 12 \
    --lr_drop 11 \
    --num_queries_one2one 300 \
    --num_queries_one2many 300 \
    --k_one2many 6 \
    --lambda_one2many 1.0 \
    --dropout 0.0 \
    --mixed_selection \
    --look_forward_twice \
    ${PY_ARGS}
--------------------------------------------------------------------------------
/configs/two_stage/deformable-detr-hybrid-branch/12eps/r50_hybrid_branch_lambda1_group6_t600_dp0_mqs_lft_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash

set -x

EXP_DIR=exps/two_stage/deformable-detr-hybrid-branch/12eps/r50_hybrid_branch_lambda1_group6_t600_dp0_mqs_lft_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage
PY_ARGS=${@:1}

python -u main.py \
    --output_dir ${EXP_DIR} \
    --with_box_refine \
    --two_stage \
    --dim_feedforward 2048 \
    --epochs 12 \
    --lr_drop 11 \
    --num_queries_one2one 300 \
    --num_queries_one2many 600 \
    --k_one2many 6 \
    --lambda_one2many 1.0 \
    --dropout 0.0 \
    --mixed_selection \
    --look_forward_twice \
    ${PY_ARGS}
--------------------------------------------------------------------------------
/configs/two_stage/deformable-detr-hybrid-branch/12eps/r50_hybrid_branch_lambda1_group6_t900_dp0_mqs_lft_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash

set -x

EXP_DIR=exps/two_stage/deformable-detr-hybrid-branch/12eps/r50_hybrid_branch_lambda1_group6_t900_dp0_mqs_lft_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage
PY_ARGS=${@:1}

python -u main.py \
    --output_dir ${EXP_DIR} \
    --with_box_refine \
    --two_stage \
    --dim_feedforward 2048 \
    --epochs 12 \
    --lr_drop 11 \
    --num_queries_one2one 300 \
    --num_queries_one2many 900 \
    --k_one2many 6 \
    --lambda_one2many 1.0 \
    --dropout 0.0 \
    --mixed_selection \
    --look_forward_twice \
    ${PY_ARGS}
--------------------------------------------------------------------------------
/configs/two_stage/deformable-detr-hybrid-branch/12eps/r50_hybrid_branch_lambda1_group4_t1200_dp0_mqs_lft_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash

set -x

EXP_DIR=exps/two_stage/deformable-detr-hybrid-branch/12eps/r50_hybrid_branch_lambda1_group4_t1200_dp0_mqs_lft_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage
PY_ARGS=${@:1}

python -u main.py \
    --output_dir ${EXP_DIR} \
    --with_box_refine \
    --two_stage \
    --dim_feedforward 2048 \
    --epochs 12 \
    --lr_drop 11 \
    --num_queries_one2one 300 \
    --num_queries_one2many 1200 \
    --k_one2many 4 \
    --lambda_one2many 1.0 \
    --dropout 0.0 \
    --mixed_selection \
    --look_forward_twice \
    ${PY_ARGS}
--------------------------------------------------------------------------------
/configs/two_stage/deformable-detr-hybrid-branch/12eps/r50_hybrid_branch_lambda1_group5_t1500_dp0_mqs_lft_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash

set -x

EXP_DIR=exps/two_stage/deformable-detr-hybrid-branch/12eps/r50_hybrid_branch_lambda1_group5_t1500_dp0_mqs_lft_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage
PY_ARGS=${@:1}

python -u main.py \
    --output_dir ${EXP_DIR} \
    --with_box_refine \
    --two_stage \
    --dim_feedforward 2048 \
    --epochs 12 \
    --lr_drop 11 \
    --num_queries_one2one 300 \
    --num_queries_one2many 1500 \
    --k_one2many 5 \
    --lambda_one2many 1.0 \
    --dropout 0.0 \
    --mixed_selection \
    --look_forward_twice \
    ${PY_ARGS}
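
The 12-epoch configs above sweep the one-to-many query count (t300-t1800)
and the duplication factor k (group1-group6). A sketch of driving that grid
sequentially with the launcher; the paths follow the file names listed in
this repository:

# Run the (k_one2many, num_queries_one2many) ablation on 8 GPUs.
CFG_DIR=configs/two_stage/deformable-detr-hybrid-branch/12eps
for kt in 1:300 2:600 3:900 4:1200 5:1500 6:300 6:600 6:900 6:1200 6:1500 6:1800; do
    k=${kt%%:*}
    t=${kt##*:}
    GPUS_PER_NODE=8 ./tools/run_dist_launch.sh 8 \
        ${CFG_DIR}/r50_hybrid_branch_lambda1_group${k}_t${t}_dp0_mqs_lft_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage.sh
done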
--------------------------------------------------------------------------------
/configs/two_stage/deformable-detr-hybrid-branch/12eps/r50_hybrid_branch_lambda1_group6_t1200_dp0_mqs_lft_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash

set -x

EXP_DIR=exps/two_stage/deformable-detr-hybrid-branch/12eps/r50_hybrid_branch_lambda1_group6_t1200_dp0_mqs_lft_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage
PY_ARGS=${@:1}

python -u main.py \
    --output_dir ${EXP_DIR} \
    --with_box_refine \
    --two_stage \
    --dim_feedforward 2048 \
    --epochs 12 \
    --lr_drop 11 \
    --num_queries_one2one 300 \
    --num_queries_one2many 1200 \
    --k_one2many 6 \
    --lambda_one2many 1.0 \
    --dropout 0.0 \
    --mixed_selection \
    --look_forward_twice \
    ${PY_ARGS}
--------------------------------------------------------------------------------
/configs/two_stage/deformable-detr-hybrid-branch/12eps/r50_hybrid_branch_lambda1_group6_t1500_dp0_mqs_lft_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash

set -x

EXP_DIR=exps/two_stage/deformable-detr-hybrid-branch/12eps/r50_hybrid_branch_lambda1_group6_t1500_dp0_mqs_lft_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage
PY_ARGS=${@:1}

python -u main.py \
    --output_dir ${EXP_DIR} \
    --with_box_refine \
    --two_stage \
    --dim_feedforward 2048 \
    --epochs 12 \
    --lr_drop 11 \
    --num_queries_one2one 300 \
    --num_queries_one2many 1500 \
    --k_one2many 6 \
    --lambda_one2many 1.0 \
    --dropout 0.0 \
    --mixed_selection \
    --look_forward_twice \
    ${PY_ARGS}
--------------------------------------------------------------------------------
/configs/two_stage/deformable-detr-hybrid-branch/12eps/r50_hybrid_branch_lambda1_group6_t1800_dp0_mqs_lft_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash

set -x

EXP_DIR=exps/two_stage/deformable-detr-hybrid-branch/12eps/r50_hybrid_branch_lambda1_group6_t1800_dp0_mqs_lft_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage
PY_ARGS=${@:1}

python -u main.py \
    --output_dir ${EXP_DIR} \
    --with_box_refine \
    --two_stage \
    --dim_feedforward 2048 \
    --epochs 12 \
    --lr_drop 11 \
    --num_queries_one2one 300 \
    --num_queries_one2many 1800 \
    --k_one2many 6 \
    --lambda_one2many 1.0 \
    --dropout 0.0 \
    --mixed_selection \
    --look_forward_twice \
    ${PY_ARGS}
--------------------------------------------------------------------------------
/configs/two_stage/deformable-detr-hybrid-branch/12eps/r50_hybrid_branch_lambda2_group6_t1500_dp0_mqs_lft_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash

set -x

EXP_DIR=exps/two_stage/deformable-detr-hybrid-branch/12eps/r50_hybrid_branch_lambda2_group6_t1500_dp0_mqs_lft_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage
PY_ARGS=${@:1}

python -u main.py \
    --output_dir ${EXP_DIR} \
    --with_box_refine \
    --two_stage \
    --dim_feedforward 2048 \
    --epochs 12 \
    --lr_drop 11 \
    --num_queries_one2one 300 \
    --num_queries_one2many 1500 \
    --k_one2many 6 \
    --lambda_one2many 2.0 \
    --dropout 0.0 \
    --mixed_selection \
    --look_forward_twice \
    ${PY_ARGS}
--------------------------------------------------------------------------------
/configs/two_stage/deformable-detr-hybrid-branch/12eps/r50_hybrid_branch_lambda5_group6_t1500_dp0_mqs_lft_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash

set -x

EXP_DIR=exps/two_stage/deformable-detr-hybrid-branch/12eps/r50_hybrid_branch_lambda5_group6_t1500_dp0_mqs_lft_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage
PY_ARGS=${@:1}

python -u main.py \
    --output_dir ${EXP_DIR} \
    --with_box_refine \
    --two_stage \
    --dim_feedforward 2048 \
    --epochs 12 \
    --lr_drop 11 \
    --num_queries_one2one 300 \
    --num_queries_one2many 1500 \
    --k_one2many 6 \
    --lambda_one2many 5.0 \
    --dropout 0.0 \
    --mixed_selection \
    --look_forward_twice \
    ${PY_ARGS}
--------------------------------------------------------------------------------
/configs/two_stage/deformable-detr-hybrid-branch/24eps/r50_hybrid_branch_lambda1_group6_t1500_dp0_mqs_lft_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash

set -x

EXP_DIR=exps/two_stage/deformable-detr-hybrid-branch/24eps/r50_hybrid_branch_lambda1_group6_t1500_dp0_mqs_lft_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage
PY_ARGS=${@:1}

python -u main.py \
    --output_dir ${EXP_DIR} \
    --with_box_refine \
    --two_stage \
    --dim_feedforward 2048 \
    --epochs 24 \
    --lr_drop 20 \
    --num_queries_one2one 300 \
    --num_queries_one2many 1500 \
    --k_one2many 6 \
    --lambda_one2many 1.0 \
    --dropout 0.0 \
    --mixed_selection \
    --look_forward_twice \
    ${PY_ARGS}
--------------------------------------------------------------------------------
/configs/two_stage/deformable-detr-hybrid-branch/36eps/r50_hybrid_branch_lambda1_group6_t1500_dp0_mqs_lft_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash

set -x

EXP_DIR=exps/two_stage/deformable-detr-hybrid-branch/36eps/r50_hybrid_branch_lambda1_group6_t1500_dp0_mqs_lft_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage
PY_ARGS=${@:1}

python -u main.py \
    --output_dir ${EXP_DIR} \
    --with_box_refine \
    --two_stage \
    --dim_feedforward 2048 \
    --epochs 36 \
    --lr_drop 30 \
    --num_queries_one2one 300 \
    --num_queries_one2many 1500 \
    --k_one2many 6 \
    --lambda_one2many 1.0 \
    --dropout 0.0 \
    --mixed_selection \
    --look_forward_twice \
    ${PY_ARGS}
--------------------------------------------------------------------------------
/configs/two_stage/deformable-detr-hybrid-branch/12eps/r50_hybrid_branch_lambda0.1_group6_t1500_dp0_mqs_lft_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash

set -x

EXP_DIR=exps/two_stage/deformable-detr-hybrid-branch/12eps/r50_hybrid_branch_lambda0.1_group6_t1500_dp0_mqs_lft_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage
PY_ARGS=${@:1}

python -u main.py \
    --output_dir ${EXP_DIR} \
    --with_box_refine \
    --two_stage \
    --dim_feedforward 2048 \
    --epochs 12 \
    --lr_drop 11 \
    --num_queries_one2one 300 \
    --num_queries_one2many 1500 \
    --k_one2many 6 \
    --lambda_one2many 0.1 \
    --dropout 0.0 \
    --mixed_selection \
    --look_forward_twice \
    ${PY_ARGS}
--------------------------------------------------------------------------------
/configs/two_stage/deformable-detr-hybrid-branch/12eps/r50_hybrid_branch_lambda0.2_group6_t1500_dp0_mqs_lft_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash

set -x

EXP_DIR=exps/two_stage/deformable-detr-hybrid-branch/12eps/r50_hybrid_branch_lambda0.2_group6_t1500_dp0_mqs_lft_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage
PY_ARGS=${@:1}

python -u main.py \
    --output_dir ${EXP_DIR} \
    --with_box_refine \
    --two_stage \
    --dim_feedforward 2048 \
    --epochs 12 \
    --lr_drop 11 \
    --num_queries_one2one 300 \
    --num_queries_one2many 1500 \
    --k_one2many 6 \
    --lambda_one2many 0.2 \
    --dropout 0.0 \
    --mixed_selection \
    --look_forward_twice \
    ${PY_ARGS}
--------------------------------------------------------------------------------
/configs/two_stage/deformable-detr-hybrid-branch/12eps/r50_hybrid_branch_lambda0.5_group6_t1500_dp0_mqs_lft_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash

set -x

EXP_DIR=exps/two_stage/deformable-detr-hybrid-branch/12eps/r50_hybrid_branch_lambda0.5_group6_t1500_dp0_mqs_lft_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage
PY_ARGS=${@:1}

python -u main.py \
    --output_dir ${EXP_DIR} \
    --with_box_refine \
    --two_stage \
    --dim_feedforward 2048 \
    --epochs 12 \
    --lr_drop 11 \
    --num_queries_one2one 300 \
    --num_queries_one2many 1500 \
    --k_one2many 6 \
    --lambda_one2many 0.5 \
    --dropout 0.0 \
    --mixed_selection \
    --look_forward_twice \
    ${PY_ARGS}
--------------------------------------------------------------------------------
/configs/two_stage/deformable-detr-baseline/12eps/swin/swin_tiny_dp0_mqs_lft_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash

set -x

EXP_DIR=exps/two_stage/deformable-detr-baseline/12eps/swin/swin_tiny_dp0_mqs_lft_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage
PY_ARGS=${@:1}

python -u main.py \
    --output_dir ${EXP_DIR} \
    --with_box_refine \
    --two_stage \
    --dim_feedforward 2048 \
    --num_queries_one2one 300 \
    --num_queries_one2many 0 \
    --k_one2many 0 \
    --epochs 12 \
    --lr_drop 11 \
    --dropout 0.0 \
    --mixed_selection \
    --look_forward_twice \
    --backbone swin_tiny \
    --pretrained_backbone_path /mnt/pretrained_backbone/swin_tiny_patch4_window7_224.pth \
    ${PY_ARGS}
--------------------------------------------------------------------------------
/configs/two_stage/deformable-detr-baseline/36eps/swin/swin_tiny_dp0_mqs_lft_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash

set -x

EXP_DIR=exps/two_stage/deformable-detr-baseline/36eps/swin/swin_tiny_dp0_mqs_lft_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage
PY_ARGS=${@:1}

python -u main.py \
    --output_dir ${EXP_DIR} \
    --with_box_refine \
    --two_stage \
    --dim_feedforward 2048 \
    --num_queries_one2one 300 \
    --num_queries_one2many 0 \
    --k_one2many 0 \
    --epochs 36 \
    --lr_drop 30 \
    --dropout 0.0 \
    --mixed_selection \
    --look_forward_twice \
    --backbone swin_tiny \
    --pretrained_backbone_path /mnt/pretrained_backbone/swin_tiny_patch4_window7_224.pth \
    ${PY_ARGS}
--------------------------------------------------------------------------------
/configs/two_stage/deformable-detr-hybrid-branch/12eps/r101_hybrid_branch_lambda1_group6_t1500_dp0_mqs_lft_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash

set -x

EXP_DIR=exps/two_stage/deformable-detr-hybrid-branch/12eps/r101_hybrid_branch_lambda1_group6_t1500_dp0_mqs_lft_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage
PY_ARGS=${@:1}

python -u main.py \
    --output_dir ${EXP_DIR} \
    --with_box_refine \
    --two_stage \
    --dim_feedforward 2048 \
    --epochs 12 \
    --lr_drop 11 \
    --num_queries_one2one 300 \
    --num_queries_one2many 1500 \
    --k_one2many 6 \
    --lambda_one2many 1.0 \
    --dropout 0.0 \
    --mixed_selection \
    --look_forward_twice \
    --backbone resnet101 \
    ${PY_ARGS}
--------------------------------------------------------------------------------
/configs/two_stage/deformable-detr-baseline/12eps/swin/swin_large_dp0_mqs_lft_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash

set -x

EXP_DIR=exps/two_stage/deformable-detr-baseline/12eps/swin/swin_large_dp0_mqs_lft_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage
PY_ARGS=${@:1}

python -u main.py \
    --output_dir ${EXP_DIR} \
    --with_box_refine \
    --two_stage \
    --dim_feedforward 2048 \
    --num_queries_one2one 300 \
    --num_queries_one2many 0 \
    --k_one2many 0 \
    --epochs 12 \
    --lr_drop 11 \
    --dropout 0.0 \
    --mixed_selection \
    --look_forward_twice \
    --backbone swin_large \
    --pretrained_backbone_path /mnt/pretrained_backbone/swin_large_patch4_window7_224_22k.pth \
    ${PY_ARGS}
--------------------------------------------------------------------------------
/configs/two_stage/deformable-detr-hybrid-branch/36eps/r101_hybrid_branch_lambda1_group6_t1500_dp0_mqs_lft_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash

set -x

EXP_DIR=exps/two_stage/deformable-detr-hybrid-branch/36eps/r101_hybrid_branch_lambda1_group6_t1500_dp0_mqs_lft_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage
PY_ARGS=${@:1}

python -u main.py \
    --output_dir ${EXP_DIR} \
    --with_box_refine \
    --two_stage \
    --dim_feedforward 2048 \
    --epochs 36 \
    --lr_drop 30 \
    --num_queries_one2one 300 \
    --num_queries_one2many 1500 \
    --k_one2many 6 \
    --lambda_one2many 1.0 \
    --dropout 0.0 \
    --mixed_selection \
    --look_forward_twice \
    --backbone resnet101 \
    ${PY_ARGS}
--------------------------------------------------------------------------------
/models/ops/src/vision.cpp:
--------------------------------------------------------------------------------
/*!
**************************************************************************************************
* Deformable DETR
* Copyright (c) 2020 SenseTime. All Rights Reserved.
* Licensed under the Apache License, Version 2.0 [see LICENSE for details]
**************************************************************************************************
* Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0
**************************************************************************************************
*/

#include "ms_deform_attn.h"

PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
  m.def("ms_deform_attn_forward", &ms_deform_attn_forward, "ms_deform_attn_forward");
  m.def("ms_deform_attn_backward", &ms_deform_attn_backward, "ms_deform_attn_backward");
}
--------------------------------------------------------------------------------
/configs/two_stage/deformable-detr-baseline/36eps/swin/drop_path0.5_swin_large_dp0_mqs_lft_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash

set -x

EXP_DIR=exps/two_stage/deformable-detr-baseline/36eps/swin/drop_path0.5_swin_large_dp0_mqs_lft_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage
PY_ARGS=${@:1}

python -u main.py \
    --output_dir ${EXP_DIR} \
    --with_box_refine \
    --two_stage \
    --dim_feedforward 2048 \
    --num_queries_one2one 300 \
    --num_queries_one2many 0 \
    --k_one2many 0 \
    --epochs 36 \
    --lr_drop 30 \
    --dropout 0.0 \
    --mixed_selection \
    --look_forward_twice \
    --backbone swin_large \
    --pretrained_backbone_path /mnt/pretrained_backbone/swin_large_patch4_window7_224_22k.pth \
    --drop_path_rate 0.5 \
    ${PY_ARGS}
--------------------------------------------------------------------------------
/tools/run_dist_launch.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash
# ------------------------------------------------------------------------
# Deformable DETR
# Copyright (c) 2020 SenseTime. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 [see LICENSE for details]
# ------------------------------------------------------------------------

set -x

GPUS=$1
RUN_COMMAND=${@:2}
if [ $GPUS -lt 8 ]; then
    GPUS_PER_NODE=${GPUS_PER_NODE:-$GPUS}
else
    GPUS_PER_NODE=${GPUS_PER_NODE:-8}
fi
MASTER_ADDR=${MASTER_ADDR:-"127.0.0.1"}
MASTER_PORT=${MASTER_PORT:-"29500"}
NODE_RANK=${NODE_RANK:-0}

let "NNODES=GPUS/GPUS_PER_NODE"

python ./tools/launch.py \
    --nnodes ${NNODES} \
    --node_rank ${NODE_RANK} \
    --master_addr ${MASTER_ADDR} \
    --master_port ${MASTER_PORT} \
    --nproc_per_node ${GPUS_PER_NODE} \
    ${RUN_COMMAND}
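
run_dist_launch.sh computes NNODES from GPUS/GPUS_PER_NODE and forwards the
rendezvous settings to tools/launch.py, so multi-node training only needs
the environment variables above overridden. A sketch for two 8-GPU nodes;
the master address is a placeholder and <config>.sh stands for any of the
config scripts in this listing:

# On node 0:
MASTER_ADDR=10.0.0.1 NODE_RANK=0 GPUS_PER_NODE=8 \
    ./tools/run_dist_launch.sh 16 <config>.sh

# On node 1, run the same command with NODE_RANK=1:
MASTER_ADDR=10.0.0.1 NODE_RANK=1 GPUS_PER_NODE=8 \
    ./tools/run_dist_launch.sh 16 <config>.sh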
--------------------------------------------------------------------------------
/configs/two_stage/deformable-detr-hybrid-branch/12eps/swin/swin_small_hybrid_branch_lambda1_group6_t1500_dp0_mqs_lft_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash

set -x

EXP_DIR=exps/two_stage/deformable-detr-hybrid-branch/12eps/swin/swin_small_hybrid_branch_lambda1_group6_t1500_dp0_mqs_lft_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage
PY_ARGS=${@:1}

python -u main.py \
    --output_dir ${EXP_DIR} \
    --with_box_refine \
    --two_stage \
    --dim_feedforward 2048 \
    --epochs 12 \
    --lr_drop 11 \
    --num_queries_one2one 300 \
    --num_queries_one2many 1500 \
    --k_one2many 6 \
    --lambda_one2many 1.0 \
    --dropout 0.0 \
    --mixed_selection \
    --look_forward_twice \
    --backbone swin_small \
    --pretrained_backbone_path /mnt/pretrained_backbone/swin_small_patch4_window7_224.pth \
    ${PY_ARGS}
--------------------------------------------------------------------------------
/configs/two_stage/deformable-detr-hybrid-branch/36eps/swin/swin_small_hybrid_branch_lambda1_group6_t1500_dp0_mqs_lft_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash

set -x

EXP_DIR=exps/two_stage/deformable-detr-hybrid-branch/36eps/swin/swin_small_hybrid_branch_lambda1_group6_t1500_dp0_mqs_lft_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage
PY_ARGS=${@:1}

python -u main.py \
    --output_dir ${EXP_DIR} \
    --with_box_refine \
    --two_stage \
    --dim_feedforward 2048 \
    --epochs 36 \
    --lr_drop 30 \
    --num_queries_one2one 300 \
    --num_queries_one2many 1500 \
    --k_one2many 6 \
    --lambda_one2many 1.0 \
    --dropout 0.0 \
    --mixed_selection \
    --look_forward_twice \
    --backbone swin_small \
    --pretrained_backbone_path /mnt/pretrained_backbone/swin_small_patch4_window7_224.pth \
    ${PY_ARGS}
-------------------------------------------------------------------------------- /configs/two_stage/deformable-detr-hybrid-branch/12eps/swin/swin_tiny_hybrid_branch_lambda1_group6_t1500_dp0_mqs_lft_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -x 4 | 5 | EXP_DIR=exps/two_stage/deformable-detr-hybrid-branch/12eps/swin/swin_tiny_hybrid_branch_lambda1_group6_t1500_dp0_mqs_lft_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage 6 | PY_ARGS=${@:1} 7 | 8 | python -u main.py \ 9 | --output_dir ${EXP_DIR} \ 10 | --with_box_refine \ 11 | --two_stage \ 12 | --dim_feedforward 2048 \ 13 | --epochs 12 \ 14 | --lr_drop 11 \ 15 | --num_queries_one2one 300 \ 16 | --num_queries_one2many 1500 \ 17 | --k_one2many 6 \ 18 | --lambda_one2many 1.0 \ 19 | --dropout 0.0 \ 20 | --mixed_selection \ 21 | --look_forward_twice \ 22 | --backbone swin_tiny \ 23 | --pretrained_backbone_path /mnt/pretrained_backbone/swin_tiny_patch4_window7_224.pth \ 24 | ${PY_ARGS} 25 | -------------------------------------------------------------------------------- /configs/two_stage/deformable-detr-hybrid-branch/36eps/swin/swin_tiny_hybrid_branch_lambda1_group6_t1500_dp0_mqs_lft_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -x 4 | 5 | EXP_DIR=exps/two_stage/deformable-detr-hybrid-branch/36eps/swin/swin_tiny_hybrid_branch_lambda1_group6_t1500_dp0_mqs_lft_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage 6 | PY_ARGS=${@:1} 7 | 8 | python -u main.py \ 9 | --output_dir ${EXP_DIR} \ 10 | --with_box_refine \ 11 | --two_stage \ 12 | --dim_feedforward 2048 \ 13 | --epochs 36 \ 14 | --lr_drop 30 \ 15 | --num_queries_one2one 300 \ 16 | --num_queries_one2many 1500 \ 17 | --k_one2many 6 \ 18 | --lambda_one2many 1.0 \ 19 | --dropout 0.0 \ 20 | --mixed_selection \ 21 | --look_forward_twice \ 22 | --backbone swin_tiny \ 23 | --pretrained_backbone_path /mnt/pretrained_backbone/swin_tiny_patch4_window7_224.pth \ 24 | ${PY_ARGS} 25 | -------------------------------------------------------------------------------- /configs/two_stage/deformable-detr-hybrid-branch/12eps/swin/swin_large_hybrid_branch_lambda1_group6_t1500_dp0_mqs_lft_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -x 4 | 5 | EXP_DIR=exps/two_stage/deformable-detr-hybrid-branch/12eps/swin/swin_large_hybrid_branch_lambda1_group6_t1500_dp0_mqs_lft_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage 6 | PY_ARGS=${@:1} 7 | 8 | python -u main.py \ 9 | --output_dir ${EXP_DIR} \ 10 | --with_box_refine \ 11 | --two_stage \ 12 | --dim_feedforward 2048 \ 13 | --epochs 12 \ 14 | --lr_drop 11 \ 15 | --num_queries_one2one 300 \ 16 | --num_queries_one2many 1500 \ 17 | --k_one2many 6 \ 18 | --lambda_one2many 1.0 \ 19 | --dropout 0.0 \ 20 | --mixed_selection \ 21 | --look_forward_twice \ 22 | --backbone swin_large \ 23 | --pretrained_backbone_path /mnt/pretrained_backbone/swin_large_patch4_window7_224_22k.pth \ 24 | ${PY_ARGS} 25 |
-------------------------------------------------------------------------------- /configs/two_stage/deformable-detr-hybrid-branch/12eps/swin/swin_small_22k_hybrid_branch_lambda1_group6_t1500_dp0_mqs_lft_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -x 4 | 5 | EXP_DIR=exps/two_stage/deformable-detr-hybrid-branch/12eps/swin/swin_small_22k_hybrid_branch_lambda1_group6_t1500_dp0_mqs_lft_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage 6 | PY_ARGS=${@:1} 7 | 8 | python -u main.py \ 9 | --output_dir ${EXP_DIR} \ 10 | --with_box_refine \ 11 | --two_stage \ 12 | --dim_feedforward 2048 \ 13 | --epochs 12 \ 14 | --lr_drop 11 \ 15 | --num_queries_one2one 300 \ 16 | --num_queries_one2many 1500 \ 17 | --k_one2many 6 \ 18 | --lambda_one2many 1.0 \ 19 | --dropout 0.0 \ 20 | --mixed_selection \ 21 | --look_forward_twice \ 22 | --backbone swin_small \ 23 | --pretrained_backbone_path /mnt/pretrained_backbone/swin_small_patch4_window7_224_22k.pth \ 24 | ${PY_ARGS} 25 | -------------------------------------------------------------------------------- /configs/two_stage/deformable-detr-hybrid-branch/36eps/swin/swin_small_22k_hybrid_branch_lambda1_group6_t1500_dp0_mqs_lft_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -x 4 | 5 | EXP_DIR=exps/two_stage/deformable-detr-hybrid-branch/36eps/swin/swin_small_22k_hybrid_branch_lambda1_group6_t1500_dp0_mqs_lft_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage 6 | PY_ARGS=${@:1} 7 | 8 | python -u main.py \ 9 | --output_dir ${EXP_DIR} \ 10 | --with_box_refine \ 11 | --two_stage \ 12 | --dim_feedforward 2048 \ 13 | --epochs 36 \ 14 | --lr_drop 30 \ 15 | --num_queries_one2one 300 \ 16 | --num_queries_one2many 1500 \ 17 | --k_one2many 6 \ 18 | --lambda_one2many 1.0 \ 19 | --dropout 0.0 \ 20 | --mixed_selection \ 21 | --look_forward_twice \ 22 | --backbone swin_small \ 23 | --pretrained_backbone_path /mnt/pretrained_backbone/swin_small_patch4_window7_224_22k.pth \ 24 | ${PY_ARGS} 25 | -------------------------------------------------------------------------------- /configs/two_stage/deformable-detr-hybrid-branch/12eps/swin/swin_large_hybrid_branch_lambda1_group6_t1500_n900_dp0_mqs_lft_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -x 4 | 5 | EXP_DIR=exps/two_stage/deformable-detr-hybrid-branch/12eps/swin/swin_large_hybrid_branch_lambda1_group6_t1500_n900_dp0_mqs_lft_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage 6 | PY_ARGS=${@:1} 7 | 8 | python -u main.py \ 9 | --output_dir ${EXP_DIR} \ 10 | --with_box_refine \ 11 | --two_stage \ 12 | --dim_feedforward 2048 \ 13 | --epochs 12 \ 14 | --lr_drop 11 \ 15 | --num_queries_one2one 900 \ 16 | --num_queries_one2many 1500 \ 17 | --k_one2many 6 \ 18 | --lambda_one2many 1.0 \ 19 | --dropout 0.0 \ 20 | --mixed_selection \ 21 | --look_forward_twice \ 22 | --backbone swin_large \ 23 | --pretrained_backbone_path /mnt/pretrained_backbone/swin_large_patch4_window7_224_22k.pth \ 24 | ${PY_ARGS} 25 |
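The hybrid-branch flags above pair a one-to-one branch (--num_queries_one2one) with an auxiliary one-to-many branch (--num_queries_one2many), where --k_one2many repeats each ground-truth box for the one-to-many matching and --lambda_one2many weights that branch's loss. A minimal sketch of the weighted combination (illustrative only; not the repository's exact training code):

    # Illustrative: the hybrid objective combines both branch losses,
    # L = L_one2one + lambda_one2many * L_one2many.
    def hybrid_loss(loss_one2one, loss_one2many, lambda_one2many=1.0):
        return loss_one2one + lambda_one2many * loss_one2many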
-------------------------------------------------------------------------------- /configs/two_stage/deformable-detr-hybrid-branch/12eps/swin/decay0.05_swin_tiny_hybrid_branch_lambda1_group6_t1500_dp0_mqs_lft_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -x 4 | 5 | EXP_DIR=exps/two_stage/deformable-detr-hybrid-branch/12eps/swin/decay0.05_swin_tiny_hybrid_branch_lambda1_group6_t1500_dp0_mqs_lft_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage 6 | PY_ARGS=${@:1} 7 | 8 | python -u main.py \ 9 | --output_dir ${EXP_DIR} \ 10 | --with_box_refine \ 11 | --two_stage \ 12 | --dim_feedforward 2048 \ 13 | --epochs 12 \ 14 | --lr_drop 11 \ 15 | --num_queries_one2one 300 \ 16 | --num_queries_one2many 1500 \ 17 | --k_one2many 6 \ 18 | --lambda_one2many 1.0 \ 19 | --dropout 0.0 \ 20 | --mixed_selection \ 21 | --look_forward_twice \ 22 | --backbone swin_tiny \ 23 | --pretrained_backbone_path /mnt/pretrained_backbone/swin_tiny_patch4_window7_224.pth \ 24 | --weight_decay 0.05 \ 25 | ${PY_ARGS} 26 | -------------------------------------------------------------------------------- /configs/two_stage/deformable-detr-hybrid-branch/36eps/swin/decay0.05_swin_tiny_hybrid_branch_lambda1_group6_t1500_dp0_mqs_lft_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -x 4 | 5 | EXP_DIR=exps/two_stage/deformable-detr-hybrid-branch/36eps/swin/decay0.05_swin_tiny_hybrid_branch_lambda1_group6_t1500_dp0_mqs_lft_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage 6 | PY_ARGS=${@:1} 7 | 8 | python -u main.py \ 9 | --output_dir ${EXP_DIR} \ 10 | --with_box_refine \ 11 | --two_stage \ 12 | --dim_feedforward 2048 \ 13 | --epochs 36 \ 14 | --lr_drop 30 \ 15 | --num_queries_one2one 300 \ 16 | --num_queries_one2many 1500 \ 17 | --k_one2many 6 \ 18 | --lambda_one2many 1.0 \ 19 | --dropout 0.0 \ 20 | --mixed_selection \ 21 | --look_forward_twice \ 22 | --backbone swin_tiny \ 23 | --pretrained_backbone_path /mnt/pretrained_backbone/swin_tiny_patch4_window7_224.pth \ 24 | --weight_decay 0.05 \ 25 | ${PY_ARGS} 26 |
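Because each config captures PY_ARGS=${@:1} and expands ${PY_ARGS} at the end of the main.py call, any extra flags passed to the script are forwarded to main.py unchanged, e.g. (illustrative; --batch_size is a standard Deformable DETR argument):

    bash configs/two_stage/deformable-detr-hybrid-branch/36eps/swin/decay0.05_swin_tiny_hybrid_branch_lambda1_group6_t1500_dp0_mqs_lft_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage.sh --batch_size 2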
-------------------------------------------------------------------------------- /configs/two_stage/deformable-detr-hybrid-branch/36eps/swin/drop_path0.5_swin_large_hybrid_branch_lambda1_group6_t1500_dp0_mqs_lft_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -x 4 | 5 | EXP_DIR=exps/two_stage/deformable-detr-hybrid-branch/36eps/swin/drop_path0.5_swin_large_hybrid_branch_lambda1_group6_t1500_dp0_mqs_lft_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage 6 | PY_ARGS=${@:1} 7 | 8 | python -u main.py \ 9 | --output_dir ${EXP_DIR} \ 10 | --with_box_refine \ 11 | --two_stage \ 12 | --dim_feedforward 2048 \ 13 | --epochs 36 \ 14 | --lr_drop 30 \ 15 | --num_queries_one2one 300 \ 16 | --num_queries_one2many 1500 \ 17 | --k_one2many 6 \ 18 | --lambda_one2many 1.0 \ 19 | --dropout 0.0 \ 20 | --mixed_selection \ 21 | --look_forward_twice \ 22 | --backbone swin_large \ 23 | --pretrained_backbone_path /mnt/pretrained_backbone/swin_large_patch4_window7_224_22k.pth \ 24 | --drop_path_rate 0.5 \ 25 | ${PY_ARGS} 26 | -------------------------------------------------------------------------------- /configs/two_stage/deformable-detr-hybrid-branch/36eps/swin/drop_path0.5_swin_large_hybrid_branch_lambda1_group6_t1500_n900_dp0_mqs_lft_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -x 4 | 5 | EXP_DIR=exps/two_stage/deformable-detr-hybrid-branch/36eps/swin/drop_path0.5_swin_large_hybrid_branch_lambda1_group6_t1500_n900_dp0_mqs_lft_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage 6 | PY_ARGS=${@:1} 7 | 8 | python -u main.py \ 9 | --output_dir ${EXP_DIR} \ 10 | --with_box_refine \ 11 | --two_stage \ 12 | --dim_feedforward 2048 \ 13 | --epochs 36 \ 14 | --lr_drop 30 \ 15 | --num_queries_one2one 900 \ 16 | --num_queries_one2many 1500 \ 17 | --k_one2many 6 \ 18 | --lambda_one2many 1.0 \ 19 | --dropout 0.0 \ 20 | --mixed_selection \ 21 | --look_forward_twice \ 22 | --backbone swin_large \ 23 | --pretrained_backbone_path /mnt/pretrained_backbone/swin_large_patch4_window7_224_22k.pth \ 24 | --drop_path_rate 0.5 \ 25 | ${PY_ARGS} 26 | -------------------------------------------------------------------------------- /configs/two_stage/deformable-detr-hybrid-branch/36eps/swin/decay0.05_drop_path0.5_swin_large_hybrid_branch_lambda1_group6_t1500_n900_dp0_mqs_lft_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -x 4 | 5 | EXP_DIR=exps/two_stage/deformable-detr-hybrid-branch/36eps/swin/decay0.05_drop_path0.5_swin_large_hybrid_branch_lambda1_group6_t1500_n900_dp0_mqs_lft_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage 6 | PY_ARGS=${@:1} 7 | 8 | python -u main.py \ 9 | --output_dir ${EXP_DIR} \ 10 | --with_box_refine \ 11 | --two_stage \ 12 | --dim_feedforward 2048 \ 13 | --epochs 36 \ 14 | --lr_drop 30 \ 15 | --num_queries_one2one 900 \ 16 | --num_queries_one2many 1500 \ 17 | --k_one2many 6 \ 18 | --lambda_one2many 1.0 \ 19 | --dropout 0.0 \ 20 | --mixed_selection \ 21 | --look_forward_twice \ 22 | --backbone swin_large \ 23 | --pretrained_backbone_path /mnt/pretrained_backbone/swin_large_patch4_window7_224_22k.pth \ 24 | --drop_path_rate 0.5 \ 25 | --weight_decay 0.05 \ 26 | ${PY_ARGS} 27 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 HDETR-group (Yuhui Yuan, Ding Jia, Haodi He, Xiaopei Wu, Haojun Yu) 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 |
-------------------------------------------------------------------------------- /models/ops/src/cuda/ms_deform_attn_cuda.h: -------------------------------------------------------------------------------- 1 | /*! 2 | ************************************************************************************************** 3 | * Deformable DETR 4 | * Copyright (c) 2020 SenseTime. All Rights Reserved. 5 | * Licensed under the Apache License, Version 2.0 [see LICENSE for details] 6 | ************************************************************************************************** 7 | * Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 8 | ************************************************************************************************** 9 | */ 10 | 11 | #pragma once 12 | #include <torch/extension.h> 13 | 14 | at::Tensor ms_deform_attn_cuda_forward( 15 | const at::Tensor &value, 16 | const at::Tensor &spatial_shapes, 17 | const at::Tensor &level_start_index, 18 | const at::Tensor &sampling_loc, 19 | const at::Tensor &attn_weight, 20 | const int im2col_step); 21 | 22 | std::vector<at::Tensor> ms_deform_attn_cuda_backward( 23 | const at::Tensor &value, 24 | const at::Tensor &spatial_shapes, 25 | const at::Tensor &level_start_index, 26 | const at::Tensor &sampling_loc, 27 | const at::Tensor &attn_weight, 28 | const at::Tensor &grad_output, 29 | const int im2col_step); 30 | 31 | -------------------------------------------------------------------------------- /models/ops/src/cpu/ms_deform_attn_cpu.h: -------------------------------------------------------------------------------- 1 | /*! 2 | ************************************************************************************************** 3 | * Deformable DETR 4 | * Copyright (c) 2020 SenseTime. All Rights Reserved. 5 | * Licensed under the Apache License, Version 2.0 [see LICENSE for details] 6 | ************************************************************************************************** 7 | * Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 8 | ************************************************************************************************** 9 | */ 10 | 11 | #pragma once 12 | #include <torch/extension.h> 13 | 14 | at::Tensor 15 | ms_deform_attn_cpu_forward( 16 | const at::Tensor &value, 17 | const at::Tensor &spatial_shapes, 18 | const at::Tensor &level_start_index, 19 | const at::Tensor &sampling_loc, 20 | const at::Tensor &attn_weight, 21 | const int im2col_step); 22 | 23 | std::vector<at::Tensor> 24 | ms_deform_attn_cpu_backward( 25 | const at::Tensor &value, 26 | const at::Tensor &spatial_shapes, 27 | const at::Tensor &level_start_index, 28 | const at::Tensor &sampling_loc, 29 | const at::Tensor &attn_weight, 30 | const at::Tensor &grad_output, 31 | const int im2col_step); 32 | 33 | 34 | -------------------------------------------------------------------------------- /tools/run_dist_slurm.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # -------------------------------------------------------------------------------------------------------------------------- 3 | # Deformable DETR 4 | # Copyright (c) 2020 SenseTime. All Rights Reserved. 5 | # Licensed under the Apache License, Version 2.0 [see LICENSE for details] 6 | # -------------------------------------------------------------------------------------------------------------------------- 7 | # Modified from https://github.com/open-mmlab/mmdetection/blob/3b53fe15d87860c6941f3dda63c0f27422da6266/tools/slurm_train.sh 8 | # -------------------------------------------------------------------------------------------------------------------------- 9 | 10 | set -x 11 | 12 | PARTITION=$1 13 | JOB_NAME=$2 14 | GPUS=$3 15 | RUN_COMMAND=${@:4} 16 | if [ $GPUS -lt 8 ]; then 17 | GPUS_PER_NODE=${GPUS_PER_NODE:-$GPUS} 18 | else 19 | GPUS_PER_NODE=${GPUS_PER_NODE:-8} 20 | fi 21 | CPUS_PER_TASK=${CPUS_PER_TASK:-4} 22 | SRUN_ARGS=${SRUN_ARGS:-""} 23 | 24 | srun -p ${PARTITION} \ 25 | --job-name=${JOB_NAME} \ 26 | --gres=gpu:${GPUS_PER_NODE} \ 27 | --ntasks=${GPUS} \ 28 | --ntasks-per-node=${GPUS_PER_NODE} \ 29 | --cpus-per-task=${CPUS_PER_TASK} \ 30 | --kill-on-bad-exit=1 \ 31 | ${SRUN_ARGS} \ 32 | ${RUN_COMMAND} 33 | 34 |
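run_dist_slurm.sh mirrors the local launcher for Slurm clusters; per the upstream Deformable DETR convention, a typical invocation looks like this (partition and job name are placeholders):

    GPUS_PER_NODE=8 ./tools/run_dist_slurm.sh <partition> hdetr_job 8 ./configs/two_stage/deformable-detr-hybrid-branch/12eps/swin/swin_tiny_hybrid_branch_lambda1_group6_t1500_dp0_mqs_lft_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage.sh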
-------------------------------------------------------------------------------- /models/ops/src/cpu/ms_deform_attn_cpu.cpp: -------------------------------------------------------------------------------- 1 | /*! 2 | ************************************************************************************************** 3 | * Deformable DETR 4 | * Copyright (c) 2020 SenseTime. All Rights Reserved. 5 | * Licensed under the Apache License, Version 2.0 [see LICENSE for details] 6 | ************************************************************************************************** 7 | * Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 8 | ************************************************************************************************** 9 | */ 10 | 11 | #include <vector> 12 | 13 | #include <ATen/ATen.h> 14 | #include <ATen/cuda/CUDAContext.h> 15 | 16 | 17 | at::Tensor 18 | ms_deform_attn_cpu_forward( 19 | const at::Tensor &value, 20 | const at::Tensor &spatial_shapes, 21 | const at::Tensor &level_start_index, 22 | const at::Tensor &sampling_loc, 23 | const at::Tensor &attn_weight, 24 | const int im2col_step) 25 | { 26 | AT_ERROR("Not implemented on the CPU"); 27 | } 28 | 29 | std::vector<at::Tensor> 30 | ms_deform_attn_cpu_backward( 31 | const at::Tensor &value, 32 | const at::Tensor &spatial_shapes, 33 | const at::Tensor &level_start_index, 34 | const at::Tensor &sampling_loc, 35 | const at::Tensor &attn_weight, 36 | const at::Tensor &grad_output, 37 | const int im2col_step) 38 | { 39 | AT_ERROR("Not implemented on the CPU"); 40 | } 41 | 42 |
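The two CPU entry points above deliberately just raise, so the compiled extension is CUDA-only. For CPU-side sanity checks, the pure-PyTorch reference ms_deform_attn_core_pytorch (defined in models/ops/functions/ms_deform_attn_func.py, listed later) can stand in; a minimal sketch mirroring the shapes used in models/ops/test.py (illustrative; assumes it is run from models/ops):

    # Illustrative CPU check against the pure-PyTorch reference path.
    import torch
    from functions.ms_deform_attn_func import ms_deform_attn_core_pytorch

    N, M, D, Lq, L, P = 1, 2, 2, 2, 2, 2
    shapes = torch.as_tensor([(6, 4), (3, 2)], dtype=torch.long)
    S = int(shapes.prod(1).sum())
    value = torch.rand(N, S, M, D) * 0.01
    sampling_locations = torch.rand(N, Lq, M, L, P, 2)
    attention_weights = torch.rand(N, Lq, M, L, P) + 1e-5
    attention_weights /= attention_weights.sum(-1, keepdim=True).sum(-2, keepdim=True)
    out = ms_deform_attn_core_pytorch(value, shapes, sampling_locations, attention_weights)
    print(out.shape)  # expected: (N, Lq, M * D)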
-------------------------------------------------------------------------------- /datasets/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------ 2 | # H-DETR 3 | # Copyright (c) 2022 Peking University & Microsoft Research Asia. All Rights Reserved. 4 | # Licensed under the MIT-style license found in the LICENSE file in the root directory 5 | # ------------------------------------------------------------------------ 6 | # Deformable DETR 7 | # Copyright (c) 2020 SenseTime. All Rights Reserved. 8 | # Licensed under the Apache License, Version 2.0 [see LICENSE for details] 9 | # ------------------------------------------------------------------------ 10 | # Modified from DETR (https://github.com/facebookresearch/detr) 11 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 12 | # ------------------------------------------------------------------------ 13 | 14 | import torch.utils.data 15 | from .torchvision_datasets import CocoDetection 16 | 17 | from .coco import build as build_coco 18 | 19 | 20 | def get_coco_api_from_dataset(dataset): 21 | for _ in range(10): 22 | # if isinstance(dataset, torchvision.datasets.CocoDetection): 23 | # break 24 | if isinstance(dataset, torch.utils.data.Subset): 25 | dataset = dataset.dataset 26 | if isinstance(dataset, CocoDetection): 27 | return dataset.coco 28 | 29 | 30 | def build_dataset(image_set, args, eval_in_training_set=False): 31 | if args.dataset_file == "coco": 32 | return build_coco(image_set, args, eval_in_training_set) 33 | if args.dataset_file == "coco_panoptic": 34 | # to avoid making panopticapi required for coco 35 | from .coco_panoptic import build as build_coco_panoptic 36 | 37 | return build_coco_panoptic(image_set, args) 38 | raise ValueError(f"dataset {args.dataset_file} not supported") 39 | -------------------------------------------------------------------------------- /models/ops/src/ms_deform_attn.h: -------------------------------------------------------------------------------- 1 | /*! 2 | ************************************************************************************************** 3 | * Deformable DETR 4 | * Copyright (c) 2020 SenseTime. All Rights Reserved. 5 | * Licensed under the Apache License, Version 2.0 [see LICENSE for details] 6 | ************************************************************************************************** 7 | * Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 8 | ************************************************************************************************** 9 | */ 10 | 11 | #pragma once 12 | 13 | #include "cpu/ms_deform_attn_cpu.h" 14 | 15 | #ifdef WITH_CUDA 16 | #include "cuda/ms_deform_attn_cuda.h" 17 | #endif 18 | 19 | 20 | at::Tensor 21 | ms_deform_attn_forward( 22 | const at::Tensor &value, 23 | const at::Tensor &spatial_shapes, 24 | const at::Tensor &level_start_index, 25 | const at::Tensor &sampling_loc, 26 | const at::Tensor &attn_weight, 27 | const int im2col_step) 28 | { 29 | if (value.type().is_cuda()) 30 | { 31 | #ifdef WITH_CUDA 32 | return ms_deform_attn_cuda_forward( 33 | value, spatial_shapes, level_start_index, sampling_loc, attn_weight, im2col_step); 34 | #else 35 | AT_ERROR("Not compiled with GPU support"); 36 | #endif 37 | } 38 | AT_ERROR("Not implemented on the CPU"); 39 | } 40 | 41 | std::vector<at::Tensor> 42 | ms_deform_attn_backward( 43 | const at::Tensor &value, 44 | const at::Tensor &spatial_shapes, 45 | const at::Tensor &level_start_index, 46 | const at::Tensor &sampling_loc, 47 | const at::Tensor &attn_weight, 48 | const at::Tensor &grad_output, 49 | const int im2col_step) 50 | { 51 | if (value.type().is_cuda()) 52 | { 53 | #ifdef WITH_CUDA 54 | return ms_deform_attn_cuda_backward( 55 | value, spatial_shapes, level_start_index, sampling_loc, attn_weight, grad_output, im2col_step); 56 | #else 57 | AT_ERROR("Not compiled with GPU support"); 58 | #endif 59 | } 60 | AT_ERROR("Not implemented on the CPU"); 61 | } 62 | 63 | -------------------------------------------------------------------------------- /datasets/panoptic_eval.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------ 2 | # Deformable DETR 3 | # Copyright (c) 2020 SenseTime. All Rights Reserved.
4 | # Licensed under the Apache License, Version 2.0 [see LICENSE for details] 5 | # ------------------------------------------------------------------------ 6 | # Modified from DETR (https://github.com/facebookresearch/detr) 7 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 8 | # ------------------------------------------------------------------------ 9 | 10 | import json 11 | import os 12 | 13 | import util.misc as utils 14 | 15 | try: 16 | from panopticapi.evaluation import pq_compute 17 | except ImportError: 18 | pass 19 | 20 | 21 | class PanopticEvaluator(object): 22 | def __init__(self, ann_file, ann_folder, output_dir="panoptic_eval"): 23 | self.gt_json = ann_file 24 | self.gt_folder = ann_folder 25 | if utils.is_main_process(): 26 | if not os.path.exists(output_dir): 27 | os.mkdir(output_dir) 28 | self.output_dir = output_dir 29 | self.predictions = [] 30 | 31 | def update(self, predictions): 32 | for p in predictions: 33 | with open(os.path.join(self.output_dir, p["file_name"]), "wb") as f: 34 | f.write(p.pop("png_string")) 35 | 36 | self.predictions += predictions 37 | 38 | def synchronize_between_processes(self): 39 | all_predictions = utils.all_gather(self.predictions) 40 | merged_predictions = [] 41 | for p in all_predictions: 42 | merged_predictions += p 43 | self.predictions = merged_predictions 44 | 45 | def summarize(self): 46 | if utils.is_main_process(): 47 | json_data = {"annotations": self.predictions} 48 | predictions_json = os.path.join(self.output_dir, "predictions.json") 49 | with open(predictions_json, "w") as f: 50 | f.write(json.dumps(json_data)) 51 | return pq_compute(self.gt_json, predictions_json, gt_folder=self.gt_folder, pred_folder=self.output_dir) 52 | return None 53 | -------------------------------------------------------------------------------- /models/ops/setup.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------------------------ 2 | # Deformable DETR 3 | # Copyright (c) 2020 SenseTime. All Rights Reserved. 
4 | # Licensed under the Apache License, Version 2.0 [see LICENSE for details] 5 | # ------------------------------------------------------------------------------------------------ 6 | # Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 7 | # ------------------------------------------------------------------------------------------------ 8 | 9 | import os 10 | import glob 11 | 12 | import torch 13 | 14 | from torch.utils.cpp_extension import CUDA_HOME 15 | from torch.utils.cpp_extension import CppExtension 16 | from torch.utils.cpp_extension import CUDAExtension 17 | 18 | from setuptools import find_packages 19 | from setuptools import setup 20 | 21 | requirements = ["torch", "torchvision"] 22 | 23 | def get_extensions(): 24 | this_dir = os.path.dirname(os.path.abspath(__file__)) 25 | extensions_dir = os.path.join(this_dir, "src") 26 | 27 | main_file = glob.glob(os.path.join(extensions_dir, "*.cpp")) 28 | source_cpu = glob.glob(os.path.join(extensions_dir, "cpu", "*.cpp")) 29 | source_cuda = glob.glob(os.path.join(extensions_dir, "cuda", "*.cu")) 30 | 31 | sources = main_file + source_cpu 32 | extension = CppExtension 33 | extra_compile_args = {"cxx": []} 34 | define_macros = [] 35 | 36 | if torch.cuda.is_available() and CUDA_HOME is not None: 37 | extension = CUDAExtension 38 | sources += source_cuda 39 | define_macros += [("WITH_CUDA", None)] 40 | extra_compile_args["nvcc"] = [ 41 | "-DCUDA_HAS_FP16=1", 42 | "-D__CUDA_NO_HALF_OPERATORS__", 43 | "-D__CUDA_NO_HALF_CONVERSIONS__", 44 | "-D__CUDA_NO_HALF2_OPERATORS__", 45 | ] 46 | else: 47 | raise NotImplementedError('CUDA is not available') 48 | 49 | sources = [os.path.join(extensions_dir, s) for s in sources] 50 | include_dirs = [extensions_dir] 51 | ext_modules = [ 52 | extension( 53 | "MultiScaleDeformableAttention", 54 | sources, 55 | include_dirs=include_dirs, 56 | define_macros=define_macros, 57 | extra_compile_args=extra_compile_args, 58 | ) 59 | ] 60 | return ext_modules 61 | 62 | setup( 63 | name="MultiScaleDeformableAttention", 64 | version="1.0", 65 | author="Weijie Su", 66 | url="https://github.com/fundamentalvision/Deformable-DETR", 67 | description="PyTorch Wrapper for CUDA Functions of Multi-Scale Deformable Attention", 68 | packages=find_packages(exclude=("configs", "tests",)), 69 | ext_modules=get_extensions(), 70 | cmdclass={"build_ext": torch.utils.cpp_extension.BuildExtension}, 71 | ) 72 |
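setup.py builds the MultiScaleDeformableAttention extension and, as the NotImplementedError branch shows, requires a CUDA-capable environment. The usual build-and-verify sequence, following the upstream Deformable DETR instructions (a make.sh wrapper may also exist in models/ops):

    cd models/ops
    python setup.py build install
    python test.py   # optional forward/gradient checks; requires a GPU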
9 | """ 10 | import os 11 | import time 12 | import argparse 13 | 14 | import torch 15 | 16 | from main import get_args_parser as get_main_args_parser 17 | from models import build_model 18 | from datasets import build_dataset 19 | from util.misc import nested_tensor_from_tensor_list 20 | 21 | 22 | def get_benckmark_arg_parser(): 23 | parser = argparse.ArgumentParser("Benchmark inference speed of Deformable DETR.") 24 | parser.add_argument( 25 | "--num_iters", type=int, default=300, help="total iters to benchmark speed" 26 | ) 27 | parser.add_argument( 28 | "--warm_iters", 29 | type=int, 30 | default=5, 31 | help="ignore first several iters that are very slow", 32 | ) 33 | parser.add_argument( 34 | "--batch_size", type=int, default=1, help="batch size in inference" 35 | ) 36 | parser.add_argument("--resume", type=str, help="load the pre-trained checkpoint") 37 | return parser 38 | 39 | 40 | @torch.no_grad() 41 | def measure_average_inference_time(model, inputs, num_iters=100, warm_iters=5): 42 | ts = [] 43 | for iter_ in range(num_iters): 44 | torch.cuda.synchronize() 45 | t_ = time.perf_counter() 46 | model(inputs) 47 | torch.cuda.synchronize() 48 | t = time.perf_counter() - t_ 49 | if iter_ >= warm_iters: 50 | ts.append(t) 51 | print(ts) 52 | return sum(ts) / len(ts) 53 | 54 | 55 | def benchmark(): 56 | args, _ = get_benckmark_arg_parser().parse_known_args() 57 | main_args = get_main_args_parser().parse_args(_) 58 | assert ( 59 | args.warm_iters < args.num_iters and args.num_iters > 0 and args.warm_iters >= 0 60 | ) 61 | assert args.batch_size > 0 62 | assert args.resume is None or os.path.exists(args.resume) 63 | dataset = build_dataset("val", main_args) 64 | model, _, _ = build_model(main_args) 65 | model.cuda() 66 | model.eval() 67 | if args.resume is not None: 68 | ckpt = torch.load(args.resume, map_location=lambda storage, loc: storage) 69 | model.load_state_dict(ckpt["model"]) 70 | inputs = nested_tensor_from_tensor_list( 71 | [dataset.__getitem__(0)[0].cuda() for _ in range(args.batch_size)] 72 | ) 73 | t = measure_average_inference_time(model, inputs, args.num_iters, args.warm_iters) 74 | return 1.0 / t * args.batch_size 75 | 76 | 77 | if __name__ == "__main__": 78 | fps = benchmark() 79 | print(f"Inference Speed: {fps:.1f} FPS") 80 | 81 | -------------------------------------------------------------------------------- /mmcv_custom/runner/checkpoint.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Open-MMLab. All rights reserved. 2 | import os.path as osp 3 | import time 4 | from tempfile import TemporaryDirectory 5 | 6 | import torch 7 | from torch.optim import Optimizer 8 | 9 | import mmcv 10 | from mmcv.parallel import is_module_wrapper 11 | from mmcv.runner.checkpoint import weights_to_cpu, get_state_dict 12 | 13 | try: 14 | import apex 15 | except: 16 | print("apex is not installed") 17 | 18 | 19 | def save_checkpoint(model, filename, optimizer=None, meta=None): 20 | """Save checkpoint to file. 21 | 22 | The checkpoint will have 4 fields: ``meta``, ``state_dict`` and 23 | ``optimizer``, ``amp``. By default ``meta`` will contain version 24 | and time info. 25 | 26 | Args: 27 | model (Module): Module whose params are to be saved. 28 | filename (str): Checkpoint filename. 29 | optimizer (:obj:`Optimizer`, optional): Optimizer to be saved. 30 | meta (dict, optional): Metadata to be saved in checkpoint. 
31 | """ 32 | if meta is None: 33 | meta = {} 34 | elif not isinstance(meta, dict): 35 | raise TypeError(f"meta must be a dict or None, but got {type(meta)}") 36 | meta.update(mmcv_version=mmcv.__version__, time=time.asctime()) 37 | 38 | if is_module_wrapper(model): 39 | model = model.module 40 | 41 | if hasattr(model, "CLASSES") and model.CLASSES is not None: 42 | # save class name to the meta 43 | meta.update(CLASSES=model.CLASSES) 44 | 45 | checkpoint = {"meta": meta, "state_dict": weights_to_cpu(get_state_dict(model))} 46 | # save optimizer state dict in the checkpoint 47 | if isinstance(optimizer, Optimizer): 48 | checkpoint["optimizer"] = optimizer.state_dict() 49 | elif isinstance(optimizer, dict): 50 | checkpoint["optimizer"] = {} 51 | for name, optim in optimizer.items(): 52 | checkpoint["optimizer"][name] = optim.state_dict() 53 | 54 | # save amp state dict in the checkpoint 55 | checkpoint["amp"] = apex.amp.state_dict() 56 | 57 | if filename.startswith("pavi://"): 58 | try: 59 | from pavi import modelcloud 60 | from pavi.exception import NodeNotFoundError 61 | except ImportError: 62 | raise ImportError("Please install pavi to load checkpoint from modelcloud.") 63 | model_path = filename[7:] 64 | root = modelcloud.Folder() 65 | model_dir, model_name = osp.split(model_path) 66 | try: 67 | model = modelcloud.get(model_dir) 68 | except NodeNotFoundError: 69 | model = root.create_training_model(model_dir) 70 | with TemporaryDirectory() as tmp_dir: 71 | checkpoint_file = osp.join(tmp_dir, model_name) 72 | with open(checkpoint_file, "wb") as f: 73 | torch.save(checkpoint, f) 74 | f.flush() 75 | model.create_file(checkpoint_file, name=model_name) 76 | else: 77 | mmcv.mkdir_or_exist(osp.dirname(filename)) 78 | # immediately flush buffer 79 | with open(filename, "wb") as f: 80 | torch.save(checkpoint, f) 81 | f.flush() 82 | -------------------------------------------------------------------------------- /datasets/data_prefetcher.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------ 2 | # Deformable DETR 3 | # Copyright (c) 2020 SenseTime. All Rights Reserved. 4 | # Licensed under the Apache License, Version 2.0 [see LICENSE for details] 5 | # ------------------------------------------------------------------------ 6 | 7 | import torch 8 | 9 | def to_cuda(samples, targets, device): 10 | samples = samples.to(device, non_blocking=True) 11 | targets = [{k: v.to(device, non_blocking=True) for k, v in t.items()} for t in targets] 12 | return samples, targets 13 | 14 | class data_prefetcher(): 15 | def __init__(self, loader, device, prefetch=True): 16 | self.loader = iter(loader) 17 | self.prefetch = prefetch 18 | self.device = device 19 | if prefetch: 20 | self.stream = torch.cuda.Stream() 21 | self.preload() 22 | 23 | def preload(self): 24 | try: 25 | self.next_samples, self.next_targets = next(self.loader) 26 | except StopIteration: 27 | self.next_samples = None 28 | self.next_targets = None 29 | return 30 | # if record_stream() doesn't work, another option is to make sure device inputs are created 31 | # on the main stream. 
32 | # self.next_input_gpu = torch.empty_like(self.next_input, device='cuda') 33 | # self.next_target_gpu = torch.empty_like(self.next_target, device='cuda') 34 | # Need to make sure the memory allocated for next_* is not still in use by the main stream 35 | # at the time we start copying to next_*: 36 | # self.stream.wait_stream(torch.cuda.current_stream()) 37 | with torch.cuda.stream(self.stream): 38 | self.next_samples, self.next_targets = to_cuda(self.next_samples, self.next_targets, self.device) 39 | # more code for the alternative if record_stream() doesn't work: 40 | # copy_ will record the use of the pinned source tensor in this side stream. 41 | # self.next_input_gpu.copy_(self.next_input, non_blocking=True) 42 | # self.next_target_gpu.copy_(self.next_target, non_blocking=True) 43 | # self.next_input = self.next_input_gpu 44 | # self.next_target = self.next_target_gpu 45 | 46 | # With Amp, it isn't necessary to manually convert data to half. 47 | # if args.fp16: 48 | # self.next_input = self.next_input.half() 49 | # else: 50 | 51 | def next(self): 52 | if self.prefetch: 53 | torch.cuda.current_stream().wait_stream(self.stream) 54 | samples = self.next_samples 55 | targets = self.next_targets 56 | if samples is not None: 57 | samples.record_stream(torch.cuda.current_stream()) 58 | if targets is not None: 59 | for t in targets: 60 | for k, v in t.items(): 61 | v.record_stream(torch.cuda.current_stream()) 62 | self.preload() 63 | else: 64 | try: 65 | samples, targets = next(self.loader) 66 | samples, targets = to_cuda(samples, targets, self.device) 67 | except StopIteration: 68 | samples = None 69 | targets = None 70 | return samples, targets 71 | -------------------------------------------------------------------------------- /util/box_ops.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------ 2 | # Deformable DETR 3 | # Copyright (c) 2020 SenseTime. All Rights Reserved. 4 | # Licensed under the Apache License, Version 2.0 [see LICENSE for details] 5 | # ------------------------------------------------------------------------ 6 | # Modified from DETR (https://github.com/facebookresearch/detr) 7 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 8 | # ------------------------------------------------------------------------ 9 | 10 | """ 11 | Utilities for bounding box manipulation and GIoU. 
12 | """ 13 | import torch 14 | from torchvision.ops.boxes import box_area 15 | 16 | 17 | def box_cxcywh_to_xyxy(x): 18 | x_c, y_c, w, h = x.unbind(-1) 19 | b = [(x_c - 0.5 * w), (y_c - 0.5 * h), (x_c + 0.5 * w), (y_c + 0.5 * h)] 20 | return torch.stack(b, dim=-1) 21 | 22 | 23 | def box_xyxy_to_cxcywh(x): 24 | x0, y0, x1, y1 = x.unbind(-1) 25 | b = [(x0 + x1) / 2, (y0 + y1) / 2, (x1 - x0), (y1 - y0)] 26 | return torch.stack(b, dim=-1) 27 | 28 | 29 | # modified from torchvision to also return the union 30 | def box_iou(boxes1, boxes2): 31 | area1 = box_area(boxes1) 32 | area2 = box_area(boxes2) 33 | 34 | lt = torch.max(boxes1[:, None, :2], boxes2[:, :2]) # [N,M,2] 35 | rb = torch.min(boxes1[:, None, 2:], boxes2[:, 2:]) # [N,M,2] 36 | 37 | wh = (rb - lt).clamp(min=0) # [N,M,2] 38 | inter = wh[:, :, 0] * wh[:, :, 1] # [N,M] 39 | 40 | union = area1[:, None] + area2 - inter 41 | 42 | iou = inter / union 43 | return iou, union 44 | 45 | 46 | def generalized_box_iou(boxes1, boxes2): 47 | """ 48 | Generalized IoU from https://giou.stanford.edu/ 49 | 50 | The boxes should be in [x0, y0, x1, y1] format 51 | 52 | Returns a [N, M] pairwise matrix, where N = len(boxes1) 53 | and M = len(boxes2) 54 | """ 55 | # degenerate boxes gives inf / nan results 56 | # so do an early check 57 | assert (boxes1[:, 2:] >= boxes1[:, :2]).all() 58 | assert (boxes2[:, 2:] >= boxes2[:, :2]).all() 59 | iou, union = box_iou(boxes1, boxes2) 60 | 61 | lt = torch.min(boxes1[:, None, :2], boxes2[:, :2]) 62 | rb = torch.max(boxes1[:, None, 2:], boxes2[:, 2:]) 63 | 64 | wh = (rb - lt).clamp(min=0) # [N,M,2] 65 | area = wh[:, :, 0] * wh[:, :, 1] 66 | 67 | return iou - (area - union) / area 68 | 69 | 70 | def masks_to_boxes(masks): 71 | """Compute the bounding boxes around the provided masks 72 | 73 | The masks should be in format [N, H, W] where N is the number of masks, (H, W) are the spatial dimensions. 74 | 75 | Returns a [N, 4] tensors, with the boxes in xyxy format 76 | """ 77 | if masks.numel() == 0: 78 | return torch.zeros((0, 4), device=masks.device) 79 | 80 | h, w = masks.shape[-2:] 81 | 82 | y = torch.arange(0, h, dtype=torch.float) 83 | x = torch.arange(0, w, dtype=torch.float) 84 | y, x = torch.meshgrid(y, x) 85 | 86 | x_mask = masks * x.unsqueeze(0) 87 | x_max = x_mask.flatten(1).max(-1)[0] 88 | x_min = x_mask.masked_fill(~(masks.bool()), 1e8).flatten(1).min(-1)[0] 89 | 90 | y_mask = masks * y.unsqueeze(0) 91 | y_max = y_mask.flatten(1).max(-1)[0] 92 | y_min = y_mask.masked_fill(~(masks.bool()), 1e8).flatten(1).min(-1)[0] 93 | 94 | return torch.stack([x_min, y_min, x_max, y_max], 1) 95 | -------------------------------------------------------------------------------- /datasets/torchvision_datasets/coco.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------ 2 | # Deformable DETR 3 | # Copyright (c) 2020 SenseTime. All Rights Reserved. 
4 | # Licensed under the Apache License, Version 2.0 [see LICENSE for details] 5 | # ------------------------------------------------------------------------ 6 | # Modified from torchvision 7 | # ------------------------------------------------------------------------ 8 | 9 | """ 10 | Copy-Paste from torchvision, but add utility of caching images on memory 11 | """ 12 | from torchvision.datasets.vision import VisionDataset 13 | from PIL import Image 14 | import os 15 | import os.path 16 | import tqdm 17 | from io import BytesIO 18 | 19 | 20 | class CocoDetection(VisionDataset): 21 | """`MS Coco Detection <https://cocodataset.org/#detection-2016>`_ Dataset. 22 | Args: 23 | root (string): Root directory where images are downloaded to. 24 | annFile (string): Path to json annotation file. 25 | transform (callable, optional): A function/transform that takes in a PIL image 26 | and returns a transformed version. E.g., ``transforms.ToTensor`` 27 | target_transform (callable, optional): A function/transform that takes in the 28 | target and transforms it. 29 | transforms (callable, optional): A function/transform that takes input sample and its target as entry 30 | and returns a transformed version. 31 | """ 32 | 33 | def __init__(self, root, annFile, transform=None, target_transform=None, transforms=None, 34 | cache_mode=False, local_rank=0, local_size=1): 35 | super(CocoDetection, self).__init__(root, transforms, transform, target_transform) 36 | from pycocotools.coco import COCO 37 | self.coco = COCO(annFile) 38 | self.ids = list(sorted(self.coco.imgs.keys())) 39 | self.cache_mode = cache_mode 40 | self.local_rank = local_rank 41 | self.local_size = local_size 42 | if cache_mode: 43 | self.cache = {} 44 | self.cache_images() 45 | 46 | def cache_images(self): 47 | self.cache = {} 48 | for index, img_id in zip(tqdm.trange(len(self.ids)), self.ids): 49 | if index % self.local_size != self.local_rank: 50 | continue 51 | path = self.coco.loadImgs(img_id)[0]['file_name'] 52 | with open(os.path.join(self.root, path), 'rb') as f: 53 | self.cache[path] = f.read() 54 | 55 | def get_image(self, path): 56 | if self.cache_mode: 57 | if path not in self.cache.keys(): 58 | with open(os.path.join(self.root, path), 'rb') as f: 59 | self.cache[path] = f.read() 60 | return Image.open(BytesIO(self.cache[path])).convert('RGB') 61 | return Image.open(os.path.join(self.root, path)).convert('RGB') 62 | 63 | def __getitem__(self, index): 64 | """ 65 | Args: 66 | index (int): Index 67 | Returns: 68 | tuple: Tuple (image, target). target is the object returned by ``coco.loadAnns``. 69 | """ 70 | coco = self.coco 71 | img_id = self.ids[index] 72 | ann_ids = coco.getAnnIds(imgIds=img_id) 73 | target = coco.loadAnns(ann_ids) 74 | 75 | path = coco.loadImgs(img_id)[0]['file_name'] 76 | 77 | img = self.get_image(path) 78 | if self.transforms is not None: 79 | img, target = self.transforms(img, target) 80 | 81 | return img, target 82 | 83 | def __len__(self): 84 | return len(self.ids) 85 |
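A minimal usage sketch for the caching dataset above (paths are placeholders; cache_mode trades startup time and RAM for faster repeated reads, and local_rank/local_size shard the cache across data-loading ranks):

    # Illustrative: build the dataset with in-memory image caching enabled.
    from datasets.torchvision_datasets import CocoDetection

    dataset = CocoDetection(
        root="data/coco/train2017",                                 # placeholder path
        annFile="data/coco/annotations/instances_train2017.json",  # placeholder path
        cache_mode=True,   # read each image once, keep the raw bytes in RAM
        local_rank=0,      # this process's shard of the cache ...
        local_size=1,      # ... out of local_size loader ranks
    )
    img, target = dataset[0]  # PIL image and list of COCO annotation dicts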
-------------------------------------------------------------------------------- /mmcv_custom/runner/epoch_based_runner.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Open-MMLab. All rights reserved. 2 | import os.path as osp 3 | import platform 4 | import shutil 5 | 6 | import torch 7 | from torch.optim import Optimizer 8 | 9 | import mmcv 10 | from mmcv.runner import RUNNERS, EpochBasedRunner 11 | from .checkpoint import save_checkpoint 12 | 13 | try: 14 | import apex 15 | except ImportError: 16 | print("apex is not installed") 17 | 18 | 19 | @RUNNERS.register_module() 20 | class EpochBasedRunnerAmp(EpochBasedRunner): 21 | """Epoch-based Runner with AMP support. 22 | 23 | This runner trains models epoch by epoch. 24 | """ 25 | 26 | def save_checkpoint( 27 | self, 28 | out_dir, 29 | filename_tmpl="epoch_{}.pth", 30 | save_optimizer=True, 31 | meta=None, 32 | create_symlink=True, 33 | ): 34 | """Save the checkpoint. 35 | 36 | Args: 37 | out_dir (str): The directory that checkpoints are saved. 38 | filename_tmpl (str, optional): The checkpoint filename template, 39 | which contains a placeholder for the epoch number. 40 | Defaults to 'epoch_{}.pth'. 41 | save_optimizer (bool, optional): Whether to save the optimizer to 42 | the checkpoint. Defaults to True. 43 | meta (dict, optional): The meta information to be saved in the 44 | checkpoint. Defaults to None. 45 | create_symlink (bool, optional): Whether to create a symlink 46 | "latest.pth" to point to the latest checkpoint. 47 | Defaults to True. 48 | """ 49 | if meta is None: 50 | meta = dict(epoch=self.epoch + 1, iter=self.iter) 51 | elif isinstance(meta, dict): 52 | meta.update(epoch=self.epoch + 1, iter=self.iter) 53 | else: 54 | raise TypeError(f"meta should be a dict or None, but got {type(meta)}") 55 | if self.meta is not None: 56 | meta.update(self.meta) 57 | 58 | filename = filename_tmpl.format(self.epoch + 1) 59 | filepath = osp.join(out_dir, filename) 60 | optimizer = self.optimizer if save_optimizer else None 61 | save_checkpoint(self.model, filepath, optimizer=optimizer, meta=meta) 62 | # in some environments, `os.symlink` is not supported, you may need to 63 | # set `create_symlink` to False 64 | if create_symlink: 65 | dst_file = osp.join(out_dir, "latest.pth") 66 | if platform.system() != "Windows": 67 | mmcv.symlink(filename, dst_file) 68 | else: 69 | shutil.copy(filepath, dst_file) 70 | 71 | def resume(self, checkpoint, resume_optimizer=True, map_location="default"): 72 | if map_location == "default": 73 | if torch.cuda.is_available(): 74 | device_id = torch.cuda.current_device() 75 | checkpoint = self.load_checkpoint( 76 | checkpoint, 77 | map_location=lambda storage, loc: storage.cuda(device_id), 78 | ) 79 | else: 80 | checkpoint = self.load_checkpoint(checkpoint) 81 | else: 82 | checkpoint = self.load_checkpoint(checkpoint, map_location=map_location) 83 | 84 | self._epoch = checkpoint["meta"]["epoch"] 85 | self._iter = checkpoint["meta"]["iter"] 86 | if "optimizer" in checkpoint and resume_optimizer: 87 | if isinstance(self.optimizer, Optimizer): 88 | self.optimizer.load_state_dict(checkpoint["optimizer"]) 89 | elif isinstance(self.optimizer, dict): 90 | for k in self.optimizer.keys(): 91 | self.optimizer[k].load_state_dict(checkpoint["optimizer"][k]) 92 | else: 93 | raise TypeError( 94 | "Optimizer should be dict or torch.optim.Optimizer " 95 | f"but got {type(self.optimizer)}" 96 | ) 97 | 98 | if "amp" in checkpoint: 99 | apex.amp.load_state_dict(checkpoint["amp"]) 100 | self.logger.info("load amp state dict") 101 | 102 | self.logger.info("resumed epoch %d, iter %d", self.epoch, self.iter) 103 | 104 | -------------------------------------------------------------------------------- /models/ops/test.py:
-------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------------------------ 2 | # Deformable DETR 3 | # Copyright (c) 2020 SenseTime. All Rights Reserved. 4 | # Licensed under the Apache License, Version 2.0 [see LICENSE for details] 5 | # ------------------------------------------------------------------------------------------------ 6 | # Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 7 | # ------------------------------------------------------------------------------------------------ 8 | 9 | from __future__ import absolute_import 10 | from __future__ import print_function 11 | from __future__ import division 12 | 13 | import time 14 | import torch 15 | import torch.nn as nn 16 | from torch.autograd import gradcheck 17 | 18 | from functions.ms_deform_attn_func import MSDeformAttnFunction, ms_deform_attn_core_pytorch 19 | 20 | 21 | N, M, D = 1, 2, 2 22 | Lq, L, P = 2, 2, 2 23 | shapes = torch.as_tensor([(6, 4), (3, 2)], dtype=torch.long).cuda() 24 | level_start_index = torch.cat((shapes.new_zeros((1, )), shapes.prod(1).cumsum(0)[:-1])) 25 | S = sum([(H*W).item() for H, W in shapes]) 26 | 27 | 28 | torch.manual_seed(3) 29 | 30 | 31 | @torch.no_grad() 32 | def check_forward_equal_with_pytorch_double(): 33 | value = torch.rand(N, S, M, D).cuda() * 0.01 34 | sampling_locations = torch.rand(N, Lq, M, L, P, 2).cuda() 35 | attention_weights = torch.rand(N, Lq, M, L, P).cuda() + 1e-5 36 | attention_weights /= attention_weights.sum(-1, keepdim=True).sum(-2, keepdim=True) 37 | im2col_step = 2 38 | output_pytorch = ms_deform_attn_core_pytorch(value.double(), shapes, sampling_locations.double(), attention_weights.double()).detach().cpu() 39 | output_cuda = MSDeformAttnFunction.apply(value.double(), shapes, level_start_index, sampling_locations.double(), attention_weights.double(), im2col_step).detach().cpu() 40 | fwdok = torch.allclose(output_cuda, output_pytorch) 41 | max_abs_err = (output_cuda - output_pytorch).abs().max() 42 | max_rel_err = ((output_cuda - output_pytorch).abs() / output_pytorch.abs()).max() 43 | 44 | print(f'* {fwdok} check_forward_equal_with_pytorch_double: max_abs_err {max_abs_err:.2e} max_rel_err {max_rel_err:.2e}') 45 | 46 | 47 | @torch.no_grad() 48 | def check_forward_equal_with_pytorch_float(): 49 | value = torch.rand(N, S, M, D).cuda() * 0.01 50 | sampling_locations = torch.rand(N, Lq, M, L, P, 2).cuda() 51 | attention_weights = torch.rand(N, Lq, M, L, P).cuda() + 1e-5 52 | attention_weights /= attention_weights.sum(-1, keepdim=True).sum(-2, keepdim=True) 53 | im2col_step = 2 54 | output_pytorch = ms_deform_attn_core_pytorch(value, shapes, sampling_locations, attention_weights).detach().cpu() 55 | output_cuda = MSDeformAttnFunction.apply(value, shapes, level_start_index, sampling_locations, attention_weights, im2col_step).detach().cpu() 56 | fwdok = torch.allclose(output_cuda, output_pytorch, rtol=1e-2, atol=1e-3) 57 | max_abs_err = (output_cuda - output_pytorch).abs().max() 58 | max_rel_err = ((output_cuda - output_pytorch).abs() / output_pytorch.abs()).max() 59 | 60 | print(f'* {fwdok} check_forward_equal_with_pytorch_float: max_abs_err {max_abs_err:.2e} max_rel_err {max_rel_err:.2e}') 61 | 62 | 63 | def check_gradient_numerical(channels=4, grad_value=True, grad_sampling_loc=True, grad_attn_weight=True): 64 | 65 | value = torch.rand(N, S, M, channels).cuda() * 0.01 66 | sampling_locations = torch.rand(N, Lq, M, L, 
P, 2).cuda() 67 | attention_weights = torch.rand(N, Lq, M, L, P).cuda() + 1e-5 68 | attention_weights /= attention_weights.sum(-1, keepdim=True).sum(-2, keepdim=True) 69 | im2col_step = 2 70 | func = MSDeformAttnFunction.apply 71 | 72 | value.requires_grad = grad_value 73 | sampling_locations.requires_grad = grad_sampling_loc 74 | attention_weights.requires_grad = grad_attn_weight 75 | 76 | gradok = gradcheck(func, (value.double(), shapes, level_start_index, sampling_locations.double(), attention_weights.double(), im2col_step)) 77 | 78 | print(f'* {gradok} check_gradient_numerical(D={channels})') 79 | 80 | 81 | if __name__ == '__main__': 82 | check_forward_equal_with_pytorch_double() 83 | check_forward_equal_with_pytorch_float() 84 | 85 | for channels in [30, 32, 64, 71, 1025, 2048, 3096]: 86 | check_gradient_numerical(channels, True, True, True) 87 | 88 | 89 | 90 | -------------------------------------------------------------------------------- /models/position_encoding.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------ 2 | # Deformable DETR 3 | # Copyright (c) 2020 SenseTime. All Rights Reserved. 4 | # Licensed under the Apache License, Version 2.0 [see LICENSE for details] 5 | # ------------------------------------------------------------------------ 6 | # Modified from DETR (https://github.com/facebookresearch/detr) 7 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 8 | # ------------------------------------------------------------------------ 9 | 10 | """ 11 | Various positional encodings for the transformer. 12 | """ 13 | import math 14 | import torch 15 | from torch import nn 16 | 17 | from util.misc import NestedTensor 18 | 19 | 20 | class PositionEmbeddingSine(nn.Module): 21 | """ 22 | This is a more standard version of the position embedding, very similar to the one 23 | used by the Attention is all you need paper, generalized to work on images. 
24 | """ 25 | 26 | def __init__( 27 | self, num_pos_feats=64, temperature=10000, normalize=False, scale=None 28 | ): 29 | super().__init__() 30 | self.num_pos_feats = num_pos_feats 31 | self.temperature = temperature 32 | self.normalize = normalize 33 | if scale is not None and normalize is False: 34 | raise ValueError("normalize should be True if scale is passed") 35 | if scale is None: 36 | scale = 2 * math.pi 37 | self.scale = scale 38 | 39 | def forward(self, tensor_list: NestedTensor): 40 | x = tensor_list.tensors 41 | mask = tensor_list.mask 42 | assert mask is not None 43 | not_mask = ~mask 44 | y_embed = not_mask.cumsum(1, dtype=torch.float32) 45 | x_embed = not_mask.cumsum(2, dtype=torch.float32) 46 | if self.normalize: 47 | eps = 1e-6 48 | y_embed = (y_embed - 0.5) / (y_embed[:, -1:, :] + eps) * self.scale 49 | x_embed = (x_embed - 0.5) / (x_embed[:, :, -1:] + eps) * self.scale 50 | 51 | dim_t = torch.arange(self.num_pos_feats, dtype=torch.float32, device=x.device) 52 | dim_t = self.temperature ** (2 * (dim_t // 2) / self.num_pos_feats) 53 | 54 | pos_x = x_embed[:, :, :, None] / dim_t 55 | pos_y = y_embed[:, :, :, None] / dim_t 56 | pos_x = torch.stack( 57 | (pos_x[:, :, :, 0::2].sin(), pos_x[:, :, :, 1::2].cos()), dim=4 58 | ).flatten(3) 59 | pos_y = torch.stack( 60 | (pos_y[:, :, :, 0::2].sin(), pos_y[:, :, :, 1::2].cos()), dim=4 61 | ).flatten(3) 62 | pos = torch.cat((pos_y, pos_x), dim=3).permute(0, 3, 1, 2) 63 | return pos 64 | 65 | 66 | class PositionEmbeddingLearned(nn.Module): 67 | """ 68 | Absolute pos embedding, learned. 69 | """ 70 | 71 | def __init__(self, num_pos_feats=256): 72 | super().__init__() 73 | self.row_embed = nn.Embedding(50, num_pos_feats) 74 | self.col_embed = nn.Embedding(50, num_pos_feats) 75 | self.reset_parameters() 76 | 77 | def reset_parameters(self): 78 | nn.init.uniform_(self.row_embed.weight) 79 | nn.init.uniform_(self.col_embed.weight) 80 | 81 | def forward(self, tensor_list: NestedTensor): 82 | x = tensor_list.tensors 83 | h, w = x.shape[-2:] 84 | i = torch.arange(w, device=x.device) 85 | j = torch.arange(h, device=x.device) 86 | x_emb = self.col_embed(i) 87 | y_emb = self.row_embed(j) 88 | pos = ( 89 | torch.cat( 90 | [ 91 | x_emb.unsqueeze(0).repeat(h, 1, 1), 92 | y_emb.unsqueeze(1).repeat(1, w, 1), 93 | ], 94 | dim=-1, 95 | ) 96 | .permute(2, 0, 1) 97 | .unsqueeze(0) 98 | .repeat(x.shape[0], 1, 1, 1) 99 | ) 100 | return pos 101 | 102 | 103 | def build_position_encoding(args): 104 | N_steps = args.hidden_dim // 2 105 | if args.position_embedding in ("v2", "sine"): 106 | # TODO find a better way of exposing other arguments 107 | position_embedding = PositionEmbeddingSine(N_steps, normalize=True) 108 | elif args.position_embedding in ("v3", "learned"): 109 | position_embedding = PositionEmbeddingLearned(N_steps) 110 | else: 111 | raise ValueError(f"not supported {args.position_embedding}") 112 | 113 | return position_embedding 114 | -------------------------------------------------------------------------------- /datasets/coco_panoptic.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------ 2 | # Deformable DETR 3 | # Copyright (c) 2020 SenseTime. All Rights Reserved. 4 | # Licensed under the Apache License, Version 2.0 [see LICENSE for details] 5 | # ------------------------------------------------------------------------ 6 | # Modified from DETR (https://github.com/facebookresearch/detr) 7 | # Copyright (c) Facebook, Inc. 
and its affiliates. All Rights Reserved 8 | # ------------------------------------------------------------------------ 9 | 10 | import json 11 | from pathlib import Path 12 | 13 | import numpy as np 14 | import torch 15 | from PIL import Image 16 | 17 | from panopticapi.utils import rgb2id 18 | from util.box_ops import masks_to_boxes 19 | 20 | from .coco import make_coco_transforms 21 | 22 | 23 | class CocoPanoptic: 24 | def __init__(self, img_folder, ann_folder, ann_file, transforms=None, return_masks=True): 25 | with open(ann_file, 'r') as f: 26 | self.coco = json.load(f) 27 | 28 | # sort 'images' field so that they are aligned with 'annotations' 29 | # i.e., in alphabetical order 30 | self.coco['images'] = sorted(self.coco['images'], key=lambda x: x['id']) 31 | # sanity check 32 | if "annotations" in self.coco: 33 | for img, ann in zip(self.coco['images'], self.coco['annotations']): 34 | assert img['file_name'][:-4] == ann['file_name'][:-4] 35 | 36 | self.img_folder = img_folder 37 | self.ann_folder = ann_folder 38 | self.ann_file = ann_file 39 | self.transforms = transforms 40 | self.return_masks = return_masks 41 | 42 | def __getitem__(self, idx): 43 | ann_info = self.coco['annotations'][idx] if "annotations" in self.coco else self.coco['images'][idx] 44 | img_path = Path(self.img_folder) / ann_info['file_name'].replace('.png', '.jpg') 45 | ann_path = Path(self.ann_folder) / ann_info['file_name'] 46 | 47 | img = Image.open(img_path).convert('RGB') 48 | w, h = img.size 49 | if "segments_info" in ann_info: 50 | masks = np.asarray(Image.open(ann_path), dtype=np.uint32) 51 | masks = rgb2id(masks) 52 | 53 | ids = np.array([ann['id'] for ann in ann_info['segments_info']]) 54 | masks = masks == ids[:, None, None] 55 | 56 | masks = torch.as_tensor(masks, dtype=torch.uint8) 57 | labels = torch.tensor([ann['category_id'] for ann in ann_info['segments_info']], dtype=torch.int64) 58 | 59 | target = {} 60 | target['image_id'] = torch.tensor([ann_info['image_id'] if "image_id" in ann_info else ann_info["id"]]) 61 | if self.return_masks: 62 | target['masks'] = masks 63 | target['labels'] = labels 64 | 65 | target["boxes"] = masks_to_boxes(masks) 66 | 67 | target['size'] = torch.as_tensor([int(h), int(w)]) 68 | target['orig_size'] = torch.as_tensor([int(h), int(w)]) 69 | if "segments_info" in ann_info: 70 | for name in ['iscrowd', 'area']: 71 | target[name] = torch.tensor([ann[name] for ann in ann_info['segments_info']]) 72 | 73 | if self.transforms is not None: 74 | img, target = self.transforms(img, target) 75 | 76 | return img, target 77 | 78 | def __len__(self): 79 | return len(self.coco['images']) 80 | 81 | def get_height_and_width(self, idx): 82 | img_info = self.coco['images'][idx] 83 | height = img_info['height'] 84 | width = img_info['width'] 85 | return height, width 86 | 87 | 88 | def build(image_set, args): 89 | img_folder_root = Path(args.coco_path) 90 | ann_folder_root = Path(args.coco_panoptic_path) 91 | assert img_folder_root.exists(), f'provided COCO path {img_folder_root} does not exist' 92 | assert ann_folder_root.exists(), f'provided COCO path {ann_folder_root} does not exist' 93 | mode = 'panoptic' 94 | PATHS = { 95 | "train": ("train2017", Path("annotations") / f'{mode}_train2017.json'), 96 | "val": ("val2017", Path("annotations") / f'{mode}_val2017.json'), 97 | } 98 | 99 | img_folder, ann_file = PATHS[image_set] 100 | img_folder_path = img_folder_root / img_folder 101 | ann_folder = ann_folder_root / f'{mode}_{img_folder}' 102 | ann_file = ann_folder_root / ann_file 103 | 104 | 
dataset = CocoPanoptic(img_folder_path, ann_folder, ann_file, 105 | transforms=make_coco_transforms(image_set), return_masks=args.masks) 106 | 107 | return dataset 108 | -------------------------------------------------------------------------------- /models/ops/functions/ms_deform_attn_func.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------ 2 | # H-DETR 3 | # Copyright (c) 2022 Peking University & Microsoft Research Asia. All Rights Reserved. 4 | # Licensed under the MIT-style license found in the LICENSE file in the root directory 5 | # ------------------------------------------------------------------------------------------------ 6 | # Deformable DETR 7 | # Copyright (c) 2020 SenseTime. All Rights Reserved. 8 | # Licensed under the Apache License, Version 2.0 [see LICENSE for details] 9 | # ------------------------------------------------------------------------------------------------ 10 | # Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 11 | # ------------------------------------------------------------------------------------------------ 12 | 13 | from __future__ import absolute_import 14 | from __future__ import print_function 15 | from __future__ import division 16 | 17 | import torch 18 | import torch.nn.functional as F 19 | from torch.autograd import Function 20 | from torch.autograd.function import once_differentiable 21 | 22 | import MultiScaleDeformableAttention as MSDA 23 | 24 | 25 | class MSDeformAttnFunction(Function): 26 | @staticmethod 27 | @torch.cuda.amp.custom_fwd(cast_inputs=torch.float32) 28 | def forward( 29 | ctx, 30 | value, 31 | value_spatial_shapes, 32 | value_level_start_index, 33 | sampling_locations, 34 | attention_weights, 35 | im2col_step, 36 | ): 37 | ctx.im2col_step = im2col_step 38 | output = MSDA.ms_deform_attn_forward( 39 | value, 40 | value_spatial_shapes, 41 | value_level_start_index, 42 | sampling_locations, 43 | attention_weights, 44 | ctx.im2col_step, 45 | ) 46 | ctx.save_for_backward( 47 | value, 48 | value_spatial_shapes, 49 | value_level_start_index, 50 | sampling_locations, 51 | attention_weights, 52 | ) 53 | return output 54 | 55 | @staticmethod 56 | @once_differentiable 57 | @torch.cuda.amp.custom_bwd 58 | def backward(ctx, grad_output): 59 | ( 60 | value, 61 | value_spatial_shapes, 62 | value_level_start_index, 63 | sampling_locations, 64 | attention_weights, 65 | ) = ctx.saved_tensors 66 | grad_value, grad_sampling_loc, grad_attn_weight = MSDA.ms_deform_attn_backward( 67 | value, 68 | value_spatial_shapes, 69 | value_level_start_index, 70 | sampling_locations, 71 | attention_weights, 72 | grad_output, 73 | ctx.im2col_step, 74 | ) 75 | 76 | return grad_value, None, None, grad_sampling_loc, grad_attn_weight, None 77 | 78 | 79 | def ms_deform_attn_core_pytorch( 80 | value, value_spatial_shapes, sampling_locations, attention_weights 81 | ): 82 | # for debug and test only, 83 | # need to use cuda version instead 84 | N_, S_, M_, D_ = value.shape 85 | _, Lq_, M_, L_, P_, _ = sampling_locations.shape 86 | value_list = value.split([H_ * W_ for H_, W_ in value_spatial_shapes], dim=1) 87 | sampling_grids = 2 * sampling_locations - 1 88 | sampling_value_list = [] 89 | for lid_, (H_, W_) in enumerate(value_spatial_shapes): 90 | # N_, H_*W_, M_, D_ -> N_, H_*W_, M_*D_ -> N_, M_*D_, H_*W_ -> N_*M_, D_, H_, W_ 91 | value_l_ = ( 92 | value_list[lid_].flatten(2).transpose(1, 2).reshape(N_ * M_, 
D_, H_, W_) 93 | ) 94 | # N_, Lq_, M_, P_, 2 -> N_, M_, Lq_, P_, 2 -> N_*M_, Lq_, P_, 2 95 | sampling_grid_l_ = sampling_grids[:, :, :, lid_].transpose(1, 2).flatten(0, 1) 96 | # N_*M_, D_, Lq_, P_ 97 | sampling_value_l_ = F.grid_sample( 98 | value_l_, 99 | sampling_grid_l_, 100 | mode="bilinear", 101 | padding_mode="zeros", 102 | align_corners=False, 103 | ) 104 | sampling_value_list.append(sampling_value_l_) 105 | # (N_, Lq_, M_, L_, P_) -> (N_, M_, Lq_, L_, P_) -> (N_, M_, 1, Lq_, L_*P_) 106 | attention_weights = attention_weights.transpose(1, 2).reshape( 107 | N_ * M_, 1, Lq_, L_ * P_ 108 | ) 109 | output = ( 110 | (torch.stack(sampling_value_list, dim=-2).flatten(-2) * attention_weights) 111 | .sum(-1) 112 | .view(N_, M_ * D_, Lq_) 113 | ) 114 | return output.transpose(1, 2).contiguous() 115 | -------------------------------------------------------------------------------- /util/plot_utils.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------ 2 | # Deformable DETR 3 | # Copyright (c) 2020 SenseTime. All Rights Reserved. 4 | # Licensed under the Apache License, Version 2.0 [see LICENSE for details] 5 | # ------------------------------------------------------------------------ 6 | # Modified from DETR (https://github.com/facebookresearch/detr) 7 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 8 | # ------------------------------------------------------------------------ 9 | 10 | """ 11 | Plotting utilities to visualize training logs. 12 | """ 13 | import torch 14 | import pandas as pd 15 | import seaborn as sns 16 | import matplotlib.pyplot as plt 17 | 18 | from pathlib import Path, PurePath 19 | 20 | 21 | def plot_logs(logs, fields=('class_error', 'loss_bbox_unscaled', 'mAP'), ewm_col=0, log_name='log.txt'): 22 | ''' 23 | Function to plot specific fields from training log(s). Plots both training and test results. 24 | 25 | :: Inputs - logs = list containing Path objects, each pointing to individual dir with a log file 26 | - fields = which results to plot from each log file - plots both training and test for each field. 27 | - ewm_col = optional, which column to use as the exponential weighted smoothing of the plots 28 | - log_name = optional, name of log file if different than default 'log.txt'. 29 | 30 | :: Outputs - matplotlib plots of results in fields, color coded for each log file. 31 | - solid lines are training results, dashed lines are test results. 
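    :: Example (illustrative output dirs; each must contain a 'log.txt') -
         plot_logs([Path('outputs/exp1'), Path('outputs/exp2')],
                   fields=('loss', 'class_error', 'mAP'), ewm_col=5)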
32 | 
33 |     '''
34 |     func_name = "plot_utils.py::plot_logs"
35 | 
36 |     # verify logs is a list of Paths (list[Path]) or a single pathlib Path;
37 |     # convert a single Path to a list to avoid a 'not iterable' error
38 | 
39 |     if not isinstance(logs, list):
40 |         if isinstance(logs, PurePath):
41 |             logs = [logs]
42 |             print(f"{func_name} info: logs param expects a list argument, converted to list[Path].")
43 |         else:
44 |             raise ValueError(f"{func_name} - invalid argument for logs parameter.\n \
45 |             Expect list[Path] or single Path obj, received {type(logs)}")
46 | 
47 |     # verify valid dir(s) and that every item in the list is a Path object
48 |     for i, dir in enumerate(logs):
49 |         if not isinstance(dir, PurePath):
50 |             raise ValueError(f"{func_name} - non-Path object in logs argument of {type(dir)}: \n{dir}")
51 |         if dir.exists():
52 |             continue
53 |         raise ValueError(f"{func_name} - invalid directory in logs argument:\n{dir}")
54 | 
55 |     # load log file(s) and plot
56 |     dfs = [pd.read_json(Path(p) / log_name, lines=True) for p in logs]
57 | 
58 |     fig, axs = plt.subplots(ncols=len(fields), figsize=(16, 5))
59 | 
60 |     for df, color in zip(dfs, sns.color_palette(n_colors=len(logs))):
61 |         for j, field in enumerate(fields):
62 |             if field == 'mAP':
63 |                 coco_eval = pd.DataFrame([v[1] for v in df.test_coco_eval.dropna().values]).ewm(com=ewm_col).mean()
64 |                 axs[j].plot(coco_eval, c=color)
65 |             else:
66 |                 df.interpolate().ewm(com=ewm_col).mean().plot(
67 |                     y=[f'train_{field}', f'test_{field}'],
68 |                     ax=axs[j],
69 |                     color=[color] * 2,
70 |                     style=['-', '--']
71 |                 )
72 |     for ax, field in zip(axs, fields):
73 |         ax.legend([Path(p).name for p in logs])
74 |         ax.set_title(field)
75 | 
76 | 
77 | def plot_precision_recall(files, naming_scheme='iter'):
78 |     if naming_scheme == 'exp_id':
79 |         # name becomes exp_id
80 |         names = [f.parts[-3] for f in files]
81 |     elif naming_scheme == 'iter':
82 |         names = [f.stem for f in files]
83 |     else:
84 |         raise ValueError(f'not supported {naming_scheme}')
85 |     fig, axs = plt.subplots(ncols=2, figsize=(16, 5))
86 |     for f, color, name in zip(files, sns.color_palette("Blues", n_colors=len(files)), names):
87 |         data = torch.load(f)
88 |         # precision is n_iou, n_points, n_cat, n_area, max_det
89 |         precision = data['precision']
90 |         recall = data['params'].recThrs
91 |         scores = data['scores']
92 |         # take precision for all classes, all areas and 100 detections
93 |         precision = precision[0, :, :, 0, -1].mean(1)
94 |         scores = scores[0, :, :, 0, -1].mean(1)
95 |         prec = precision.mean()
96 |         rec = data['recall'][0, :, 0, -1].mean()
97 |         print(f'{naming_scheme} {name}: mAP@50={prec * 100: 05.1f}, ' +
98 |               f'score={scores.mean():0.3f}, ' +
99 |               f'f1={2 * prec * rec / (prec + rec + 1e-8):0.3f}'
100 |               )
101 |         axs[0].plot(recall, precision, c=color)
102 |         axs[1].plot(recall, scores, c=color)
103 | 
104 |     axs[0].set_title('Precision / Recall')
105 |     axs[0].legend(names)
106 |     axs[1].set_title('Scores / Recall')
107 |     axs[1].legend(names)
108 |     return fig, axs
109 | 
110 | 
111 | 
112 | 
-------------------------------------------------------------------------------- /models/matcher.py: --------------------------------------------------------------------------------
1 | # ------------------------------------------------------------------------
2 | # Deformable DETR
3 | # Copyright (c) 2020 SenseTime. All Rights Reserved.
4 | # Licensed under the Apache License, Version 2.0 [see LICENSE for details] 5 | # ------------------------------------------------------------------------ 6 | # Modified from DETR (https://github.com/facebookresearch/detr) 7 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 8 | # ------------------------------------------------------------------------ 9 | 10 | """ 11 | Modules to compute the matching cost and solve the corresponding LSAP. 12 | """ 13 | import torch 14 | from scipy.optimize import linear_sum_assignment 15 | from torch import nn 16 | 17 | from util.box_ops import box_cxcywh_to_xyxy, generalized_box_iou 18 | 19 | 20 | class HungarianMatcher(nn.Module): 21 | """This class computes an assignment between the targets and the predictions of the network 22 | 23 | For efficiency reasons, the targets don't include the no_object. Because of this, in general, 24 | there are more predictions than targets. In this case, we do a 1-to-1 matching of the best predictions, 25 | while the others are un-matched (and thus treated as non-objects). 26 | """ 27 | 28 | def __init__( 29 | self, cost_class: float = 1, cost_bbox: float = 1, cost_giou: float = 1 30 | ): 31 | """Creates the matcher 32 | 33 | Params: 34 | cost_class: This is the relative weight of the classification error in the matching cost 35 | cost_bbox: This is the relative weight of the L1 error of the bounding box coordinates in the matching cost 36 | cost_giou: This is the relative weight of the giou loss of the bounding box in the matching cost 37 | """ 38 | super().__init__() 39 | self.cost_class = cost_class 40 | self.cost_bbox = cost_bbox 41 | self.cost_giou = cost_giou 42 | assert ( 43 | cost_class != 0 or cost_bbox != 0 or cost_giou != 0 44 | ), "all costs cant be 0" 45 | 46 | def forward(self, outputs, targets): 47 | """ Performs the matching 48 | 49 | Params: 50 | outputs: This is a dict that contains at least these entries: 51 | "pred_logits": Tensor of dim [batch_size, num_queries, num_classes] with the classification logits 52 | "pred_boxes": Tensor of dim [batch_size, num_queries, 4] with the predicted box coordinates 53 | 54 | targets: This is a list of targets (len(targets) = batch_size), where each target is a dict containing: 55 | "labels": Tensor of dim [num_target_boxes] (where num_target_boxes is the number of ground-truth 56 | objects in the target) containing the class labels 57 | "boxes": Tensor of dim [num_target_boxes, 4] containing the target box coordinates 58 | 59 | Returns: 60 | A list of size batch_size, containing tuples of (index_i, index_j) where: 61 | - index_i is the indices of the selected predictions (in order) 62 | - index_j is the indices of the corresponding selected targets (in order) 63 | For each batch element, it holds: 64 | len(index_i) = len(index_j) = min(num_queries, num_target_boxes) 65 | """ 66 | with torch.no_grad(): 67 | bs, num_queries = outputs["pred_logits"].shape[:2] 68 | 69 | # We flatten to compute the cost matrices in a batch 70 | out_prob = outputs["pred_logits"].flatten(0, 1).sigmoid() 71 | out_bbox = outputs["pred_boxes"].flatten( 72 | 0, 1 73 | ) # [batch_size * num_queries, 4] 74 | 75 | # Also concat the target labels and boxes 76 | tgt_ids = torch.cat([v["labels"] for v in targets]) 77 | tgt_bbox = torch.cat([v["boxes"] for v in targets]) 78 | 79 | # Compute the classification cost. 
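            # This mirrors the sigmoid focal loss used at training time
            # (alpha = 0.25, gamma = 2): for every (query, target-class) pair,
            # pos_cost_class is the focal term paid for predicting that class
            # and neg_cost_class the term paid for not predicting it; their
            # difference fills the classification block of the cost matrix.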
80 | alpha = 0.25 81 | gamma = 2.0 82 | neg_cost_class = ( 83 | (1 - alpha) * (out_prob ** gamma) * (-(1 - out_prob + 1e-8).log()) 84 | ) 85 | pos_cost_class = ( 86 | alpha * ((1 - out_prob) ** gamma) * (-(out_prob + 1e-8).log()) 87 | ) 88 | cost_class = pos_cost_class[:, tgt_ids] - neg_cost_class[:, tgt_ids] 89 | 90 | # Compute the L1 cost between boxes 91 | cost_bbox = torch.cdist(out_bbox, tgt_bbox, p=1) 92 | 93 | # Compute the giou cost betwen boxes 94 | cost_giou = -generalized_box_iou( 95 | box_cxcywh_to_xyxy(out_bbox), box_cxcywh_to_xyxy(tgt_bbox) 96 | ) 97 | 98 | # Final cost matrix 99 | C = ( 100 | self.cost_bbox * cost_bbox 101 | + self.cost_class * cost_class 102 | + self.cost_giou * cost_giou 103 | ) 104 | C = C.view(bs, num_queries, -1).cpu() 105 | 106 | sizes = [len(v["boxes"]) for v in targets] 107 | indices = [ 108 | linear_sum_assignment(c[i]) for i, c in enumerate(C.split(sizes, -1)) 109 | ] 110 | return [ 111 | ( 112 | torch.as_tensor(i, dtype=torch.int64), 113 | torch.as_tensor(j, dtype=torch.int64), 114 | ) 115 | for i, j in indices 116 | ] 117 | 118 | 119 | def build_matcher(args): 120 | return HungarianMatcher( 121 | cost_class=args.set_cost_class, 122 | cost_bbox=args.set_cost_bbox, 123 | cost_giou=args.set_cost_giou, 124 | ) 125 | -------------------------------------------------------------------------------- /datasets/samplers.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------ 2 | # Deformable DETR 3 | # Copyright (c) 2020 SenseTime. All Rights Reserved. 4 | # Licensed under the Apache License, Version 2.0 [see LICENSE for details] 5 | # ------------------------------------------------------------------------ 6 | # Modified from codes in torch.utils.data.distributed 7 | # ------------------------------------------------------------------------ 8 | 9 | import os 10 | import math 11 | import torch 12 | import torch.distributed as dist 13 | from torch.utils.data.sampler import Sampler 14 | 15 | 16 | class DistributedSampler(Sampler): 17 | """Sampler that restricts data loading to a subset of the dataset. 18 | It is especially useful in conjunction with 19 | :class:`torch.nn.parallel.DistributedDataParallel`. In such case, each 20 | process can pass a DistributedSampler instance as a DataLoader sampler, 21 | and load a subset of the original dataset that is exclusive to it. 22 | .. note:: 23 | Dataset is assumed to be of constant size. 24 | Arguments: 25 | dataset: Dataset used for sampling. 26 | num_replicas (optional): Number of processes participating in 27 | distributed training. 28 | rank (optional): Rank of the current process within num_replicas. 
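        shuffle (optional): Whether to reshuffle the indices every epoch;
            the shuffle is deterministic, seeded by the epoch given to
            ``set_epoch``.
        Note: ``local_rank`` and ``local_size`` are accepted for signature
            compatibility with ``NodeDistributedSampler`` below but are not
            used by this sampler.
        Example (sketch; ``dataset`` and ``num_epochs`` are assumed to be
        defined by the caller)::
            sampler = DistributedSampler(dataset, shuffle=True)
            loader = torch.utils.data.DataLoader(dataset, sampler=sampler)
            for epoch in range(num_epochs):
                sampler.set_epoch(epoch)
                for batch in loader:
                    ...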
29 | """ 30 | 31 | def __init__(self, dataset, num_replicas=None, rank=None, local_rank=None, local_size=None, shuffle=True): 32 | if num_replicas is None: 33 | if not dist.is_available(): 34 | raise RuntimeError("Requires distributed package to be available") 35 | num_replicas = dist.get_world_size() 36 | if rank is None: 37 | if not dist.is_available(): 38 | raise RuntimeError("Requires distributed package to be available") 39 | rank = dist.get_rank() 40 | self.dataset = dataset 41 | self.num_replicas = num_replicas 42 | self.rank = rank 43 | self.epoch = 0 44 | self.num_samples = int(math.ceil(len(self.dataset) * 1.0 / self.num_replicas)) 45 | self.total_size = self.num_samples * self.num_replicas 46 | self.shuffle = shuffle 47 | 48 | def __iter__(self): 49 | if self.shuffle: 50 | # deterministically shuffle based on epoch 51 | g = torch.Generator() 52 | g.manual_seed(self.epoch) 53 | indices = torch.randperm(len(self.dataset), generator=g).tolist() 54 | else: 55 | indices = torch.arange(len(self.dataset)).tolist() 56 | 57 | # add extra samples to make it evenly divisible 58 | indices += indices[: (self.total_size - len(indices))] 59 | assert len(indices) == self.total_size 60 | 61 | # subsample 62 | offset = self.num_samples * self.rank 63 | indices = indices[offset : offset + self.num_samples] 64 | assert len(indices) == self.num_samples 65 | 66 | return iter(indices) 67 | 68 | def __len__(self): 69 | return self.num_samples 70 | 71 | def set_epoch(self, epoch): 72 | self.epoch = epoch 73 | 74 | 75 | class NodeDistributedSampler(Sampler): 76 | """Sampler that restricts data loading to a subset of the dataset. 77 | It is especially useful in conjunction with 78 | :class:`torch.nn.parallel.DistributedDataParallel`. In such case, each 79 | process can pass a DistributedSampler instance as a DataLoader sampler, 80 | and load a subset of the original dataset that is exclusive to it. 81 | .. note:: 82 | Dataset is assumed to be of constant size. 83 | Arguments: 84 | dataset: Dataset used for sampling. 85 | num_replicas (optional): Number of processes participating in 86 | distributed training. 87 | rank (optional): Rank of the current process within num_replicas. 
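        local_rank (optional): Rank of the current process on its node;
            read from the ``LOCAL_RANK`` environment variable when omitted.
        local_size (optional): Number of processes per node; read from the
            ``LOCAL_SIZE`` environment variable when omitted. Each process
            only ever draws indices ``i`` with ``i % local_size == local_rank``,
            so every process iterates over its own fixed, node-local slice of
            the dataset (useful when samples are cached per process).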
88 | """ 89 | 90 | def __init__(self, dataset, num_replicas=None, rank=None, local_rank=None, local_size=None, shuffle=True): 91 | if num_replicas is None: 92 | if not dist.is_available(): 93 | raise RuntimeError("Requires distributed package to be available") 94 | num_replicas = dist.get_world_size() 95 | if rank is None: 96 | if not dist.is_available(): 97 | raise RuntimeError("Requires distributed package to be available") 98 | rank = dist.get_rank() 99 | if local_rank is None: 100 | local_rank = int(os.environ.get('LOCAL_RANK', 0)) 101 | if local_size is None: 102 | local_size = int(os.environ.get('LOCAL_SIZE', 1)) 103 | self.dataset = dataset 104 | self.shuffle = shuffle 105 | self.num_replicas = num_replicas 106 | self.num_parts = local_size 107 | self.rank = rank 108 | self.local_rank = local_rank 109 | self.epoch = 0 110 | self.num_samples = int(math.ceil(len(self.dataset) * 1.0 / self.num_replicas)) 111 | self.total_size = self.num_samples * self.num_replicas 112 | 113 | self.total_size_parts = self.num_samples * self.num_replicas // self.num_parts 114 | 115 | def __iter__(self): 116 | if self.shuffle: 117 | # deterministically shuffle based on epoch 118 | g = torch.Generator() 119 | g.manual_seed(self.epoch) 120 | indices = torch.randperm(len(self.dataset), generator=g).tolist() 121 | else: 122 | indices = torch.arange(len(self.dataset)).tolist() 123 | indices = [i for i in indices if i % self.num_parts == self.local_rank] 124 | 125 | # add extra samples to make it evenly divisible 126 | indices += indices[:(self.total_size_parts - len(indices))] 127 | assert len(indices) == self.total_size_parts 128 | 129 | # subsample 130 | indices = indices[self.rank // self.num_parts:self.total_size_parts:self.num_replicas // self.num_parts] 131 | assert len(indices) == self.num_samples 132 | 133 | return iter(indices) 134 | 135 | def __len__(self): 136 | return self.num_samples 137 | 138 | def set_epoch(self, epoch): 139 | self.epoch = epoch 140 | -------------------------------------------------------------------------------- /datasets/coco.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------ 2 | # H-DETR 3 | # Copyright (c) 2022 Peking University & Microsoft Research Asia. All Rights Reserved. 4 | # Licensed under the MIT-style license found in the LICENSE file in the root directory 5 | # ------------------------------------------------------------------------ 6 | # Deformable DETR 7 | # Copyright (c) 2020 SenseTime. All Rights Reserved. 8 | # Licensed under the Apache License, Version 2.0 [see LICENSE for details] 9 | # ------------------------------------------------------------------------ 10 | # Modified from DETR (https://github.com/facebookresearch/detr) 11 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 12 | # ------------------------------------------------------------------------ 13 | 14 | """ 15 | COCO dataset which returns image_id for evaluation. 
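After ConvertCocoPolysToMask (below), each target dict carries "boxes"
(xyxy, in absolute pixels at that stage), "labels", "image_id", "area",
"iscrowd", "orig_size" and "size", plus "masks" and "keypoints" when present.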
16 | 17 | Mostly copy-paste from https://github.com/pytorch/vision/blob/13b35ff/references/detection/coco_utils.py 18 | """ 19 | from pathlib import Path 20 | 21 | import torch 22 | import torch.utils.data 23 | from pycocotools import mask as coco_mask 24 | 25 | from .torchvision_datasets import CocoDetection as TvCocoDetection 26 | from util.misc import get_local_rank, get_local_size 27 | import datasets.transforms as T 28 | 29 | 30 | class CocoDetection(TvCocoDetection): 31 | def __init__( 32 | self, 33 | img_folder, 34 | ann_file, 35 | transforms, 36 | return_masks, 37 | cache_mode=False, 38 | local_rank=0, 39 | local_size=1, 40 | ): 41 | super(CocoDetection, self).__init__( 42 | img_folder, 43 | ann_file, 44 | cache_mode=cache_mode, 45 | local_rank=local_rank, 46 | local_size=local_size, 47 | ) 48 | self._transforms = transforms 49 | self.prepare = ConvertCocoPolysToMask(return_masks) 50 | 51 | def __getitem__(self, idx): 52 | img, target = super(CocoDetection, self).__getitem__(idx) 53 | image_id = self.ids[idx] 54 | target = {"image_id": image_id, "annotations": target} 55 | img, target = self.prepare(img, target) 56 | if self._transforms is not None: 57 | img, target = self._transforms(img, target) 58 | return img, target 59 | 60 | 61 | def convert_coco_poly_to_mask(segmentations, height, width): 62 | masks = [] 63 | for polygons in segmentations: 64 | rles = coco_mask.frPyObjects(polygons, height, width) 65 | mask = coco_mask.decode(rles) 66 | if len(mask.shape) < 3: 67 | mask = mask[..., None] 68 | mask = torch.as_tensor(mask, dtype=torch.uint8) 69 | mask = mask.any(dim=2) 70 | masks.append(mask) 71 | if masks: 72 | masks = torch.stack(masks, dim=0) 73 | else: 74 | masks = torch.zeros((0, height, width), dtype=torch.uint8) 75 | return masks 76 | 77 | 78 | class ConvertCocoPolysToMask(object): 79 | def __init__(self, return_masks=False): 80 | self.return_masks = return_masks 81 | 82 | def __call__(self, image, target): 83 | w, h = image.size 84 | 85 | image_id = target["image_id"] 86 | image_id = torch.tensor([image_id]) 87 | 88 | anno = target["annotations"] 89 | 90 | anno = [obj for obj in anno if "iscrowd" not in obj or obj["iscrowd"] == 0] 91 | 92 | boxes = [obj["bbox"] for obj in anno] 93 | # guard against no boxes via resizing 94 | boxes = torch.as_tensor(boxes, dtype=torch.float32).reshape(-1, 4) 95 | boxes[:, 2:] += boxes[:, :2] 96 | boxes[:, 0::2].clamp_(min=0, max=w) 97 | boxes[:, 1::2].clamp_(min=0, max=h) 98 | 99 | classes = [obj["category_id"] for obj in anno] 100 | classes = torch.tensor(classes, dtype=torch.int64) 101 | 102 | if self.return_masks: 103 | segmentations = [obj["segmentation"] for obj in anno] 104 | masks = convert_coco_poly_to_mask(segmentations, h, w) 105 | 106 | keypoints = None 107 | if anno and "keypoints" in anno[0]: 108 | keypoints = [obj["keypoints"] for obj in anno] 109 | keypoints = torch.as_tensor(keypoints, dtype=torch.float32) 110 | num_keypoints = keypoints.shape[0] 111 | if num_keypoints: 112 | keypoints = keypoints.view(num_keypoints, -1, 3) 113 | 114 | keep = (boxes[:, 3] > boxes[:, 1]) & (boxes[:, 2] > boxes[:, 0]) 115 | boxes = boxes[keep] 116 | classes = classes[keep] 117 | if self.return_masks: 118 | masks = masks[keep] 119 | if keypoints is not None: 120 | keypoints = keypoints[keep] 121 | 122 | target = {} 123 | target["boxes"] = boxes 124 | target["labels"] = classes 125 | if self.return_masks: 126 | target["masks"] = masks 127 | target["image_id"] = image_id 128 | if keypoints is not None: 129 | target["keypoints"] = keypoints 
130 | 131 | # for conversion to coco api 132 | area = torch.tensor([obj["area"] for obj in anno]) 133 | iscrowd = torch.tensor( 134 | [obj["iscrowd"] if "iscrowd" in obj else 0 for obj in anno] 135 | ) 136 | target["area"] = area[keep] 137 | target["iscrowd"] = iscrowd[keep] 138 | 139 | target["orig_size"] = torch.as_tensor([int(h), int(w)]) 140 | target["size"] = torch.as_tensor([int(h), int(w)]) 141 | 142 | return image, target 143 | 144 | 145 | def make_coco_transforms(image_set): 146 | 147 | normalize = T.Compose( 148 | [T.ToTensor(), T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])] 149 | ) 150 | 151 | scales = [480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800] 152 | 153 | if image_set == "train": 154 | return T.Compose( 155 | [ 156 | T.RandomHorizontalFlip(), 157 | T.RandomSelect( 158 | T.RandomResize(scales, max_size=1333), 159 | T.Compose( 160 | [ 161 | T.RandomResize([400, 500, 600]), 162 | T.RandomSizeCrop(384, 600), 163 | T.RandomResize(scales, max_size=1333), 164 | ] 165 | ), 166 | ), 167 | normalize, 168 | ] 169 | ) 170 | 171 | if image_set == "val": 172 | return T.Compose([T.RandomResize([800], max_size=1333), normalize,]) 173 | 174 | raise ValueError(f"unknown {image_set}") 175 | 176 | 177 | def build(image_set, args, eval_in_training_set): 178 | root = Path(args.coco_path) 179 | assert root.exists(), f"provided COCO path {root} does not exist" 180 | mode = "instances" 181 | PATHS = { 182 | "train": (root / "train2017", root / "annotations" / f"{mode}_train2017.json"), 183 | "val": (root / "val2017", root / "annotations" / f"{mode}_val2017.json"), 184 | } 185 | 186 | img_folder, ann_file = PATHS[image_set] 187 | if eval_in_training_set: 188 | image_set = "val" 189 | print("use validation dataset transforms") 190 | dataset = CocoDetection( 191 | img_folder, 192 | ann_file, 193 | transforms=make_coco_transforms(image_set), 194 | return_masks=args.masks, 195 | cache_mode=args.cache_mode, 196 | local_rank=get_local_rank(), 197 | local_size=get_local_size(), 198 | ) 199 | return dataset 200 | -------------------------------------------------------------------------------- /models/ops/modules/ms_deform_attn.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------ 2 | # H-DETR 3 | # Copyright (c) 2022 Peking University & Microsoft Research Asia. All Rights Reserved. 4 | # Licensed under the MIT-style license found in the LICENSE file in the root directory 5 | # ------------------------------------------------------------------------------------------------ 6 | # Deformable DETR 7 | # Copyright (c) 2020 SenseTime. All Rights Reserved. 
8 | # Licensed under the Apache License, Version 2.0 [see LICENSE for details] 9 | # ------------------------------------------------------------------------------------------------ 10 | # Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 11 | # ------------------------------------------------------------------------------------------------ 12 | 13 | from __future__ import absolute_import 14 | from __future__ import print_function 15 | from __future__ import division 16 | 17 | import warnings 18 | import math 19 | 20 | import torch 21 | from torch import nn 22 | import torch.nn.functional as F 23 | from torch.nn.init import xavier_uniform_, constant_ 24 | 25 | from ..functions import MSDeformAttnFunction 26 | 27 | 28 | def _is_power_of_2(n): 29 | if (not isinstance(n, int)) or (n < 0): 30 | raise ValueError( 31 | "invalid input for _is_power_of_2: {} (type: {})".format(n, type(n)) 32 | ) 33 | return (n & (n - 1) == 0) and n != 0 34 | 35 | 36 | class MSDeformAttn(nn.Module): 37 | def __init__(self, d_model=256, n_levels=4, n_heads=8, n_points=4): 38 | """ 39 | Multi-Scale Deformable Attention Module 40 | :param d_model hidden dimension 41 | :param n_levels number of feature levels 42 | :param n_heads number of attention heads 43 | :param n_points number of sampling points per attention head per feature level 44 | """ 45 | super().__init__() 46 | if d_model % n_heads != 0: 47 | raise ValueError( 48 | "d_model must be divisible by n_heads, but got {} and {}".format( 49 | d_model, n_heads 50 | ) 51 | ) 52 | _d_per_head = d_model // n_heads 53 | # you'd better set _d_per_head to a power of 2 which is more efficient in our CUDA implementation 54 | if not _is_power_of_2(_d_per_head): 55 | warnings.warn( 56 | "You'd better set d_model in MSDeformAttn to make the dimension of each attention head a power of 2 " 57 | "which is more efficient in our CUDA implementation." 
58 | ) 59 | 60 | self.im2col_step = 64 61 | 62 | self.d_model = d_model 63 | self.n_levels = n_levels 64 | self.n_heads = n_heads 65 | self.n_points = n_points 66 | 67 | self.sampling_offsets = nn.Linear(d_model, n_heads * n_levels * n_points * 2) 68 | self.attention_weights = nn.Linear(d_model, n_heads * n_levels * n_points) 69 | self.value_proj = nn.Linear(d_model, d_model) 70 | self.output_proj = nn.Linear(d_model, d_model) 71 | 72 | self._reset_parameters() 73 | 74 | def _reset_parameters(self): 75 | constant_(self.sampling_offsets.weight.data, 0.0) 76 | thetas = torch.arange(self.n_heads, dtype=torch.float32) * ( 77 | 2.0 * math.pi / self.n_heads 78 | ) 79 | grid_init = torch.stack([thetas.cos(), thetas.sin()], -1) 80 | grid_init = ( 81 | (grid_init / grid_init.abs().max(-1, keepdim=True)[0]) 82 | .view(self.n_heads, 1, 1, 2) 83 | .repeat(1, self.n_levels, self.n_points, 1) 84 | ) 85 | for i in range(self.n_points): 86 | grid_init[:, :, i, :] *= i + 1 87 | with torch.no_grad(): 88 | self.sampling_offsets.bias = nn.Parameter(grid_init.view(-1)) 89 | constant_(self.attention_weights.weight.data, 0.0) 90 | constant_(self.attention_weights.bias.data, 0.0) 91 | xavier_uniform_(self.value_proj.weight.data) 92 | constant_(self.value_proj.bias.data, 0.0) 93 | xavier_uniform_(self.output_proj.weight.data) 94 | constant_(self.output_proj.bias.data, 0.0) 95 | 96 | @torch.cuda.amp.custom_fwd(cast_inputs=torch.float32) 97 | def forward( 98 | self, 99 | query, 100 | reference_points, 101 | input_flatten, 102 | input_spatial_shapes, 103 | input_level_start_index, 104 | input_padding_mask=None, 105 | ): 106 | """ 107 | :param query (N, Length_{query}, C) 108 | :param reference_points (N, Length_{query}, n_levels, 2), range in [0, 1], top-left (0,0), bottom-right (1, 1), including padding area 109 | or (N, Length_{query}, n_levels, 4), add additional (w, h) to form reference boxes 110 | :param input_flatten (N, \sum_{l=0}^{L-1} H_l \cdot W_l, C) 111 | :param input_spatial_shapes (n_levels, 2), [(H_0, W_0), (H_1, W_1), ..., (H_{L-1}, W_{L-1})] 112 | :param input_level_start_index (n_levels, ), [0, H_0*W_0, H_0*W_0+H_1*W_1, H_0*W_0+H_1*W_1+H_2*W_2, ..., H_0*W_0+H_1*W_1+...+H_{L-1}*W_{L-1}] 113 | :param input_padding_mask (N, \sum_{l=0}^{L-1} H_l \cdot W_l), True for padding elements, False for non-padding elements 114 | 115 | :return output (N, Length_{query}, C) 116 | """ 117 | N, Len_q, _ = query.shape 118 | N, Len_in, _ = input_flatten.shape 119 | assert (input_spatial_shapes[:, 0] * input_spatial_shapes[:, 1]).sum() == Len_in 120 | 121 | value = self.value_proj(input_flatten) 122 | if input_padding_mask is not None: 123 | value = value.masked_fill(input_padding_mask[..., None], float(0)) 124 | value = value.view(N, Len_in, self.n_heads, self.d_model // self.n_heads) 125 | sampling_offsets = self.sampling_offsets(query).view( 126 | N, Len_q, self.n_heads, self.n_levels, self.n_points, 2 127 | ) 128 | attention_weights = self.attention_weights(query).view( 129 | N, Len_q, self.n_heads, self.n_levels * self.n_points 130 | ) 131 | attention_weights = F.softmax(attention_weights, -1).view( 132 | N, Len_q, self.n_heads, self.n_levels, self.n_points 133 | ) 134 | # N, Len_q, n_heads, n_levels, n_points, 2 135 | if reference_points.shape[-1] == 2: 136 | offset_normalizer = torch.stack( 137 | [input_spatial_shapes[..., 1], input_spatial_shapes[..., 0]], -1 138 | ) 139 | sampling_locations = ( 140 | reference_points[:, :, None, :, None, :] 141 | + sampling_offsets / offset_normalizer[None, None, None, :, 
None, :] 142 | ) 143 | elif reference_points.shape[-1] == 4: 144 | sampling_locations = ( 145 | reference_points[:, :, None, :, None, :2] 146 | + sampling_offsets 147 | / self.n_points 148 | * reference_points[:, :, None, :, None, 2:] 149 | * 0.5 150 | ) 151 | else: 152 | raise ValueError( 153 | "Last dim of reference_points must be 2 or 4, but get {} instead.".format( 154 | reference_points.shape[-1] 155 | ) 156 | ) 157 | output = MSDeformAttnFunction.apply( 158 | value, 159 | input_spatial_shapes, 160 | input_level_start_index, 161 | sampling_locations, 162 | attention_weights, 163 | self.im2col_step, 164 | ) 165 | output = self.output_proj(output) 166 | return output 167 | -------------------------------------------------------------------------------- /models/ops/src/cuda/ms_deform_attn_cuda.cu: -------------------------------------------------------------------------------- 1 | /*! 2 | ************************************************************************************************** 3 | * Deformable DETR 4 | * Copyright (c) 2020 SenseTime. All Rights Reserved. 5 | * Licensed under the Apache License, Version 2.0 [see LICENSE for details] 6 | ************************************************************************************************** 7 | * Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 8 | ************************************************************************************************** 9 | */ 10 | 11 | #include 12 | #include "cuda/ms_deform_im2col_cuda.cuh" 13 | 14 | #include 15 | #include 16 | #include 17 | #include 18 | 19 | 20 | at::Tensor ms_deform_attn_cuda_forward( 21 | const at::Tensor &value, 22 | const at::Tensor &spatial_shapes, 23 | const at::Tensor &level_start_index, 24 | const at::Tensor &sampling_loc, 25 | const at::Tensor &attn_weight, 26 | const int im2col_step) 27 | { 28 | AT_ASSERTM(value.is_contiguous(), "value tensor has to be contiguous"); 29 | AT_ASSERTM(spatial_shapes.is_contiguous(), "spatial_shapes tensor has to be contiguous"); 30 | AT_ASSERTM(level_start_index.is_contiguous(), "level_start_index tensor has to be contiguous"); 31 | AT_ASSERTM(sampling_loc.is_contiguous(), "sampling_loc tensor has to be contiguous"); 32 | AT_ASSERTM(attn_weight.is_contiguous(), "attn_weight tensor has to be contiguous"); 33 | 34 | AT_ASSERTM(value.type().is_cuda(), "value must be a CUDA tensor"); 35 | AT_ASSERTM(spatial_shapes.type().is_cuda(), "spatial_shapes must be a CUDA tensor"); 36 | AT_ASSERTM(level_start_index.type().is_cuda(), "level_start_index must be a CUDA tensor"); 37 | AT_ASSERTM(sampling_loc.type().is_cuda(), "sampling_loc must be a CUDA tensor"); 38 | AT_ASSERTM(attn_weight.type().is_cuda(), "attn_weight must be a CUDA tensor"); 39 | 40 | const int batch = value.size(0); 41 | const int spatial_size = value.size(1); 42 | const int num_heads = value.size(2); 43 | const int channels = value.size(3); 44 | 45 | const int num_levels = spatial_shapes.size(0); 46 | 47 | const int num_query = sampling_loc.size(1); 48 | const int num_point = sampling_loc.size(4); 49 | 50 | const int im2col_step_ = std::min(batch, im2col_step); 51 | 52 | AT_ASSERTM(batch % im2col_step_ == 0, "batch(%d) must divide im2col_step(%d)", batch, im2col_step_); 53 | 54 | auto output = at::zeros({batch, num_query, num_heads, channels}, value.options()); 55 | 56 | const int batch_n = im2col_step_; 57 | auto output_n = output.view({batch/im2col_step_, batch_n, num_query, num_heads, channels}); 58 | auto per_value_size = spatial_size * 
num_heads * channels; 59 | auto per_sample_loc_size = num_query * num_heads * num_levels * num_point * 2; 60 | auto per_attn_weight_size = num_query * num_heads * num_levels * num_point; 61 | for (int n = 0; n < batch/im2col_step_; ++n) 62 | { 63 | auto columns = output_n.select(0, n); 64 | AT_DISPATCH_FLOATING_TYPES(value.type(), "ms_deform_attn_forward_cuda", ([&] { 65 | ms_deformable_im2col_cuda(at::cuda::getCurrentCUDAStream(), 66 | value.data() + n * im2col_step_ * per_value_size, 67 | spatial_shapes.data(), 68 | level_start_index.data(), 69 | sampling_loc.data() + n * im2col_step_ * per_sample_loc_size, 70 | attn_weight.data() + n * im2col_step_ * per_attn_weight_size, 71 | batch_n, spatial_size, num_heads, channels, num_levels, num_query, num_point, 72 | columns.data()); 73 | 74 | })); 75 | } 76 | 77 | output = output.view({batch, num_query, num_heads*channels}); 78 | 79 | return output; 80 | } 81 | 82 | 83 | std::vector ms_deform_attn_cuda_backward( 84 | const at::Tensor &value, 85 | const at::Tensor &spatial_shapes, 86 | const at::Tensor &level_start_index, 87 | const at::Tensor &sampling_loc, 88 | const at::Tensor &attn_weight, 89 | const at::Tensor &grad_output, 90 | const int im2col_step) 91 | { 92 | 93 | AT_ASSERTM(value.is_contiguous(), "value tensor has to be contiguous"); 94 | AT_ASSERTM(spatial_shapes.is_contiguous(), "spatial_shapes tensor has to be contiguous"); 95 | AT_ASSERTM(level_start_index.is_contiguous(), "level_start_index tensor has to be contiguous"); 96 | AT_ASSERTM(sampling_loc.is_contiguous(), "sampling_loc tensor has to be contiguous"); 97 | AT_ASSERTM(attn_weight.is_contiguous(), "attn_weight tensor has to be contiguous"); 98 | AT_ASSERTM(grad_output.is_contiguous(), "grad_output tensor has to be contiguous"); 99 | 100 | AT_ASSERTM(value.type().is_cuda(), "value must be a CUDA tensor"); 101 | AT_ASSERTM(spatial_shapes.type().is_cuda(), "spatial_shapes must be a CUDA tensor"); 102 | AT_ASSERTM(level_start_index.type().is_cuda(), "level_start_index must be a CUDA tensor"); 103 | AT_ASSERTM(sampling_loc.type().is_cuda(), "sampling_loc must be a CUDA tensor"); 104 | AT_ASSERTM(attn_weight.type().is_cuda(), "attn_weight must be a CUDA tensor"); 105 | AT_ASSERTM(grad_output.type().is_cuda(), "grad_output must be a CUDA tensor"); 106 | 107 | const int batch = value.size(0); 108 | const int spatial_size = value.size(1); 109 | const int num_heads = value.size(2); 110 | const int channels = value.size(3); 111 | 112 | const int num_levels = spatial_shapes.size(0); 113 | 114 | const int num_query = sampling_loc.size(1); 115 | const int num_point = sampling_loc.size(4); 116 | 117 | const int im2col_step_ = std::min(batch, im2col_step); 118 | 119 | AT_ASSERTM(batch % im2col_step_ == 0, "batch(%d) must divide im2col_step(%d)", batch, im2col_step_); 120 | 121 | auto grad_value = at::zeros_like(value); 122 | auto grad_sampling_loc = at::zeros_like(sampling_loc); 123 | auto grad_attn_weight = at::zeros_like(attn_weight); 124 | 125 | const int batch_n = im2col_step_; 126 | auto per_value_size = spatial_size * num_heads * channels; 127 | auto per_sample_loc_size = num_query * num_heads * num_levels * num_point * 2; 128 | auto per_attn_weight_size = num_query * num_heads * num_levels * num_point; 129 | auto grad_output_n = grad_output.view({batch/im2col_step_, batch_n, num_query, num_heads, channels}); 130 | 131 | for (int n = 0; n < batch/im2col_step_; ++n) 132 | { 133 | auto grad_output_g = grad_output_n.select(0, n); 134 | AT_DISPATCH_FLOATING_TYPES(value.type(), 
"ms_deform_attn_backward_cuda", ([&] { 135 | ms_deformable_col2im_cuda(at::cuda::getCurrentCUDAStream(), 136 | grad_output_g.data(), 137 | value.data() + n * im2col_step_ * per_value_size, 138 | spatial_shapes.data(), 139 | level_start_index.data(), 140 | sampling_loc.data() + n * im2col_step_ * per_sample_loc_size, 141 | attn_weight.data() + n * im2col_step_ * per_attn_weight_size, 142 | batch_n, spatial_size, num_heads, channels, num_levels, num_query, num_point, 143 | grad_value.data() + n * im2col_step_ * per_value_size, 144 | grad_sampling_loc.data() + n * im2col_step_ * per_sample_loc_size, 145 | grad_attn_weight.data() + n * im2col_step_ * per_attn_weight_size); 146 | 147 | })); 148 | } 149 | 150 | return { 151 | grad_value, grad_sampling_loc, grad_attn_weight 152 | }; 153 | } -------------------------------------------------------------------------------- /tools/launch.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------------------------------------------------------------------------- 2 | # Deformable DETR 3 | # Copyright (c) 2020 SenseTime. All Rights Reserved. 4 | # Licensed under the Apache License, Version 2.0 [see LICENSE for details] 5 | # -------------------------------------------------------------------------------------------------------------------------- 6 | # Modified from https://github.com/pytorch/pytorch/blob/173f224570017b4b1a3a1a13d0bff280a54d9cd9/torch/distributed/launch.py 7 | # -------------------------------------------------------------------------------------------------------------------------- 8 | 9 | r""" 10 | `torch.distributed.launch` is a module that spawns up multiple distributed 11 | training processes on each of the training nodes. 12 | The utility can be used for single-node distributed training, in which one or 13 | more processes per node will be spawned. The utility can be used for either 14 | CPU training or GPU training. If the utility is used for GPU training, 15 | each distributed process will be operating on a single GPU. This can achieve 16 | well-improved single-node training performance. It can also be used in 17 | multi-node distributed training, by spawning up multiple processes on each node 18 | for well-improved multi-node distributed training performance as well. 19 | This will especially be benefitial for systems with multiple Infiniband 20 | interfaces that have direct-GPU support, since all of them can be utilized for 21 | aggregated communication bandwidth. 22 | In both cases of single-node distributed training or multi-node distributed 23 | training, this utility will launch the given number of processes per node 24 | (``--nproc_per_node``). If used for GPU training, this number needs to be less 25 | or euqal to the number of GPUs on the current system (``nproc_per_node``), 26 | and each process will be operating on a single GPU from *GPU 0 to 27 | GPU (nproc_per_node - 1)*. 28 | **How to use this module:** 29 | 1. Single-Node multi-process distributed training 30 | :: 31 | >>> python -m torch.distributed.launch --nproc_per_node=NUM_GPUS_YOU_HAVE 32 | YOUR_TRAINING_SCRIPT.py (--arg1 --arg2 --arg3 and all other 33 | arguments of your training script) 34 | 2. Multi-Node multi-process distributed training: (e.g. 
two nodes)
35 | Node 1: *(IP: 192.168.1.1, and has a free port: 1234)*
36 | ::
37 |     >>> python -m torch.distributed.launch --nproc_per_node=NUM_GPUS_YOU_HAVE
38 |                --nnodes=2 --node_rank=0 --master_addr="192.168.1.1"
39 |                --master_port=1234 YOUR_TRAINING_SCRIPT.py (--arg1 --arg2 --arg3
40 |                and all other arguments of your training script)
41 | Node 2:
42 | ::
43 |     >>> python -m torch.distributed.launch --nproc_per_node=NUM_GPUS_YOU_HAVE
44 |                --nnodes=2 --node_rank=1 --master_addr="192.168.1.1"
45 |                --master_port=1234 YOUR_TRAINING_SCRIPT.py (--arg1 --arg2 --arg3
46 |                and all other arguments of your training script)
47 | 3. To look up what optional arguments this module offers:
48 | ::
49 |     >>> python -m torch.distributed.launch --help
50 | **Important Notices:**
51 | 1. This utility and multi-process distributed (single-node or
52 | multi-node) GPU training currently only achieves the best performance using
53 | the NCCL distributed backend. Thus the NCCL backend is the recommended backend
54 | to use for GPU training.
55 | 2. In your training program, you must parse the command-line argument:
56 | ``--local_rank=LOCAL_PROCESS_RANK``, which will be provided by this module.
57 | If your training program uses GPUs, you should ensure that your code only
58 | runs on the GPU device of LOCAL_PROCESS_RANK. This can be done by:
59 | Parsing the local_rank argument
60 | ::
61 |     >>> import argparse
62 |     >>> parser = argparse.ArgumentParser()
63 |     >>> parser.add_argument("--local_rank", type=int)
64 |     >>> args = parser.parse_args()
65 | Set your device to local rank using either
66 | ::
67 |     >>> torch.cuda.set_device(args.local_rank)  # before your code runs
68 | or
69 | ::
70 |     >>> with torch.cuda.device(args.local_rank):
71 |     >>>     # your code to run
72 | 3. In your training program, you are supposed to call the following function
73 | at the beginning to start the distributed backend. You need to make sure that
74 | the init_method uses ``env://``, which is the only ``init_method`` supported
75 | by this module.
76 | ::
77 |     torch.distributed.init_process_group(backend='YOUR BACKEND',
78 |                                          init_method='env://')
79 | 4. In your training program, you can either use regular distributed functions
80 | or use the :func:`torch.nn.parallel.DistributedDataParallel` module. If your
81 | training program uses GPUs for training and you would like to use the
82 | :func:`torch.nn.parallel.DistributedDataParallel` module,
83 | here is how to configure it.
84 | ::
85 |     model = torch.nn.parallel.DistributedDataParallel(model,
86 |                                                       device_ids=[args.local_rank],
87 |                                                       output_device=args.local_rank)
88 | Please ensure that the ``device_ids`` argument is set to the only GPU device id
89 | that your code will be operating on. This is generally the local rank of the
90 | process. In other words, ``device_ids`` needs to be ``[args.local_rank]``,
91 | and ``output_device`` needs to be ``args.local_rank`` in order to use this
92 | utility.
93 | 5. Note that this trimmed-down launcher passes ``local_rank`` to the
94 | subprocesses only via the environment variable ``LOCAL_RANK`` (it does not
95 | append a ``--local_rank`` argument), so make sure your training program
96 | falls back to ``os.environ['LOCAL_RANK']`` when the flag is absent.
97 | 
98 | .. warning::
99 |     ``local_rank`` is NOT globally unique: it is only unique per process
100 |     on a machine. Thus, don't use it to decide if you should, e.g.,
101 |     write to a networked filesystem. See
102 |     https://github.com/pytorch/pytorch/issues/12042 for an example of
103 |     how things can go wrong if you don't do this correctly.
104 | """
105 | 
106 | 
107 | import sys
108 | import subprocess
109 | import os
110 | import socket
111 | from argparse import ArgumentParser, REMAINDER
112 | 
113 | import torch
114 | 
115 | 
116 | def parse_args():
117 |     """
118 |     Helper function parsing the command line options
119 |     @retval ArgumentParser
120 |     """
121 |     parser = ArgumentParser(
122 |         description="PyTorch distributed training launch "
123 |         "helper utility that will spawn up "
124 |         "multiple distributed processes"
125 |     )
126 | 
127 |     # Optional arguments for the launch helper
128 |     parser.add_argument(
129 |         "--nnodes",
130 |         type=int,
131 |         default=1,
132 |         help="The number of nodes to use for distributed " "training",
133 |     )
134 |     parser.add_argument(
135 |         "--node_rank",
136 |         type=int,
137 |         default=0,
138 |         help="The rank of the node for multi-node distributed " "training",
139 |     )
140 |     parser.add_argument(
141 |         "--nproc_per_node",
142 |         type=int,
143 |         default=1,
144 |         help="The number of processes to launch on each node, "
145 |         "for GPU training, this is recommended to be set "
146 |         "to the number of GPUs in your system so that "
147 |         "each process can be bound to a single GPU.",
148 |     )
149 |     parser.add_argument(
150 |         "--master_addr",
151 |         default="127.0.0.1",
152 |         type=str,
153 |         help="Master node (rank 0)'s address, should be either "
154 |         "the IP address or the hostname of node 0, for "
155 |         "single node multi-proc training, the "
156 |         "--master_addr can simply be 127.0.0.1",
157 |     )
158 |     parser.add_argument(
159 |         "--master_port",
160 |         default=29500,
161 |         type=int,
162 |         help="Master node (rank 0)'s free port that needs to "
163 |         "be used for communication during distributed "
164 |         "training",
165 |     )
166 | 
167 |     # positional
168 |     parser.add_argument(
169 |         "training_script",
170 |         type=str,
171 |         help="The full path to the single GPU training "
172 |         "program/script to be launched in parallel, "
173 |         "followed by all the arguments for the "
174 |         "training script",
175 |     )
176 | 
177 |     # rest from the training program
178 |     parser.add_argument("training_script_args", nargs=REMAINDER)
179 |     return parser.parse_args()
180 | 
181 | 
182 | def main():
183 |     args = parse_args()
184 | 
185 |     # world size in terms of number of processes
186 |     dist_world_size = args.nproc_per_node * args.nnodes
187 | 
188 |     # set PyTorch distributed related environmental variables
189 |     current_env = os.environ.copy()
190 |     current_env["MASTER_ADDR"] = args.master_addr
191 |     current_env["MASTER_PORT"] = str(args.master_port)
192 |     current_env["WORLD_SIZE"] = str(dist_world_size)
193 | 
194 |     processes = []
195 | 
196 |     for local_rank in range(0, args.nproc_per_node):
197 |         # each process's rank
198 |         dist_rank = args.nproc_per_node * args.node_rank + local_rank
199 |         current_env["RANK"] = str(dist_rank)
200 |         current_env["LOCAL_RANK"] = str(local_rank)
201 | 
202 |         cmd = [args.training_script] + args.training_script_args
203 | 
204 |         process = subprocess.Popen(cmd, env=current_env)
205 |         processes.append(process)
206 | 
207 |     for process in processes:
208 |         process.wait()
209 |         if process.returncode != 0:
210 |             raise subprocess.CalledProcessError(
211 |                 returncode=process.returncode, cmd=process.args
212 |             )
213 | 
214 | 
215 | if __name__ == "__main__":
216 |     main()
217 | 
-------------------------------------------------------------------------------- /datasets/transforms.py: 
-------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------ 2 | # Deformable DETR 3 | # Copyright (c) 2020 SenseTime. All Rights Reserved. 4 | # Licensed under the Apache License, Version 2.0 [see LICENSE for details] 5 | # ------------------------------------------------------------------------ 6 | # Modified from DETR (https://github.com/facebookresearch/detr) 7 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 8 | # ------------------------------------------------------------------------ 9 | 10 | """ 11 | Transforms and data augmentation for both image + bbox. 12 | """ 13 | import random 14 | 15 | import PIL 16 | import torch 17 | import torchvision.transforms as T 18 | import torchvision.transforms.functional as F 19 | 20 | from util.box_ops import box_xyxy_to_cxcywh 21 | from util.misc import interpolate 22 | 23 | 24 | def crop(image, target, region): 25 | cropped_image = F.crop(image, *region) 26 | 27 | target = target.copy() 28 | i, j, h, w = region 29 | 30 | # should we do something wrt the original size? 31 | target["size"] = torch.tensor([h, w]) 32 | 33 | fields = ["labels", "area", "iscrowd"] 34 | 35 | if "boxes" in target: 36 | boxes = target["boxes"] 37 | max_size = torch.as_tensor([w, h], dtype=torch.float32) 38 | cropped_boxes = boxes - torch.as_tensor([j, i, j, i]) 39 | cropped_boxes = torch.min(cropped_boxes.reshape(-1, 2, 2), max_size) 40 | cropped_boxes = cropped_boxes.clamp(min=0) 41 | area = (cropped_boxes[:, 1, :] - cropped_boxes[:, 0, :]).prod(dim=1) 42 | target["boxes"] = cropped_boxes.reshape(-1, 4) 43 | target["area"] = area 44 | fields.append("boxes") 45 | 46 | if "masks" in target: 47 | # FIXME should we update the area here if there are no boxes? 
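        # masks are stored as (num_instances, H, W), so cropping the region
        # (top=i, left=j, height=h, width=w) is a plain slice over the two
        # spatial dimensions: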
48 | target["masks"] = target["masks"][:, i : i + h, j : j + w] 49 | fields.append("masks") 50 | 51 | # remove elements for which the boxes or masks that have zero area 52 | if "boxes" in target or "masks" in target: 53 | # favor boxes selection when defining which elements to keep 54 | # this is compatible with previous implementation 55 | if "boxes" in target: 56 | cropped_boxes = target["boxes"].reshape(-1, 2, 2) 57 | keep = torch.all(cropped_boxes[:, 1, :] > cropped_boxes[:, 0, :], dim=1) 58 | else: 59 | keep = target["masks"].flatten(1).any(1) 60 | 61 | for field in fields: 62 | target[field] = target[field][keep] 63 | 64 | return cropped_image, target 65 | 66 | 67 | def hflip(image, target): 68 | flipped_image = F.hflip(image) 69 | 70 | w, h = image.size 71 | 72 | target = target.copy() 73 | if "boxes" in target: 74 | boxes = target["boxes"] 75 | boxes = boxes[:, [2, 1, 0, 3]] * torch.as_tensor( 76 | [-1, 1, -1, 1] 77 | ) + torch.as_tensor([w, 0, w, 0]) 78 | target["boxes"] = boxes 79 | 80 | if "masks" in target: 81 | target["masks"] = target["masks"].flip(-1) 82 | 83 | return flipped_image, target 84 | 85 | 86 | def resize(image, target, size, max_size=None): 87 | # size can be min_size (scalar) or (w, h) tuple 88 | 89 | def get_size_with_aspect_ratio(image_size, size, max_size=None): 90 | w, h = image_size 91 | if max_size is not None: 92 | min_original_size = float(min((w, h))) 93 | max_original_size = float(max((w, h))) 94 | if max_original_size / min_original_size * size > max_size: 95 | size = int(round(max_size * min_original_size / max_original_size)) 96 | 97 | if (w <= h and w == size) or (h <= w and h == size): 98 | return (h, w) 99 | 100 | if w < h: 101 | ow = size 102 | oh = int(size * h / w) 103 | else: 104 | oh = size 105 | ow = int(size * w / h) 106 | 107 | return (oh, ow) 108 | 109 | def get_size(image_size, size, max_size=None): 110 | if isinstance(size, (list, tuple)): 111 | return size[::-1] 112 | else: 113 | return get_size_with_aspect_ratio(image_size, size, max_size) 114 | 115 | size = get_size(image.size, size, max_size) 116 | rescaled_image = F.resize(image, size) 117 | 118 | if target is None: 119 | return rescaled_image, None 120 | 121 | ratios = tuple( 122 | float(s) / float(s_orig) for s, s_orig in zip(rescaled_image.size, image.size) 123 | ) 124 | ratio_width, ratio_height = ratios 125 | 126 | target = target.copy() 127 | if "boxes" in target: 128 | boxes = target["boxes"] 129 | scaled_boxes = boxes * torch.as_tensor( 130 | [ratio_width, ratio_height, ratio_width, ratio_height] 131 | ) 132 | target["boxes"] = scaled_boxes 133 | 134 | if "area" in target: 135 | area = target["area"] 136 | scaled_area = area * (ratio_width * ratio_height) 137 | target["area"] = scaled_area 138 | 139 | h, w = size 140 | target["size"] = torch.tensor([h, w]) 141 | 142 | if "masks" in target: 143 | target["masks"] = ( 144 | interpolate(target["masks"][:, None].float(), size, mode="nearest")[:, 0] 145 | > 0.5 146 | ) 147 | 148 | return rescaled_image, target 149 | 150 | 151 | def pad(image, target, padding): 152 | # assumes that we only pad on the bottom right corners 153 | padded_image = F.pad(image, (0, 0, padding[0], padding[1])) 154 | if target is None: 155 | return padded_image, None 156 | target = target.copy() 157 | # should we do something wrt the original size? 
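    # PIL reports Image.size as (width, height) while targets store size as
    # (height, width), hence the [::-1] reversal below.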
158 |     target["size"] = torch.tensor(padded_image.size[::-1])
159 |     if "masks" in target:
160 |         target["masks"] = torch.nn.functional.pad(
161 |             target["masks"], (0, padding[0], 0, padding[1])
162 |         )
163 |     return padded_image, target
164 | 
165 | 
166 | class RandomCrop(object):
167 |     def __init__(self, size):
168 |         self.size = size
169 | 
170 |     def __call__(self, img, target):
171 |         region = T.RandomCrop.get_params(img, self.size)
172 |         return crop(img, target, region)
173 | 
174 | 
175 | class RandomSizeCrop(object):
176 |     def __init__(self, min_size: int, max_size: int):
177 |         self.min_size = min_size
178 |         self.max_size = max_size
179 | 
180 |     def __call__(self, img: PIL.Image.Image, target: dict):
181 |         w = random.randint(self.min_size, min(img.width, self.max_size))
182 |         h = random.randint(self.min_size, min(img.height, self.max_size))
183 |         region = T.RandomCrop.get_params(img, [h, w])
184 |         return crop(img, target, region)
185 | 
186 | 
187 | class CenterCrop(object):
188 |     def __init__(self, size):
189 |         self.size = size
190 | 
191 |     def __call__(self, img, target):
192 |         image_width, image_height = img.size
193 |         crop_height, crop_width = self.size
194 |         crop_top = int(round((image_height - crop_height) / 2.0))
195 |         crop_left = int(round((image_width - crop_width) / 2.0))
196 |         return crop(img, target, (crop_top, crop_left, crop_height, crop_width))
197 | 
198 | 
199 | class RandomHorizontalFlip(object):
200 |     def __init__(self, p=0.5):
201 |         self.p = p
202 | 
203 |     def __call__(self, img, target):
204 |         if random.random() < self.p:
205 |             return hflip(img, target)
206 |         return img, target
207 | 
208 | 
209 | class RandomResize(object):
210 |     def __init__(self, sizes, max_size=None):
211 |         assert isinstance(sizes, (list, tuple))
212 |         self.sizes = sizes
213 |         self.max_size = max_size
214 | 
215 |     def __call__(self, img, target=None):
216 |         size = random.choice(self.sizes)
217 |         return resize(img, target, size, self.max_size)
218 | 
219 | 
220 | class RandomPad(object):
221 |     def __init__(self, max_pad):
222 |         self.max_pad = max_pad
223 | 
224 |     def __call__(self, img, target):
225 |         pad_x = random.randint(0, self.max_pad)
226 |         pad_y = random.randint(0, self.max_pad)
227 |         return pad(img, target, (pad_x, pad_y))
228 | 
229 | 
230 | class RandomSelect(object):
231 |     """
232 |     Randomly selects between transforms1 and transforms2,
233 |     with probability p for transforms1 and (1 - p) for transforms2
234 |     """
235 | 
236 |     def __init__(self, transforms1, transforms2, p=0.5):
237 |         self.transforms1 = transforms1
238 |         self.transforms2 = transforms2
239 |         self.p = p
240 | 
241 |     def __call__(self, img, target):
242 |         if random.random() < self.p:
243 |             return self.transforms1(img, target)
244 |         return self.transforms2(img, target)
245 | 
246 | 
247 | class ToTensor(object):
248 |     def __call__(self, img, target):
249 |         return F.to_tensor(img), target
250 | 
251 | 
252 | class RandomErasing(object):
253 |     def __init__(self, *args, **kwargs):
254 |         self.eraser = T.RandomErasing(*args, **kwargs)
255 | 
256 |     def __call__(self, img, target):
257 |         return self.eraser(img), target
258 | 
259 | 
260 | class Normalize(object):
261 |     def __init__(self, mean, std):
262 |         self.mean = mean
263 |         self.std = std
264 | 
265 |     def __call__(self, image, target=None):
266 |         image = F.normalize(image, mean=self.mean, std=self.std)
267 |         if target is None:
268 |             return image, None
269 |         target = target.copy()
270 |         h, w = image.shape[-2:]
271 |         if "boxes" in target:
272 |             boxes = target["boxes"]
273 |             boxes = box_xyxy_to_cxcywh(boxes)
274 | 
boxes = boxes / torch.tensor([w, h, w, h], dtype=torch.float32) 275 | target["boxes"] = boxes 276 | return image, target 277 | 278 | 279 | class Compose(object): 280 | def __init__(self, transforms): 281 | self.transforms = transforms 282 | 283 | def __call__(self, image, target): 284 | for t in self.transforms: 285 | image, target = t(image, target) 286 | return image, target 287 | 288 | def __repr__(self): 289 | format_string = self.__class__.__name__ + "(" 290 | for t in self.transforms: 291 | format_string += "\n" 292 | format_string += " {0}".format(t) 293 | format_string += "\n)" 294 | return format_string 295 | -------------------------------------------------------------------------------- /datasets/coco_eval.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------ 2 | # Deformable DETR 3 | # Copyright (c) 2020 SenseTime. All Rights Reserved. 4 | # Licensed under the Apache License, Version 2.0 [see LICENSE for details] 5 | # ------------------------------------------------------------------------ 6 | # Modified from DETR (https://github.com/facebookresearch/detr) 7 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 8 | # ------------------------------------------------------------------------ 9 | 10 | """ 11 | COCO evaluator that works in distributed mode. 12 | 13 | Mostly copy-paste from https://github.com/pytorch/vision/blob/edfd5a7/references/detection/coco_eval.py 14 | The difference is that there is less copy-pasting from pycocotools 15 | at the end of the file, as python3 can suppress prints with contextlib 16 | """ 17 | import os 18 | import contextlib 19 | import copy 20 | import numpy as np 21 | import torch 22 | 23 | from pycocotools.cocoeval import COCOeval 24 | from pycocotools.coco import COCO 25 | import pycocotools.mask as mask_util 26 | 27 | from util.misc import all_gather 28 | 29 | 30 | class CocoEvaluator(object): 31 | def __init__(self, coco_gt, iou_types): 32 | assert isinstance(iou_types, (list, tuple)) 33 | coco_gt = copy.deepcopy(coco_gt) 34 | self.coco_gt = coco_gt 35 | 36 | self.iou_types = iou_types 37 | self.coco_eval = {} 38 | for iou_type in iou_types: 39 | self.coco_eval[iou_type] = COCOeval(coco_gt, iouType=iou_type) 40 | 41 | self.img_ids = [] 42 | self.eval_imgs = {k: [] for k in iou_types} 43 | 44 | def update(self, predictions): 45 | img_ids = list(np.unique(list(predictions.keys()))) 46 | self.img_ids.extend(img_ids) 47 | 48 | for iou_type in self.iou_types: 49 | results = self.prepare(predictions, iou_type) 50 | 51 | # suppress pycocotools prints 52 | with open(os.devnull, "w") as devnull: 53 | with contextlib.redirect_stdout(devnull): 54 | coco_dt = COCO.loadRes(self.coco_gt, results) if results else COCO() 55 | coco_eval = self.coco_eval[iou_type] 56 | 57 | coco_eval.cocoDt = coco_dt 58 | coco_eval.params.imgIds = list(img_ids) 59 | img_ids, eval_imgs = evaluate(coco_eval) 60 | 61 | self.eval_imgs[iou_type].append(eval_imgs) 62 | 63 | def synchronize_between_processes(self): 64 | for iou_type in self.iou_types: 65 | self.eval_imgs[iou_type] = np.concatenate(self.eval_imgs[iou_type], 2) 66 | create_common_coco_eval( 67 | self.coco_eval[iou_type], self.img_ids, self.eval_imgs[iou_type] 68 | ) 69 | 70 | def accumulate(self): 71 | for coco_eval in self.coco_eval.values(): 72 | coco_eval.accumulate() 73 | 74 | def summarize(self): 75 | for iou_type, coco_eval in self.coco_eval.items(): 76 | print("IoU metric: 
{}".format(iou_type)) 77 | coco_eval.summarize() 78 | 79 | def prepare(self, predictions, iou_type): 80 | if iou_type == "bbox": 81 | return self.prepare_for_coco_detection(predictions) 82 | elif iou_type == "segm": 83 | return self.prepare_for_coco_segmentation(predictions) 84 | elif iou_type == "keypoints": 85 | return self.prepare_for_coco_keypoint(predictions) 86 | else: 87 | raise ValueError("Unknown iou type {}".format(iou_type)) 88 | 89 | def prepare_for_coco_detection(self, predictions): 90 | coco_results = [] 91 | for original_id, prediction in predictions.items(): 92 | if len(prediction) == 0: 93 | continue 94 | 95 | boxes = prediction["boxes"] 96 | boxes = convert_to_xywh(boxes).tolist() 97 | scores = prediction["scores"].tolist() 98 | labels = prediction["labels"].tolist() 99 | 100 | coco_results.extend( 101 | [ 102 | { 103 | "image_id": original_id, 104 | "category_id": labels[k], 105 | "bbox": box, 106 | "score": scores[k], 107 | } 108 | for k, box in enumerate(boxes) 109 | ] 110 | ) 111 | return coco_results 112 | 113 | def prepare_for_coco_segmentation(self, predictions): 114 | coco_results = [] 115 | for original_id, prediction in predictions.items(): 116 | if len(prediction) == 0: 117 | continue 118 | 119 | scores = prediction["scores"] 120 | labels = prediction["labels"] 121 | masks = prediction["masks"] 122 | 123 | masks = masks > 0.5 124 | 125 | scores = prediction["scores"].tolist() 126 | labels = prediction["labels"].tolist() 127 | 128 | rles = [ 129 | mask_util.encode( 130 | np.array(mask[0, :, :, np.newaxis], dtype=np.uint8, order="F") 131 | )[0] 132 | for mask in masks 133 | ] 134 | for rle in rles: 135 | rle["counts"] = rle["counts"].decode("utf-8") 136 | 137 | coco_results.extend( 138 | [ 139 | { 140 | "image_id": original_id, 141 | "category_id": labels[k], 142 | "segmentation": rle, 143 | "score": scores[k], 144 | } 145 | for k, rle in enumerate(rles) 146 | ] 147 | ) 148 | return coco_results 149 | 150 | def prepare_for_coco_keypoint(self, predictions): 151 | coco_results = [] 152 | for original_id, prediction in predictions.items(): 153 | if len(prediction) == 0: 154 | continue 155 | 156 | boxes = prediction["boxes"] 157 | boxes = convert_to_xywh(boxes).tolist() 158 | scores = prediction["scores"].tolist() 159 | labels = prediction["labels"].tolist() 160 | keypoints = prediction["keypoints"] 161 | keypoints = keypoints.flatten(start_dim=1).tolist() 162 | 163 | coco_results.extend( 164 | [ 165 | { 166 | "image_id": original_id, 167 | "category_id": labels[k], 168 | "keypoints": keypoint, 169 | "score": scores[k], 170 | } 171 | for k, keypoint in enumerate(keypoints) 172 | ] 173 | ) 174 | return coco_results 175 | 176 | 177 | def convert_to_xywh(boxes): 178 | xmin, ymin, xmax, ymax = boxes.unbind(1) 179 | return torch.stack((xmin, ymin, xmax - xmin, ymax - ymin), dim=1) 180 | 181 | 182 | def merge(img_ids, eval_imgs): 183 | all_img_ids = all_gather(img_ids) 184 | all_eval_imgs = all_gather(eval_imgs) 185 | 186 | merged_img_ids = [] 187 | for p in all_img_ids: 188 | merged_img_ids.extend(p) 189 | 190 | merged_eval_imgs = [] 191 | for p in all_eval_imgs: 192 | merged_eval_imgs.append(p) 193 | 194 | merged_img_ids = np.array(merged_img_ids) 195 | merged_eval_imgs = np.concatenate(merged_eval_imgs, 2) 196 | 197 | # keep only unique (and in sorted order) images 198 | merged_img_ids, idx = np.unique(merged_img_ids, return_index=True) 199 | merged_eval_imgs = merged_eval_imgs[..., idx] 200 | 201 | return merged_img_ids, merged_eval_imgs 202 | 203 | 204 | def 
create_common_coco_eval(coco_eval, img_ids, eval_imgs): 205 | img_ids, eval_imgs = merge(img_ids, eval_imgs) 206 | img_ids = list(img_ids) 207 | eval_imgs = list(eval_imgs.flatten()) 208 | 209 | coco_eval.evalImgs = eval_imgs 210 | coco_eval.params.imgIds = img_ids 211 | coco_eval._paramsEval = copy.deepcopy(coco_eval.params) 212 | 213 | 214 | ################################################################# 215 | # From pycocotools, just removed the prints and fixed 216 | # a Python3 bug about unicode not defined 217 | ################################################################# 218 | 219 | 220 | def evaluate(self): 221 | """ 222 | Run per image evaluation on given images and return the results (a list of dicts) 223 | :return: p.imgIds, evalImgs (this modified copy returns them instead of storing them in self.evalImgs) 224 | """ 225 | # tic = time.time() 226 | # print('Running per image evaluation...') 227 | p = self.params 228 | # add backward compatibility if useSegm is specified in params 229 | if p.useSegm is not None: 230 | p.iouType = "segm" if p.useSegm == 1 else "bbox" 231 | print( 232 | "useSegm (deprecated) is not None. Running {} evaluation".format(p.iouType) 233 | ) 234 | # print('Evaluate annotation type *{}*'.format(p.iouType)) 235 | p.imgIds = list(np.unique(p.imgIds)) 236 | if p.useCats: 237 | p.catIds = list(np.unique(p.catIds)) 238 | p.maxDets = sorted(p.maxDets) 239 | self.params = p 240 | 241 | self._prepare() 242 | # loop through images, area range, max detection number 243 | catIds = p.catIds if p.useCats else [-1] 244 | 245 | if p.iouType == "segm" or p.iouType == "bbox": 246 | computeIoU = self.computeIoU 247 | elif p.iouType == "keypoints": 248 | computeIoU = self.computeOks 249 | self.ious = { 250 | (imgId, catId): computeIoU(imgId, catId) 251 | for imgId in p.imgIds 252 | for catId in catIds 253 | } 254 | 255 | evaluateImg = self.evaluateImg 256 | maxDet = p.maxDets[-1] 257 | evalImgs = [ 258 | evaluateImg(imgId, catId, areaRng, maxDet) 259 | for catId in catIds 260 | for areaRng in p.areaRng 261 | for imgId in p.imgIds 262 | ] 263 | # this is NOT in the pycocotools code, but could be done outside 264 | evalImgs = np.asarray(evalImgs).reshape(len(catIds), len(p.areaRng), len(p.imgIds)) 265 | self._paramsEval = copy.deepcopy(self.params) 266 | # toc = time.time() 267 | # print('DONE (t={:0.2f}s).'.format(toc-tic)) 268 | return p.imgIds, evalImgs 269 | 270 | 271 | ################################################################# 272 | # end of straight copy from pycocotools, just removing the prints 273 | ################################################################# 274 | -------------------------------------------------------------------------------- /models/backbone.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------ 2 | # H-DETR 3 | # Copyright (c) 2022 Peking University & Microsoft Research Asia. All Rights Reserved. 4 | # Licensed under the MIT-style license found in the LICENSE file in the root directory 5 | # ------------------------------------------------------------------------ 6 | # Deformable DETR 7 | # Copyright (c) 2020 SenseTime. All Rights Reserved. 8 | # Licensed under the Apache License, Version 2.0 [see LICENSE for details] 9 | # ------------------------------------------------------------------------ 10 | # Modified from DETR (https://github.com/facebookresearch/detr) 11 | # Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved 12 | # ------------------------------------------------------------------------ 13 | 14 | """ 15 | Backbone modules. 16 | """ 17 | from collections import OrderedDict 18 | 19 | import torch 20 | import torch.nn.functional as F 21 | import torchvision 22 | from torch import nn 23 | from torchvision.models._utils import IntermediateLayerGetter 24 | from typing import Dict, List 25 | 26 | from util.misc import NestedTensor, is_main_process 27 | 28 | from .position_encoding import build_position_encoding 29 | from .swin_transformer import SwinTransformer 30 | 31 | 32 | class FrozenBatchNorm2d(torch.nn.Module): 33 | """ 34 | BatchNorm2d where the batch statistics and the affine parameters are fixed. 35 | 36 | Copy-paste from torchvision.misc.ops with added eps before rsqrt, 37 | without which any model other than torchvision.models.resnet[18,34,50,101] 38 | produces nans. 39 | """ 40 | 41 | def __init__(self, n, eps=1e-5): 42 | super(FrozenBatchNorm2d, self).__init__() 43 | self.register_buffer("weight", torch.ones(n)) 44 | self.register_buffer("bias", torch.zeros(n)) 45 | self.register_buffer("running_mean", torch.zeros(n)) 46 | self.register_buffer("running_var", torch.ones(n)) 47 | self.eps = eps 48 | 49 | def _load_from_state_dict( 50 | self, 51 | state_dict, 52 | prefix, 53 | local_metadata, 54 | strict, 55 | missing_keys, 56 | unexpected_keys, 57 | error_msgs, 58 | ): 59 | num_batches_tracked_key = prefix + "num_batches_tracked" 60 | if num_batches_tracked_key in state_dict: 61 | del state_dict[num_batches_tracked_key] 62 | 63 | super(FrozenBatchNorm2d, self)._load_from_state_dict( 64 | state_dict, 65 | prefix, 66 | local_metadata, 67 | strict, 68 | missing_keys, 69 | unexpected_keys, 70 | error_msgs, 71 | ) 72 | 73 | def forward(self, x): 74 | # move reshapes to the beginning 75 | # to make it fuser-friendly 76 | w = self.weight.reshape(1, -1, 1, 1) 77 | b = self.bias.reshape(1, -1, 1, 1) 78 | rv = self.running_var.reshape(1, -1, 1, 1) 79 | rm = self.running_mean.reshape(1, -1, 1, 1) 80 | eps = self.eps 81 | scale = w * (rv + eps).rsqrt() 82 | bias = b - rm * scale 83 | return x * scale + bias 84 | 85 | 86 | class BackboneBase(nn.Module): 87 | def __init__( 88 | self, backbone: nn.Module, train_backbone: bool, return_interm_layers: bool 89 | ): 90 | super().__init__() 91 | for name, parameter in backbone.named_parameters(): 92 | if ( 93 | not train_backbone 94 | or "layer2" not in name 95 | and "layer3" not in name 96 | and "layer4" not in name 97 | ): 98 | parameter.requires_grad_(False) 99 | if return_interm_layers: 100 | # return_layers = {"layer1": "0", "layer2": "1", "layer3": "2", "layer4": "3"} 101 | return_layers = {"layer2": "0", "layer3": "1", "layer4": "2"} 102 | self.strides = [8, 16, 32] 103 | self.num_channels = [512, 1024, 2048] 104 | else: 105 | return_layers = {"layer4": "0"} 106 | self.strides = [32] 107 | self.num_channels = [2048] 108 | self.body = IntermediateLayerGetter(backbone, return_layers=return_layers) 109 | 110 | def forward(self, tensor_list: NestedTensor): 111 | xs = self.body(tensor_list.tensors) 112 | out: Dict[str, NestedTensor] = {} 113 | for name, x in xs.items(): 114 | m = tensor_list.mask 115 | assert m is not None 116 | mask = F.interpolate(m[None].float(), size=x.shape[-2:]).to(torch.bool)[0] 117 | out[name] = NestedTensor(x, mask) 118 | return out 119 | 120 | 121 | class Backbone(BackboneBase): 122 | """ResNet backbone with frozen BatchNorm.""" 123 | 124 | def __init__( 125 | self, 126 | name: str, 127 | train_backbone: bool, 
128 | return_interm_layers: bool, 129 | dilation: bool, 130 | ): 131 | norm_layer = FrozenBatchNorm2d 132 | backbone = getattr(torchvision.models, name)( 133 | replace_stride_with_dilation=[False, False, dilation], 134 | pretrained=is_main_process(), 135 | norm_layer=norm_layer, 136 | ) 137 | assert name not in ("resnet18", "resnet34"), "number of channels are hard coded" 138 | super().__init__(backbone, train_backbone, return_interm_layers) 139 | if dilation: 140 | self.strides[-1] = self.strides[-1] // 2 141 | 142 | 143 | class TransformerBackbone(nn.Module): 144 | def __init__( 145 | self, backbone: str, train_backbone: bool, return_interm_layers: bool, args 146 | ): 147 | super().__init__() 148 | out_indices = (1, 2, 3) 149 | if backbone == "swin_tiny": 150 | backbone = SwinTransformer( 151 | embed_dim=96, 152 | depths=[2, 2, 6, 2], 153 | num_heads=[3, 6, 12, 24], 154 | window_size=7, 155 | ape=False, 156 | drop_path_rate=args.drop_path_rate, 157 | patch_norm=True, 158 | use_checkpoint=True, 159 | out_indices=out_indices, 160 | ) 161 | embed_dim = 96 162 | backbone.init_weights(args.pretrained_backbone_path) 163 | elif backbone == "swin_small": 164 | backbone = SwinTransformer( 165 | embed_dim=96, 166 | depths=[2, 2, 18, 2], 167 | num_heads=[3, 6, 12, 24], 168 | window_size=7, 169 | ape=False, 170 | drop_path_rate=args.drop_path_rate, 171 | patch_norm=True, 172 | use_checkpoint=True, 173 | out_indices=out_indices, 174 | ) 175 | embed_dim = 96 176 | backbone.init_weights(args.pretrained_backbone_path) 177 | elif backbone == "swin_large": 178 | backbone = SwinTransformer( 179 | embed_dim=192, 180 | depths=[2, 2, 18, 2], 181 | num_heads=[6, 12, 24, 48], 182 | window_size=7, 183 | ape=False, 184 | drop_path_rate=args.drop_path_rate, 185 | patch_norm=True, 186 | use_checkpoint=True, 187 | out_indices=out_indices, 188 | ) 189 | embed_dim = 192 190 | backbone.init_weights(args.pretrained_backbone_path) 191 | elif backbone == "swin_large_window12": 192 | backbone = SwinTransformer( 193 | pretrain_img_size=384, 194 | embed_dim=192, 195 | depths=[2, 2, 18, 2], 196 | num_heads=[6, 12, 24, 48], 197 | window_size=12, 198 | ape=False, 199 | drop_path_rate=args.drop_path_rate, 200 | patch_norm=True, 201 | use_checkpoint=True, 202 | out_indices=out_indices, 203 | ) 204 | embed_dim = 192 205 | backbone.init_weights(args.pretrained_backbone_path) 206 | else: 207 | raise NotImplementedError 208 | 209 | for name, parameter in backbone.named_parameters(): 210 | # TODO: freeze some layers? 
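# note: unlike BackboneBase above, which can leave layers 2-4 trainable, this branch currently freezes either all Swin parameters or none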
211 | if not train_backbone: 212 | parameter.requires_grad_(False) 213 | 214 | if return_interm_layers: 215 | 216 | self.strides = [8, 16, 32] 217 | self.num_channels = [ 218 | embed_dim * 2, 219 | embed_dim * 4, 220 | embed_dim * 8, 221 | ] 222 | else: 223 | self.strides = [32] 224 | self.num_channels = [embed_dim * 8] 225 | 226 | self.body = backbone 227 | 228 | def forward(self, tensor_list: NestedTensor): 229 | xs = self.body(tensor_list.tensors) 230 | 231 | out: Dict[str, NestedTensor] = {} 232 | for name, x in xs.items(): 233 | m = tensor_list.mask 234 | assert m is not None 235 | mask = F.interpolate(m[None].float(), size=x.shape[-2:]).to(torch.bool)[0] 236 | out[name] = NestedTensor(x, mask) 237 | return out 238 | 239 | 240 | class Joiner(nn.Sequential): 241 | def __init__(self, backbone, position_embedding): 242 | super().__init__(backbone, position_embedding) 243 | self.strides = backbone.strides 244 | self.num_channels = backbone.num_channels 245 | 246 | def forward(self, tensor_list: NestedTensor): 247 | xs = self[0](tensor_list) 248 | out: List[NestedTensor] = [] 249 | pos = [] 250 | for name, x in sorted(xs.items()): 251 | out.append(x) 252 | 253 | # position encoding 254 | for x in out: 255 | pos.append(self[1](x).to(x.tensors.dtype)) 256 | 257 | return out, pos 258 | 259 | 260 | def build_backbone(args): 261 | position_embedding = build_position_encoding(args) 262 | train_backbone = args.lr_backbone > 0 263 | return_interm_layers = args.masks or (args.num_feature_levels > 1) 264 | if "resnet" in args.backbone: 265 | backbone = Backbone( 266 | args.backbone, train_backbone, return_interm_layers, args.dilation, 267 | ) 268 | else: 269 | backbone = TransformerBackbone( 270 | args.backbone, train_backbone, return_interm_layers, args 271 | ) 272 | model = Joiner(backbone, position_embedding) 273 | return model 274 | -------------------------------------------------------------------------------- /engine.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------ 2 | # H-DETR 3 | # Copyright (c) 2022 Peking University & Microsoft Research Asia. All Rights Reserved. 4 | # Licensed under the MIT-style license found in the LICENSE file in the root directory 5 | # ------------------------------------------------------------------------ 6 | # Deformable DETR 7 | # Copyright (c) 2020 SenseTime. All Rights Reserved. 8 | # Licensed under the Apache License, Version 2.0 [see LICENSE for details] 9 | # ------------------------------------------------------------------------ 10 | # Modified from DETR (https://github.com/facebookresearch/detr) 11 | # Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved 12 | # ------------------------------------------------------------------------ 13 | 14 | """ 15 | Train and eval functions used in main.py 16 | """ 17 | import math 18 | import os 19 | import sys 20 | from typing import Iterable 21 | import copy 22 | 23 | import wandb 24 | import torch 25 | import util.misc as utils 26 | from datasets.coco_eval import CocoEvaluator 27 | from datasets.panoptic_eval import PanopticEvaluator 28 | from datasets.data_prefetcher import data_prefetcher 29 | 30 | scaler = torch.cuda.amp.GradScaler() 31 | 32 | 33 | def train_hybrid(outputs, targets, k_one2many, criterion, lambda_one2many): 34 | # one-to-one-loss 35 | loss_dict = criterion(outputs, targets) 36 | multi_targets = copy.deepcopy(targets) 37 | # repeat the targets 38 | for target in multi_targets: 39 | target["boxes"] = target["boxes"].repeat(k_one2many, 1) 40 | target["labels"] = target["labels"].repeat(k_one2many) 41 | 42 | outputs_one2many = dict() 43 | outputs_one2many["pred_logits"] = outputs["pred_logits_one2many"] 44 | outputs_one2many["pred_boxes"] = outputs["pred_boxes_one2many"] 45 | outputs_one2many["aux_outputs"] = outputs["aux_outputs_one2many"] 46 | 47 | # one-to-many loss 48 | loss_dict_one2many = criterion(outputs_one2many, multi_targets) 49 | for key, value in loss_dict_one2many.items(): 50 | if key + "_one2many" in loss_dict.keys(): 51 | loss_dict[key + "_one2many"] += value * lambda_one2many 52 | else: 53 | loss_dict[key + "_one2many"] = value * lambda_one2many 54 | return loss_dict 55 | 56 | 57 | def train_one_epoch( 58 | model: torch.nn.Module, 59 | criterion: torch.nn.Module, 60 | data_loader: Iterable, 61 | optimizer: torch.optim.Optimizer, 62 | device: torch.device, 63 | epoch: int, 64 | max_norm: float = 0, 65 | k_one2many=1, 66 | lambda_one2many=1.0, 67 | use_wandb=False, 68 | use_fp16=False, 69 | ): 70 | model.train() 71 | criterion.train() 72 | metric_logger = utils.MetricLogger(delimiter=" ") 73 | metric_logger.add_meter("lr", utils.SmoothedValue(window_size=1, fmt="{value:.6f}")) 74 | metric_logger.add_meter( 75 | "class_error", utils.SmoothedValue(window_size=1, fmt="{value:.2f}") 76 | ) 77 | metric_logger.add_meter( 78 | "grad_norm", utils.SmoothedValue(window_size=1, fmt="{value:.2f}") 79 | ) 80 | header = "Epoch: [{}]".format(epoch) 81 | print_freq = 10 82 | 83 | prefetcher = data_prefetcher(data_loader, device, prefetch=True) 84 | samples, targets = prefetcher.next() 85 | 86 | # for samples, targets in metric_logger.log_every(data_loader, print_freq, header): 87 | for _ in metric_logger.log_every(range(len(data_loader)), print_freq, header): 88 | with torch.cuda.amp.autocast() if use_fp16 else torch.cuda.amp.autocast( 89 | enabled=False 90 | ): 91 | if use_fp16: 92 | optimizer.zero_grad() 93 | outputs = model(samples) 94 | 95 | if k_one2many > 0: 96 | loss_dict = train_hybrid( 97 | outputs, targets, k_one2many, criterion, lambda_one2many 98 | ) 99 | else: 100 | loss_dict = criterion(outputs, targets) 101 | weight_dict = criterion.weight_dict 102 | losses = sum( 103 | loss_dict[k] * weight_dict[k] for k in loss_dict.keys() if k in weight_dict 104 | ) 105 | 106 | # reduce losses over all GPUs for logging purposes 107 | loss_dict_reduced = utils.reduce_dict(loss_dict) 108 | loss_dict_reduced_unscaled = { 109 | f"{k}_unscaled": v for k, v in loss_dict_reduced.items() 110 | } 111 | loss_dict_reduced_scaled = { 112 | k: v * weight_dict[k] 113 | for k, v in loss_dict_reduced.items() 114 | if k in weight_dict 115 | } 116 | losses_reduced_scaled = 
sum(loss_dict_reduced_scaled.values()) 117 | 118 | loss_value = losses_reduced_scaled.item() 119 | 120 | if not math.isfinite(loss_value): 121 | print("Loss is {}, stopping training".format(loss_value)) 122 | print(loss_dict_reduced) 123 | sys.exit(1) 124 | 125 | if use_fp16: 126 | scaler.scale(losses).backward() 127 | scaler.unscale_(optimizer) 128 | else: 129 | optimizer.zero_grad() 130 | losses.backward() 131 | if max_norm > 0: 132 | grad_total_norm = torch.nn.utils.clip_grad_norm_( 133 | model.parameters(), max_norm 134 | ) 135 | else: 136 | grad_total_norm = utils.get_total_grad_norm(model.parameters(), max_norm) 137 | 138 | if use_fp16: 139 | scaler.step(optimizer) 140 | scaler.update() 141 | else: 142 | optimizer.step() 143 | 144 | metric_logger.update( 145 | loss=loss_value, **loss_dict_reduced_scaled, **loss_dict_reduced_unscaled 146 | ) 147 | metric_logger.update(class_error=loss_dict_reduced["class_error"]) 148 | metric_logger.update(lr=optimizer.param_groups[0]["lr"]) 149 | metric_logger.update(grad_norm=grad_total_norm) 150 | 151 | samples, targets = prefetcher.next() 152 | 153 | if use_wandb: 154 | try: 155 | wandb.log(loss_dict) 156 | except Exception: 157 | pass 158 | # gather the stats from all processes 159 | metric_logger.synchronize_between_processes() 160 | print("Averaged stats:", metric_logger) 161 | return {k: meter.global_avg for k, meter in metric_logger.meters.items()} 162 | 163 | 164 | @torch.no_grad() 165 | def evaluate( 166 | model, 167 | criterion, 168 | postprocessors, 169 | data_loader, 170 | base_ds, 171 | device, 172 | output_dir, 173 | use_wandb=False, 174 | ): 175 | # disable the one-to-many branch queries 176 | # save them first 177 | save_num_queries = model.module.num_queries 178 | save_two_stage_num_proposals = model.module.transformer.two_stage_num_proposals 179 | model.module.num_queries = model.module.num_queries_one2one 180 | model.module.transformer.two_stage_num_proposals = model.module.num_queries 181 | 182 | model.eval() 183 | criterion.eval() 184 | 185 | metric_logger = utils.MetricLogger(delimiter=" ") 186 | metric_logger.add_meter( 187 | "class_error", utils.SmoothedValue(window_size=1, fmt="{value:.2f}") 188 | ) 189 | header = "Test:" 190 | 191 | iou_types = tuple(k for k in ("segm", "bbox") if k in postprocessors.keys()) 192 | coco_evaluator = CocoEvaluator(base_ds, iou_types) 193 | # coco_evaluator.coco_eval[iou_types[0]].params.iouThrs = [0, 0.1, 0.5, 0.75] 194 | 195 | panoptic_evaluator = None 196 | if "panoptic" in postprocessors.keys(): 197 | panoptic_evaluator = PanopticEvaluator( 198 | data_loader.dataset.ann_file, 199 | data_loader.dataset.ann_folder, 200 | output_dir=os.path.join(output_dir, "panoptic_eval"), 201 | ) 202 | 203 | for samples, targets in metric_logger.log_every(data_loader, 10, header): 204 | samples = samples.to(device) 205 | targets = [{k: v.to(device) for k, v in t.items()} for t in targets] 206 | 207 | outputs = model(samples) 208 | loss_dict = criterion(outputs, targets) 209 | weight_dict = criterion.weight_dict 210 | 211 | # reduce losses over all GPUs for logging purposes 212 | loss_dict_reduced = utils.reduce_dict(loss_dict) 213 | loss_dict_reduced_scaled = { 214 | k: v * weight_dict[k] 215 | for k, v in loss_dict_reduced.items() 216 | if k in weight_dict 217 | } 218 | loss_dict_reduced_unscaled = { 219 | f"{k}_unscaled": v for k, v in loss_dict_reduced.items() 220 | } 221 | metric_logger.update( 222 | loss=sum(loss_dict_reduced_scaled.values()), 223 | **loss_dict_reduced_scaled, 224 | **loss_dict_reduced_unscaled, 
225 | ) 226 | metric_logger.update(class_error=loss_dict_reduced["class_error"]) 227 | 228 | orig_target_sizes = torch.stack([t["orig_size"] for t in targets], dim=0) 229 | results = postprocessors["bbox"](outputs, orig_target_sizes) 230 | if "segm" in postprocessors.keys(): 231 | target_sizes = torch.stack([t["size"] for t in targets], dim=0) 232 | results = postprocessors["segm"]( 233 | results, outputs, orig_target_sizes, target_sizes 234 | ) 235 | res = { 236 | target["image_id"].item(): output 237 | for target, output in zip(targets, results) 238 | } 239 | if coco_evaluator is not None: 240 | coco_evaluator.update(res) 241 | 242 | if panoptic_evaluator is not None: 243 | res_pano = postprocessors["panoptic"]( 244 | outputs, target_sizes, orig_target_sizes 245 | ) 246 | for i, target in enumerate(targets): 247 | image_id = target["image_id"].item() 248 | file_name = f"{image_id:012d}.png" 249 | res_pano[i]["image_id"] = image_id 250 | res_pano[i]["file_name"] = file_name 251 | 252 | panoptic_evaluator.update(res_pano) 253 | 254 | # gather the stats from all processes 255 | metric_logger.synchronize_between_processes() 256 | print("Averaged stats:", metric_logger) 257 | if coco_evaluator is not None: 258 | coco_evaluator.synchronize_between_processes() 259 | if panoptic_evaluator is not None: 260 | panoptic_evaluator.synchronize_between_processes() 261 | 262 | # accumulate predictions from all images 263 | if coco_evaluator is not None: 264 | coco_evaluator.accumulate() 265 | coco_evaluator.summarize() 266 | panoptic_res = None 267 | if panoptic_evaluator is not None: 268 | panoptic_res = panoptic_evaluator.summarize() 269 | stats = {k: meter.global_avg for k, meter in metric_logger.meters.items()} 270 | if coco_evaluator is not None: 271 | if "bbox" in postprocessors.keys(): 272 | stats["coco_eval_bbox"] = coco_evaluator.coco_eval["bbox"].stats.tolist() 273 | if "segm" in postprocessors.keys(): 274 | stats["coco_eval_masks"] = coco_evaluator.coco_eval["segm"].stats.tolist() 275 | if panoptic_res is not None: 276 | stats["PQ_all"] = panoptic_res["All"] 277 | stats["PQ_th"] = panoptic_res["Things"] 278 | stats["PQ_st"] = panoptic_res["Stuff"] 279 | if use_wandb: 280 | try: 281 | wandb.log({"AP": stats["coco_eval_bbox"][0]}) 282 | wandb.log(stats) 283 | except Exception: 284 | pass 285 | 286 | # recover the model parameters for the next training epoch 287 | model.module.num_queries = save_num_queries 288 | model.module.transformer.two_stage_num_proposals = save_two_stage_num_proposals 289 | return stats, coco_evaluator 290 | -------------------------------------------------------------------------------- /models/segmentation.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------ 2 | # Deformable DETR 3 | # Copyright (c) 2020 SenseTime. All Rights Reserved. 4 | # Licensed under the Apache License, Version 2.0 [see LICENSE for details] 5 | # ------------------------------------------------------------------------ 6 | # Modified from DETR (https://github.com/facebookresearch/detr) 7 | # Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved 8 | # ------------------------------------------------------------------------ 9 | 10 | """ 11 | This file provides the definition of the convolutional heads used to predict masks, as well as the losses 12 | """ 13 | import io 14 | from collections import defaultdict 15 | 16 | import torch 17 | import torch.nn as nn 18 | import torch.nn.functional as F 19 | from PIL import Image 20 | 21 | import util.box_ops as box_ops 22 | from util.misc import NestedTensor, interpolate, nested_tensor_from_tensor_list 23 | 24 | try: 25 | from panopticapi.utils import id2rgb, rgb2id 26 | except ImportError: 27 | pass 28 | 29 | 30 | class DETRsegm(nn.Module): 31 | def __init__(self, detr, freeze_detr=False): 32 | super().__init__() 33 | self.detr = detr 34 | 35 | if freeze_detr: 36 | for p in self.parameters(): 37 | p.requires_grad_(False) 38 | 39 | hidden_dim, nheads = detr.transformer.d_model, detr.transformer.nhead 40 | self.bbox_attention = MHAttentionMap(hidden_dim, hidden_dim, nheads, dropout=0) 41 | self.mask_head = MaskHeadSmallConv( 42 | hidden_dim + nheads, [1024, 512, 256], hidden_dim 43 | ) 44 | 45 | def forward(self, samples: NestedTensor): 46 | if not isinstance(samples, NestedTensor): 47 | samples = nested_tensor_from_tensor_list(samples) 48 | features, pos = self.detr.backbone(samples) 49 | 50 | bs = features[-1].tensors.shape[0] 51 | 52 | src, mask = features[-1].decompose() 53 | src_proj = self.detr.input_proj(src) 54 | hs, memory = self.detr.transformer( 55 | src_proj, mask, self.detr.query_embed.weight, pos[-1] 56 | ) 57 | 58 | outputs_class = self.detr.class_embed(hs) 59 | outputs_coord = self.detr.bbox_embed(hs).sigmoid() 60 | out = {"pred_logits": outputs_class[-1], "pred_boxes": outputs_coord[-1]} 61 | if self.detr.aux_loss: 62 | out["aux_outputs"] = [ 63 | {"pred_logits": a, "pred_boxes": b} 64 | for a, b in zip(outputs_class[:-1], outputs_coord[:-1]) 65 | ] 66 | 67 | # FIXME h_boxes takes the last one computed, keep this in mind 68 | bbox_mask = self.bbox_attention(hs[-1], memory, mask=mask) 69 | 70 | seg_masks = self.mask_head( 71 | src_proj, 72 | bbox_mask, 73 | [features[2].tensors, features[1].tensors, features[0].tensors], 74 | ) 75 | outputs_seg_masks = seg_masks.view( 76 | bs, self.detr.num_queries, seg_masks.shape[-2], seg_masks.shape[-1] 77 | ) 78 | 79 | out["pred_masks"] = outputs_seg_masks 80 | return out 81 | 82 | 83 | class MaskHeadSmallConv(nn.Module): 84 | """ 85 | Simple convolutional head, using group norm. 
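The head takes the projected backbone features concatenated with the per-query attention maps, then progressively reduces the channel width while mixing in adapted FPN features at each scale.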
86 | Upsampling is done using an FPN approach 87 | """ 88 | 89 | def __init__(self, dim, fpn_dims, context_dim): 90 | super().__init__() 91 | 92 | inter_dims = [ 93 | dim, 94 | context_dim // 2, 95 | context_dim // 4, 96 | context_dim // 8, 97 | context_dim // 16, 98 | context_dim // 64, 99 | ] 100 | self.lay1 = torch.nn.Conv2d(dim, dim, 3, padding=1) 101 | self.gn1 = torch.nn.GroupNorm(8, dim) 102 | self.lay2 = torch.nn.Conv2d(dim, inter_dims[1], 3, padding=1) 103 | self.gn2 = torch.nn.GroupNorm(8, inter_dims[1]) 104 | self.lay3 = torch.nn.Conv2d(inter_dims[1], inter_dims[2], 3, padding=1) 105 | self.gn3 = torch.nn.GroupNorm(8, inter_dims[2]) 106 | self.lay4 = torch.nn.Conv2d(inter_dims[2], inter_dims[3], 3, padding=1) 107 | self.gn4 = torch.nn.GroupNorm(8, inter_dims[3]) 108 | self.lay5 = torch.nn.Conv2d(inter_dims[3], inter_dims[4], 3, padding=1) 109 | self.gn5 = torch.nn.GroupNorm(8, inter_dims[4]) 110 | self.out_lay = torch.nn.Conv2d(inter_dims[4], 1, 3, padding=1) 111 | 112 | self.dim = dim 113 | 114 | self.adapter1 = torch.nn.Conv2d(fpn_dims[0], inter_dims[1], 1) 115 | self.adapter2 = torch.nn.Conv2d(fpn_dims[1], inter_dims[2], 1) 116 | self.adapter3 = torch.nn.Conv2d(fpn_dims[2], inter_dims[3], 1) 117 | 118 | for m in self.modules(): 119 | if isinstance(m, nn.Conv2d): 120 | nn.init.kaiming_uniform_(m.weight, a=1) 121 | nn.init.constant_(m.bias, 0) 122 | 123 | def forward(self, x, bbox_mask, fpns): 124 | def expand(tensor, length): 125 | return tensor.unsqueeze(1).repeat(1, int(length), 1, 1, 1).flatten(0, 1) 126 | 127 | x = torch.cat([expand(x, bbox_mask.shape[1]), bbox_mask.flatten(0, 1)], 1) 128 | 129 | x = self.lay1(x) 130 | x = self.gn1(x) 131 | x = F.relu(x) 132 | x = self.lay2(x) 133 | x = self.gn2(x) 134 | x = F.relu(x) 135 | 136 | cur_fpn = self.adapter1(fpns[0]) 137 | if cur_fpn.size(0) != x.size(0): 138 | cur_fpn = expand(cur_fpn, x.size(0) / cur_fpn.size(0)) 139 | x = cur_fpn + F.interpolate(x, size=cur_fpn.shape[-2:], mode="nearest") 140 | x = self.lay3(x) 141 | x = self.gn3(x) 142 | x = F.relu(x) 143 | 144 | cur_fpn = self.adapter2(fpns[1]) 145 | if cur_fpn.size(0) != x.size(0): 146 | cur_fpn = expand(cur_fpn, x.size(0) / cur_fpn.size(0)) 147 | x = cur_fpn + F.interpolate(x, size=cur_fpn.shape[-2:], mode="nearest") 148 | x = self.lay4(x) 149 | x = self.gn4(x) 150 | x = F.relu(x) 151 | 152 | cur_fpn = self.adapter3(fpns[2]) 153 | if cur_fpn.size(0) != x.size(0): 154 | cur_fpn = expand(cur_fpn, x.size(0) / cur_fpn.size(0)) 155 | x = cur_fpn + F.interpolate(x, size=cur_fpn.shape[-2:], mode="nearest") 156 | x = self.lay5(x) 157 | x = self.gn5(x) 158 | x = F.relu(x) 159 | 160 | x = self.out_lay(x) 161 | return x 162 | 163 | 164 | class MHAttentionMap(nn.Module): 165 | """This is a 2D attention module, which only returns the attention softmax (no multiplication by value)""" 166 | 167 | def __init__(self, query_dim, hidden_dim, num_heads, dropout=0, bias=True): 168 | super().__init__() 169 | self.num_heads = num_heads 170 | self.hidden_dim = hidden_dim 171 | self.dropout = nn.Dropout(dropout) 172 | 173 | self.q_linear = nn.Linear(query_dim, hidden_dim, bias=bias) 174 | self.k_linear = nn.Linear(query_dim, hidden_dim, bias=bias) 175 | 176 | nn.init.zeros_(self.k_linear.bias) 177 | nn.init.zeros_(self.q_linear.bias) 178 | nn.init.xavier_uniform_(self.k_linear.weight) 179 | nn.init.xavier_uniform_(self.q_linear.weight) 180 | self.normalize_fact = float(hidden_dim / self.num_heads) ** -0.5 181 | 182 | def forward(self, q, k, mask=None): 183 | q = self.q_linear(q) 184 | k = 
F.conv2d( 185 | k, self.k_linear.weight.unsqueeze(-1).unsqueeze(-1), self.k_linear.bias 186 | ) 187 | qh = q.view( 188 | q.shape[0], q.shape[1], self.num_heads, self.hidden_dim // self.num_heads 189 | ) 190 | kh = k.view( 191 | k.shape[0], 192 | self.num_heads, 193 | self.hidden_dim // self.num_heads, 194 | k.shape[-2], 195 | k.shape[-1], 196 | ) 197 | weights = torch.einsum("bqnc,bnchw->bqnhw", qh * self.normalize_fact, kh) 198 | 199 | if mask is not None: 200 | weights.masked_fill_(mask.unsqueeze(1).unsqueeze(1), float("-inf")) 201 | weights = F.softmax(weights.flatten(2), dim=-1).view_as(weights) 202 | weights = self.dropout(weights) 203 | return weights 204 | 205 | 206 | def dice_loss(inputs, targets, num_boxes): 207 | """ 208 | Compute the DICE loss, similar to generalized IOU for masks 209 | Args: 210 | inputs: A float tensor of arbitrary shape. 211 | The predictions for each example. 212 | targets: A float tensor with the same shape as inputs. Stores the binary 213 | classification label for each element in inputs 214 | (0 for the negative class and 1 for the positive class). 215 | """ 216 | inputs = inputs.sigmoid() 217 | inputs = inputs.flatten(1) 218 | numerator = 2 * (inputs * targets).sum(1) 219 | denominator = inputs.sum(-1) + targets.sum(-1) 220 | loss = 1 - (numerator + 1) / (denominator + 1) 221 | return loss.sum() / num_boxes 222 | 223 | 224 | def sigmoid_focal_loss( 225 | inputs, targets, num_boxes, alpha: float = 0.25, gamma: float = 2 226 | ): 227 | """ 228 | Loss used in RetinaNet for dense detection: https://arxiv.org/abs/1708.02002. 229 | Args: 230 | inputs: A float tensor of arbitrary shape. 231 | The predictions for each example. 232 | targets: A float tensor with the same shape as inputs. Stores the binary 233 | classification label for each element in inputs 234 | (0 for the negative class and 1 for the positive class). 235 | alpha: (optional) Weighting factor in range (0,1) to balance 236 | positive vs negative examples. Default = 0.25 (a negative value disables weighting). 237 | gamma: Exponent of the modulating factor (1 - p_t) to 238 | balance easy vs hard examples. 
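For example, with gamma = 2 a well-classified element with p_t = 0.9 keeps only (1 - 0.9) ** 2 = 1% of its cross-entropy loss, which focuses training on hard examples.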
239 | Returns: 240 | Loss tensor 241 | """ 242 | prob = inputs.sigmoid() 243 | ce_loss = F.binary_cross_entropy_with_logits(inputs, targets, reduction="none") 244 | p_t = prob * targets + (1 - prob) * (1 - targets) 245 | loss = ce_loss * ((1 - p_t) ** gamma) 246 | 247 | if alpha >= 0: 248 | alpha_t = alpha * targets + (1 - alpha) * (1 - targets) 249 | loss = alpha_t * loss 250 | 251 | return loss.mean(1).sum() / num_boxes 252 | 253 | 254 | class PostProcessSegm(nn.Module): 255 | def __init__(self, threshold=0.5): 256 | super().__init__() 257 | self.threshold = threshold 258 | 259 | @torch.no_grad() 260 | def forward(self, results, outputs, orig_target_sizes, max_target_sizes): 261 | assert len(orig_target_sizes) == len(max_target_sizes) 262 | max_h, max_w = max_target_sizes.max(0)[0].tolist() 263 | outputs_masks = outputs["pred_masks"].squeeze(2) 264 | outputs_masks = F.interpolate( 265 | outputs_masks, size=(max_h, max_w), mode="bilinear", align_corners=False 266 | ) 267 | outputs_masks = (outputs_masks.sigmoid() > self.threshold).cpu() 268 | 269 | for i, (cur_mask, t, tt) in enumerate( 270 | zip(outputs_masks, max_target_sizes, orig_target_sizes) 271 | ): 272 | img_h, img_w = t[0], t[1] 273 | results[i]["masks"] = cur_mask[:, :img_h, :img_w].unsqueeze(1) 274 | results[i]["masks"] = F.interpolate( 275 | results[i]["masks"].float(), size=tuple(tt.tolist()), mode="nearest" 276 | ).byte() 277 | 278 | return results 279 | 280 | 281 | class PostProcessPanoptic(nn.Module): 282 | """This class converts the output of the model to the final panoptic result, in the format expected by the 283 | coco panoptic API """ 284 | 285 | def __init__(self, is_thing_map, threshold=0.85): 286 | """ 287 | Parameters: 288 | is_thing_map: This is a dict whose keys are the class ids, and the values a boolean indicating whether 289 | the class is a thing (True) or a stuff (False) class 290 | threshold: confidence threshold: segments with confidence lower than this will be deleted 291 | """ 292 | super().__init__() 293 | self.threshold = threshold 294 | self.is_thing_map = is_thing_map 295 | 296 | def forward(self, outputs, processed_sizes, target_sizes=None): 297 | """ This function computes the panoptic prediction from the model's predictions. 298 | Parameters: 299 | outputs: This is a dict coming directly from the model. See the model doc for the content. 300 | processed_sizes: This is a list of tuples (or torch tensors) of sizes of the images that were passed to the 301 | model, i.e. the size after data augmentation but before batching. 302 | target_sizes: This is a list of tuples (or torch tensors) corresponding to the requested final size 303 | of each prediction. 
If left to None, it will default to the processed_sizes 304 | """ 305 | if target_sizes is None: 306 | target_sizes = processed_sizes 307 | assert len(processed_sizes) == len(target_sizes) 308 | out_logits, raw_masks, raw_boxes = ( 309 | outputs["pred_logits"], 310 | outputs["pred_masks"], 311 | outputs["pred_boxes"], 312 | ) 313 | assert len(out_logits) == len(raw_masks) == len(target_sizes) 314 | preds = [] 315 | 316 | def to_tuple(tup): 317 | if isinstance(tup, tuple): 318 | return tup 319 | return tuple(tup.cpu().tolist()) 320 | 321 | for cur_logits, cur_masks, cur_boxes, size, target_size in zip( 322 | out_logits, raw_masks, raw_boxes, processed_sizes, target_sizes 323 | ): 324 | # we filter empty queries and detections below the threshold 325 | scores, labels = cur_logits.softmax(-1).max(-1) 326 | keep = labels.ne(outputs["pred_logits"].shape[-1] - 1) & ( 327 | scores > self.threshold 328 | ) 329 | cur_scores, cur_classes = cur_logits.softmax(-1).max(-1) 330 | cur_scores = cur_scores[keep] 331 | cur_classes = cur_classes[keep] 332 | cur_masks = cur_masks[keep] 333 | cur_masks = interpolate( 334 | cur_masks[None], to_tuple(size), mode="bilinear" 335 | ).squeeze(0) 336 | cur_boxes = box_ops.box_cxcywh_to_xyxy(cur_boxes[keep]) 337 | 338 | h, w = cur_masks.shape[-2:] 339 | assert len(cur_boxes) == len(cur_classes) 340 | 341 | # It may be that we have several predicted masks for the same stuff class. 342 | # In the following, we track the list of mask ids for each stuff class (they are merged later on) 343 | cur_masks = cur_masks.flatten(1) 344 | stuff_equiv_classes = defaultdict(lambda: []) 345 | for k, label in enumerate(cur_classes): 346 | if not self.is_thing_map[label.item()]: 347 | stuff_equiv_classes[label.item()].append(k) 348 | 349 | def get_ids_area(masks, scores, dedup=False): 350 | # This helper function creates the final panoptic segmentation image 351 | # It also returns the area of the masks that appear on the image 352 | 353 | m_id = masks.transpose(0, 1).softmax(-1) 354 | 355 | if m_id.shape[-1] == 0: 356 | # We didn't detect any mask :( 357 | m_id = torch.zeros((h, w), dtype=torch.long, device=m_id.device) 358 | else: 359 | m_id = m_id.argmax(-1).view(h, w) 360 | 361 | if dedup: 362 | # Merge the masks corresponding to the same stuff class 363 | for equiv in stuff_equiv_classes.values(): 364 | if len(equiv) > 1: 365 | for eq_id in equiv: 366 | m_id.masked_fill_(m_id.eq(eq_id), equiv[0]) 367 | 368 | final_h, final_w = to_tuple(target_size) 369 | 370 | seg_img = Image.fromarray(id2rgb(m_id.view(h, w).cpu().numpy())) 371 | seg_img = seg_img.resize( 372 | size=(final_w, final_h), resample=Image.NEAREST 373 | ) 374 | 375 | np_seg_img = ( 376 | torch.ByteTensor(torch.ByteStorage.from_buffer(seg_img.tobytes())) 377 | .view(final_h, final_w, 3) 378 | .numpy() 379 | ) 380 | m_id = torch.from_numpy(rgb2id(np_seg_img)) 381 | 382 | area = [] 383 | for i in range(len(scores)): 384 | area.append(m_id.eq(i).sum().item()) 385 | return area, seg_img 386 | 387 | area, seg_img = get_ids_area(cur_masks, cur_scores, dedup=True) 388 | if cur_classes.numel() > 0: 389 | # We now filter empty masks as long as we find some 390 | while True: 391 | filtered_small = torch.as_tensor( 392 | [area[i] <= 4 for i, c in enumerate(cur_classes)], 393 | dtype=torch.bool, 394 | device=keep.device, 395 | ) 396 | if filtered_small.any().item(): 397 | cur_scores = cur_scores[~filtered_small] 398 | cur_classes = cur_classes[~filtered_small] 399 | cur_masks = cur_masks[~filtered_small] 400 | area, seg_img = 
get_ids_area(cur_masks, cur_scores) 401 | else: 402 | break 403 | 404 | else: 405 | cur_classes = torch.ones(1, dtype=torch.long, device=cur_classes.device) 406 | 407 | segments_info = [] 408 | for i, a in enumerate(area): 409 | cat = cur_classes[i].item() 410 | segments_info.append( 411 | { 412 | "id": i, 413 | "isthing": self.is_thing_map[cat], 414 | "category_id": cat, 415 | "area": a, 416 | } 417 | ) 418 | del cur_classes 419 | 420 | with io.BytesIO() as out: 421 | seg_img.save(out, format="PNG") 422 | predictions = { 423 | "png_string": out.getvalue(), 424 | "segments_info": segments_info, 425 | } 426 | preds.append(predictions) 427 | return preds 428 | --------------------------------------------------------------------------------