├── .vscode └── settings.json ├── README.md ├── configs ├── FCOS-Detection │ ├── Base-ATSS.yaml │ ├── Base-FCOS.yaml │ ├── FCOS_RT │ │ ├── MS_DLA_34_4x_syncbn.yaml │ │ ├── MS_DLA_34_4x_syncbn_bn_head.yaml │ │ ├── MS_DLA_34_4x_syncbn_shared_towers.yaml │ │ ├── MS_DLA_34_4x_syncbn_shared_towers_bn_head.yaml │ │ ├── MS_R_50_4x_syncbn.yaml │ │ └── MS_R_50_4x_syncbn_bn_head.yaml │ ├── MS_R_101_2x.yaml │ ├── MS_R_50_2x.yaml │ ├── MS_X_101_32x8d_2x.yaml │ ├── MS_X_101_32x8d_2x_dcnv2.yaml │ ├── MS_X_101_64x4d_2x.yaml │ ├── MS_X_101_64x4d_2x_dcnv2.yaml │ ├── README.md │ ├── RS_50_1x.yaml │ ├── R_50_1x.yaml │ ├── atss_r_50.yaml │ └── vovnet │ │ ├── MS_V_39_3x.yaml │ │ ├── MS_V_57_3x.yaml │ │ ├── MS_V_99_3x.yaml │ │ └── README.md └── RCNN │ ├── 550_R_50_FPN_3x.yaml │ ├── Base-RCNN-FPN.yaml │ ├── Base-RCNN.yaml │ ├── LVIS │ └── R_50_1x.yaml │ ├── R_101_3x.yaml │ ├── faster_rcnn_RS_50_FPN_1x.yaml │ ├── faster_rcnn_R_50_FPN_1x.yaml │ ├── faster_rcnn_R_50_FPN_1x_tta.yaml │ └── libra_rcnn │ └── r_50_1x.yaml ├── det ├── _C.cpython-36m-x86_64-linux-gnu.so ├── __init__.py ├── __pycache__ │ └── __init__.cpython-36.pyc ├── checkpoint │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-36.pyc │ │ └── det_checkpoint.cpython-36.pyc │ └── det_checkpoint.py ├── config │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-36.pyc │ │ ├── config.cpython-36.pyc │ │ └── defaults.cpython-36.pyc │ ├── config.py │ └── defaults.py ├── data │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-36.pyc │ │ ├── builtin.cpython-36.pyc │ │ ├── dataset_mapper.cpython-36.pyc │ │ └── detection_utils.cpython-36.pyc │ ├── builtin.py │ ├── dataset_mapper.py │ ├── datasets │ │ ├── __pycache__ │ │ │ └── text.cpython-36.pyc │ │ ├── augment_lists.py │ │ ├── fast_augment.py │ │ └── text.py │ └── detection_utils.py ├── layers │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-36.pyc │ │ ├── balanced_l1_loss.cpython-36.pyc │ │ ├── bezier_align.cpython-36.pyc │ │ ├── conv_with_kaiming_uniform.cpython-36.pyc │ │ ├── def_roi_align.cpython-36.pyc │ │ ├── deform_conv.cpython-36.pyc │ │ ├── gcn.cpython-36.pyc │ │ ├── iou_loss.cpython-36.pyc │ │ ├── ml_nms.cpython-36.pyc │ │ ├── naive_group_norm.cpython-36.pyc │ │ └── soft_nms.cpython-36.pyc │ ├── balanced_l1_loss.py │ ├── bezier_align.py │ ├── conv_with_kaiming_uniform.py │ ├── csrc │ │ ├── BezierAlign │ │ │ ├── BezierAlign.h │ │ │ ├── BezierAlign_cpu.cpp │ │ │ └── BezierAlign_cuda.cu │ │ ├── DefROIAlign │ │ │ ├── DefROIAlign.h │ │ │ └── DefROIAlign_cuda.cu │ │ ├── cuda_version.cu │ │ ├── ml_nms │ │ │ ├── ml_nms.cu │ │ │ └── ml_nms.h │ │ └── vision.cpp │ ├── def_roi_align.py │ ├── deform_conv.py │ ├── gcn.py │ ├── iou_loss.py │ ├── ml_nms.py │ ├── naive_group_norm.py │ └── soft_nms.py ├── modeling │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-36.pyc │ │ ├── one_stage_detector.cpython-36.pyc │ │ └── rpn_utils.cpython-36.pyc │ ├── atss │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-36.pyc │ │ │ ├── atss.cpython-36.pyc │ │ │ └── atss_outputs.cpython-36.pyc │ │ ├── atss.py │ │ └── atss_outputs.py │ ├── backbone │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-36.pyc │ │ │ ├── dla.cpython-36.pyc │ │ │ ├── fpn.cpython-36.pyc │ │ │ ├── lpf.cpython-36.pyc │ │ │ ├── mobilenet.cpython-36.pyc │ │ │ ├── resnet.cpython-36.pyc │ │ │ ├── resnet_interval.cpython-36.pyc │ │ │ ├── resnet_lpf.cpython-36.pyc │ │ │ ├── splat.cpython-36.pyc │ │ │ └── vovnet.cpython-36.pyc │ │ ├── dla.py │ │ ├── fpn.py │ │ ├── lpf.py │ │ ├── 
mobilenet.py │ │ ├── resnet.py │ │ ├── resnet_interval.py │ │ ├── resnet_lpf.py │ │ ├── splat.py │ │ └── vovnet.py │ ├── fcos │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-36.pyc │ │ │ ├── fcos.cpython-36.pyc │ │ │ └── fcos_outputs.cpython-36.pyc │ │ ├── fcos.py │ │ └── fcos_outputs.py │ ├── guided_anchoring │ │ ├── ga_outputs.py │ │ └── guided_anchor_head.py │ ├── one_stage_detector.py │ ├── poolers.py │ ├── roi_heads │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-36.pyc │ │ │ └── libra_rcnn.cpython-36.pyc │ │ └── libra_rcnn.py │ ├── rpn_utils.py │ └── tsd │ │ └── tsd.py └── utils │ ├── __init__.py │ ├── __pycache__ │ ├── __init__.cpython-36.pyc │ └── comm.cpython-36.pyc │ ├── comm.py │ ├── measures.py │ └── visualizer.py ├── docs └── nms │ ├── README.md │ ├── __init__.py │ ├── demo_nms.cpp │ ├── demo_nms.py │ ├── nms.cpp │ ├── nms.py │ ├── nms_wrapper.py │ └── src │ ├── nms_cpu.cpp │ ├── nms_cuda.cpp │ ├── nms_kernel.cu │ ├── soft_nms_cpu.cpp │ └── soft_nms_cpu.pyx ├── setup.py └── tools ├── compute_flops.py ├── train_net.py └── visualize_data.py /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "python.pythonPath": "/home/fei/anaconda3/envs/detectron2/bin/python" 3 | } -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Object-Detection.pytorch 2 | 3 | ## bdd100k Dataset Baseline 4 | - we use `mmdetection` to train all models. 5 | - All models were trained on `bdd100k_train`, and tested on the `bdd100k_val`. 6 | - We use distributed training across 8 Nvdia-1080Ti GPUs. 7 | 8 | ### Anchor-based: 9 | | Name | backbone | tricks | AP | AP50 | AP75 | APs | APm | APl | 10 | | :------: |:------: |:------:|:------:|:------:|:------:|:------:|:------:|:------:| 11 | | FasterRCNN | R_50_FPN | | 0.318 | 0.551 | 0.311 | 0.145 | 0.356 | 0.497| 12 | | FasterRCNN | R_101_FPN| | 0.322 | 0.553 | 0.314 | 0.142 | 0.360 | 0.512 | 13 | | CascadeRCNN | R_50_FPN | | 0.332 | 0.558 | 0.331 | 0.150 | 0.371 | 0.520 | 14 | | PISA | R_50_FPN | | 15 | | LibraRCNN| R_50_FPN| | 16 | | GA | R_50_FPN | | 17 | 18 | 19 | ### Anchor-free 20 | | Name | backbone | tricks | AP | AP50 | AP75 | APs | APm | APl | 21 | | :------: |:------: |:------:|:------:|:------:|:------:|:------:|:------:|:------:| 22 | | FCOS | R_50_FPN | |0.304 | 0.539 | 0.290 | 0.129 | 0.338 | 0.498 | 23 | | ATSS | R_50_FPN | | 0.329 | 0.562 | 0.323 | 0.141 | 0.367 | 0.517| 24 | | CenterNet| R_50_DCN| 25 | | RepPoints| R_50_FPN | | 0.312 | 0.555 | 0.297 | 0.129 | 0.348 | 0.505 | 26 | 27 | ## CenterNet series 28 | | Name | backbone | Iters | AP | AP50 | AP75 | APs | APm | APl | 29 | | :------: |:------: |:------:|:------:|:------:|:------:|:------:|:------:|:------:| 30 | | CenterNet| R_50_DCN |125997 | 27.5269|44.7613| 28.8301 |9.6805 | 31.4682 |43.1641 | 31 | -------------------------------------------------------------------------------- /configs/FCOS-Detection/Base-ATSS.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "OneStageDetector" 3 | BACKBONE: 4 | NAME: "build_fcos_resnet_fpn_backbone" 5 | RESNETS: 6 | OUT_FEATURES: ["res3", "res4", "res5"] 7 | FPN: 8 | IN_FEATURES: ["res3", "res4", "res5"] 9 | PROPOSAL_GENERATOR: 10 | NAME: "ATSS" 11 | ANCHOR_GENERATOR: 12 | NAME: "ATSSAnchorGenerator" 13 | 14 | 15 | # PIXEL_MEAN: [102.9801, 115.9465, 122.7717] 
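  # (The commented-out values above are the Caffe2/Detectron1 BGR pixel means; leaving them
  # commented keeps detectron2's default PIXEL_MEAN in effect.)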
16 | DATASETS: 17 | TRAIN: ("coco_2017_train",) 18 | TEST: ("coco_2017_val",) 19 | SOLVER: 20 | IMS_PER_BATCH: 16 21 | BASE_LR: 0.01 # Note that RetinaNet uses a different default learning rate 22 | STEPS: (60000, 80000) 23 | MAX_ITER: 90000 24 | INPUT: 25 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) -------------------------------------------------------------------------------- /configs/FCOS-Detection/Base-FCOS.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "OneStageDetector" 3 | BACKBONE: 4 | NAME: "build_fcos_resnet_fpn_backbone" 5 | RESNETS: 6 | OUT_FEATURES: ["res3", "res4", "res5"] 7 | FPN: 8 | IN_FEATURES: ["res3", "res4", "res5"] 9 | PROPOSAL_GENERATOR: 10 | NAME: "FCOS" 11 | # PIXEL_MEAN: [102.9801, 115.9465, 122.7717] 12 | DATASETS: 13 | TRAIN: ("coco_2017_train",) 14 | TEST: ("coco_2017_val",) 15 | SOLVER: 16 | IMS_PER_BATCH: 16 17 | BASE_LR: 0.01 # Note that RetinaNet uses a different default learning rate 18 | STEPS: (60000, 80000) 19 | MAX_ITER: 90000 20 | INPUT: 21 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 22 | -------------------------------------------------------------------------------- /configs/FCOS-Detection/FCOS_RT/MS_DLA_34_4x_syncbn.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-FCOS.yaml" 2 | INPUT: 3 | MIN_SIZE_TRAIN: (256, 288, 320, 352, 384, 416, 448, 480, 512, 544, 576, 608) 4 | MAX_SIZE_TRAIN: 900 5 | MAX_SIZE_TEST: 736 6 | MIN_SIZE_TEST: 512 7 | MODEL: 8 | BACKBONE: 9 | NAME: "build_fcos_dla_fpn_backbone" 10 | FREEZE_AT: -1 11 | WEIGHTS: "http://dl.yf.io/dla/models/imagenet/dla34-ba72cf86.pth" 12 | DLA: 13 | CONV_BODY: "DLA34" 14 | NORM: "SyncBN" 15 | FPN: 16 | IN_FEATURES: ["level3", "level4", "level5"] 17 | FCOS: 18 | TOP_LEVELS: 0 19 | SIZES_OF_INTEREST: [64, 128] 20 | FPN_STRIDES: [8, 16, 32] 21 | IN_FEATURES: ['p3', 'p4', 'p5'] 22 | SOLVER: 23 | STEPS: (300000, 340000) 24 | MAX_ITER: 360000 25 | OUTPUT_DIR: "output/fcos/FCOS_RT_MS_DLA_34_4x_syncbn" 26 | -------------------------------------------------------------------------------- /configs/FCOS-Detection/FCOS_RT/MS_DLA_34_4x_syncbn_bn_head.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-FCOS.yaml" 2 | INPUT: 3 | MIN_SIZE_TRAIN: (256, 288, 320, 352, 384, 416, 448, 480, 512, 544, 576, 608) 4 | MAX_SIZE_TRAIN: 900 5 | MAX_SIZE_TEST: 736 6 | MIN_SIZE_TEST: 512 7 | MODEL: 8 | BACKBONE: 9 | NAME: "build_fcos_dla_fpn_backbone" 10 | FREEZE_AT: -1 11 | WEIGHTS: "http://dl.yf.io/dla/models/imagenet/dla34-ba72cf86.pth" 12 | DLA: 13 | CONV_BODY: "DLA34" 14 | NORM: "SyncBN" 15 | FPN: 16 | IN_FEATURES: ["level3", "level4", "level5"] 17 | FCOS: 18 | TOP_LEVELS: 0 19 | SIZES_OF_INTEREST: [64, 128] 20 | FPN_STRIDES: [8, 16, 32] 21 | IN_FEATURES: ['p3', 'p4', 'p5'] 22 | NORM: "SyncBN" 23 | SOLVER: 24 | STEPS: (300000, 340000) 25 | MAX_ITER: 360000 26 | OUTPUT_DIR: "output/fcos/FCOS_RT_MS_DLA_34_4x_syncbn_bn_head" 27 | -------------------------------------------------------------------------------- /configs/FCOS-Detection/FCOS_RT/MS_DLA_34_4x_syncbn_shared_towers.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-FCOS.yaml" 2 | INPUT: 3 | MIN_SIZE_TRAIN: (256, 288, 320, 352, 384, 416, 448, 480, 512, 544, 576, 608) 4 | MAX_SIZE_TRAIN: 900 5 | MAX_SIZE_TEST: 736 6 | MIN_SIZE_TEST: 512 7 | MODEL: 8 | BACKBONE: 9 | NAME: "build_fcos_dla_fpn_backbone" 10 | FREEZE_AT: 
-1 11 | WEIGHTS: "http://dl.yf.io/dla/models/imagenet/dla34-ba72cf86.pth" 12 | DLA: 13 | CONV_BODY: "DLA34" 14 | NORM: "SyncBN" 15 | FPN: 16 | IN_FEATURES: ["level3", "level4", "level5"] 17 | FCOS: 18 | TOP_LEVELS: 0 19 | SIZES_OF_INTEREST: [64, 128] 20 | FPN_STRIDES: [8, 16, 32] 21 | IN_FEATURES: ['p3', 'p4', 'p5'] 22 | NUM_SHARE_CONVS: 4 23 | NUM_BOX_CONVS: 0 24 | NUM_CLS_CONVS: 0 25 | SOLVER: 26 | STEPS: (300000, 340000) 27 | MAX_ITER: 360000 28 | OUTPUT_DIR: "output/fcos/FCOS_RT_MS_DLA_34_4x_syncbn_shared_towers" 29 | -------------------------------------------------------------------------------- /configs/FCOS-Detection/FCOS_RT/MS_DLA_34_4x_syncbn_shared_towers_bn_head.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-FCOS.yaml" 2 | INPUT: 3 | MIN_SIZE_TRAIN: (256, 288, 320, 352, 384, 416, 448, 480, 512, 544, 576, 608) 4 | MAX_SIZE_TRAIN: 900 5 | MAX_SIZE_TEST: 736 6 | MIN_SIZE_TEST: 512 7 | MODEL: 8 | BACKBONE: 9 | NAME: "build_fcos_dla_fpn_backbone" 10 | FREEZE_AT: -1 11 | WEIGHTS: "http://dl.yf.io/dla/models/imagenet/dla34-ba72cf86.pth" 12 | DLA: 13 | CONV_BODY: "DLA34" 14 | NORM: "SyncBN" 15 | FPN: 16 | IN_FEATURES: ["level3", "level4", "level5"] 17 | FCOS: 18 | TOP_LEVELS: 0 19 | SIZES_OF_INTEREST: [64, 128] 20 | FPN_STRIDES: [8, 16, 32] 21 | IN_FEATURES: ['p3', 'p4', 'p5'] 22 | NUM_SHARE_CONVS: 4 23 | NUM_BOX_CONVS: 0 24 | NUM_CLS_CONVS: 0 25 | NORM: "SyncBN" 26 | SOLVER: 27 | STEPS: (300000, 340000) 28 | MAX_ITER: 360000 29 | OUTPUT_DIR: "output/fcos/FCOS_RT_MS_DLA_34_4x_syncbn_shared_towers_bn_head" 30 | -------------------------------------------------------------------------------- /configs/FCOS-Detection/FCOS_RT/MS_R_50_4x_syncbn.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-FCOS.yaml" 2 | INPUT: 3 | MIN_SIZE_TRAIN: (256, 288, 320, 352, 384, 416, 448, 480, 512, 544, 576, 608) 4 | MAX_SIZE_TRAIN: 900 5 | MAX_SIZE_TEST: 736 6 | MIN_SIZE_TEST: 512 7 | MODEL: 8 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 9 | RESNETS: 10 | DEPTH: 50 11 | NORM: "SyncBN" 12 | FCOS: 13 | TOP_LEVELS: 0 14 | SIZES_OF_INTEREST: [64, 128] 15 | FPN_STRIDES: [8, 16, 32] 16 | IN_FEATURES: ['p3', 'p4', 'p5'] 17 | SOLVER: 18 | STEPS: (300000, 340000) 19 | MAX_ITER: 360000 20 | OUTPUT_DIR: "output/fcos/FCOS_RT_MS_R_50_4x_syncbn" 21 | -------------------------------------------------------------------------------- /configs/FCOS-Detection/FCOS_RT/MS_R_50_4x_syncbn_bn_head.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-FCOS.yaml" 2 | INPUT: 3 | MIN_SIZE_TRAIN: (256, 288, 320, 352, 384, 416, 448, 480, 512, 544, 576, 608) 4 | MAX_SIZE_TRAIN: 900 5 | MAX_SIZE_TEST: 736 6 | MIN_SIZE_TEST: 512 7 | MODEL: 8 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 9 | RESNETS: 10 | DEPTH: 50 11 | NORM: "SyncBN" 12 | FCOS: 13 | TOP_LEVELS: 0 14 | SIZES_OF_INTEREST: [64, 128] 15 | FPN_STRIDES: [8, 16, 32] 16 | IN_FEATURES: ['p3', 'p4', 'p5'] 17 | NORM: "SyncBN" 18 | SOLVER: 19 | STEPS: (300000, 340000) 20 | MAX_ITER: 360000 21 | OUTPUT_DIR: "output/fcos/FCOS_RT_MS_R_50_4x_syncbn_bn_head" 22 | -------------------------------------------------------------------------------- /configs/FCOS-Detection/MS_R_101_2x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-FCOS.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" 4 | RESNETS: 5 | DEPTH: 101 6 | 
SOLVER: 7 | STEPS: (120000, 160000) 8 | MAX_ITER: 180000 9 | OUTPUT_DIR: "output/fcos/R_101_2x" 10 | -------------------------------------------------------------------------------- /configs/FCOS-Detection/MS_R_50_2x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-FCOS.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | RESNETS: 5 | DEPTH: 50 6 | SOLVER: 7 | STEPS: (120000, 160000) 8 | MAX_ITER: 180000 9 | OUTPUT_DIR: "output/fcos/R_50_2x" 10 | -------------------------------------------------------------------------------- /configs/FCOS-Detection/MS_X_101_32x8d_2x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-FCOS.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/FAIR/X-101-32x8d.pkl" 4 | PIXEL_STD: [57.375, 57.120, 58.395] 5 | RESNETS: 6 | STRIDE_IN_1X1: False # this is a C2 model 7 | NUM_GROUPS: 32 8 | WIDTH_PER_GROUP: 8 9 | DEPTH: 101 10 | SOLVER: 11 | STEPS: (120000, 160000) 12 | MAX_ITER: 180000 13 | OUTPUT_DIR: "output/fcos/X_101_2x" 14 | -------------------------------------------------------------------------------- /configs/FCOS-Detection/MS_X_101_32x8d_2x_dcnv2.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-FCOS.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/FAIR/X-101-32x8d.pkl" 4 | PIXEL_STD: [57.375, 57.120, 58.395] 5 | RESNETS: 6 | STRIDE_IN_1X1: False # this is a C2 model 7 | NUM_GROUPS: 32 8 | WIDTH_PER_GROUP: 8 9 | DEPTH: 101 10 | DEFORM_ON_PER_STAGE: [False, False, True, True] # on Res4, Res5 11 | DEFORM_MODULATED: True 12 | FCOS: 13 | USE_DEFORMABLE: True 14 | SOLVER: 15 | STEPS: (120000, 160000) 16 | MAX_ITER: 180000 17 | OUTPUT_DIR: "output/fcos/MS_X_101_2x_dcnv2" 18 | -------------------------------------------------------------------------------- /configs/FCOS-Detection/MS_X_101_64x4d_2x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-FCOS.yaml" 2 | MODEL: 3 | WEIGHTS: "catalog://ImageNetPretrained/FAIR/X-101-64x4d" 4 | PIXEL_STD: [1.0, 1.0, 1.0] 5 | RESNETS: 6 | STRIDE_IN_1X1: False # this is a C2 model 7 | NUM_GROUPS: 64 8 | WIDTH_PER_GROUP: 4 9 | DEPTH: 101 10 | SOLVER: 11 | STEPS: (120000, 160000) 12 | MAX_ITER: 180000 13 | OUTPUT_DIR: "output/fcos/MS_X_101_64x4d_2x" 14 | -------------------------------------------------------------------------------- /configs/FCOS-Detection/MS_X_101_64x4d_2x_dcnv2.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-FCOS.yaml" 2 | MODEL: 3 | WEIGHTS: "catalog://ImageNetPretrained/FAIR/X-101-64x4d" 4 | PIXEL_STD: [1.0, 1.0, 1.0] 5 | RESNETS: 6 | STRIDE_IN_1X1: False # this is a C2 model 7 | NUM_GROUPS: 64 8 | WIDTH_PER_GROUP: 4 9 | DEPTH: 101 10 | DEFORM_ON_PER_STAGE: [False, False, True, True] # on Res4, Res5 11 | DEFORM_MODULATED: True 12 | FCOS: 13 | USE_DEFORMABLE: True 14 | SOLVER: 15 | STEPS: (120000, 160000) 16 | MAX_ITER: 180000 17 | OUTPUT_DIR: "output/fcos/MS_X_101_64x4d_2x_dcnv2" 18 | -------------------------------------------------------------------------------- /configs/FCOS-Detection/README.md: -------------------------------------------------------------------------------- 1 | # FCOS: Fully Convolutional One-Stage Object Detection 2 | 3 | FCOS: Fully Convolutional One-Stage Object Detection; 4 | Zhi Tian, Chunhua Shen, Hao Chen, and Tong He; 5 | In: Proc. Int. Conf. 
Computer Vision (ICCV), 2019. 6 | arXiv preprint arXiv:1904.01355 7 | 8 | [[`Paper`](https://arxiv.org/abs/1904.01355)] [[`BibTeX`](#citing-fcos)] 9 | 10 | # Installation & Quick Start 11 | No special setup needed. The [default instruction](../../README.md#Installation) is fine. 12 | 13 | ## Models 14 | ### COCO Object Detecton Baselines with [FCOS](https://arxiv.org/abs/1904.01355) 15 | 16 | Name | inf. time | box AP | download 17 | --- |:---:|:---:|:---: 18 | [FCOS_R_50_1x](R_50_1x.yaml) | 16 FPS | 38.7 | [model](https://cloudstor.aarnet.edu.au/plus/s/glqFc13cCoEyHYy/download) 19 | [FCOS_MS_R_50_2x](MS_R_50_2x.yaml) | 16 FPS | 41.0 | [model](https://cloudstor.aarnet.edu.au/plus/s/reA6HVaGX47yKGV/download) 20 | [FCOS_MS_R_101_2x](MS_R_101_2x.yaml) | 12 FPS | 43.1 | [model](https://cloudstor.aarnet.edu.au/plus/s/M3UOT6JcyHy2QW1/download) 21 | [FCOS_MS_X_101_32x8d_2x](MS_X_101_32x8d_2x.yaml) | 6.6 FPS | 43.9 | [model](https://cloudstor.aarnet.edu.au/plus/s/R7H00WeWKZG45pP/download) 22 | [FCOS_MS_X_101_64x4d_2x](MS_X_101_64x4d_2x.yaml) | 6.1 FPS | 44.7 | [model](https://cloudstor.aarnet.edu.au/plus/s/XOLUCzqKYckNII7/download) 23 | [FCOS_MS_X_101_32x8d_dcnv2_2x](MS_X_101_32x8d_2x_dcnv2.yaml) | 4.6 FPS | 46.6 | [model](https://cloudstor.aarnet.edu.au/plus/s/TDsnYK8OXDTrafF/download) 24 | 25 | *Except for FCOS_R_50_1x, all other models are trained with multi-scale data augmentation.* 26 | 27 | ### FCOS Real-time Models 28 | 29 | Name | inf. time | box AP | download 30 | --- |:---:|:---:|:---: 31 | [FCOS_RT_MS_DLA_34_4x_shtw](FCOS_RT/MS_DLA_34_4x_syncbn_shared_towers.yaml) | 52 FPS | 39.1 | [model](https://cloudstor.aarnet.edu.au/plus/s/4vc3XwQezyhNvnB/download) 32 | [FCOS_RT_MS_DLA_34_4x](FCOS_RT/MS_DLA_34_4x_syncbn.yaml) | 46 FPS | 40.3 | [model](https://cloudstor.aarnet.edu.au/plus/s/zNPNyTkizaOOsUQ/download) 33 | [FCOS_RT_MS_R_50_4x](FCOS_RT/MS_R_50_4x_syncbn.yaml) | 38 FPS | 40.2 | [model](https://cloudstor.aarnet.edu.au/plus/s/TlnlXUr6lNNSyoZ/download) 34 | 35 | If you prefer BN in FCOS heads, please try the following models. 36 | 37 | Name | inf. time | box AP | download 38 | --- |:---:|:---:|:---: 39 | [FCOS_RT_MS_DLA_34_4x_shtw_bn](FCOS_RT/MS_DLA_34_4x_syncbn_shared_towers_bn_head.yaml) | 52 FPS | 38.9 | [model](https://cloudstor.aarnet.edu.au/plus/s/rdmHHSs4oCg7l7U/download) 40 | [FCOS_RT_MS_DLA_34_4x_bn](FCOS_RT/MS_DLA_34_4x_syncbn_bn_head.yaml) | 48 FPS | 39.4 | [model](https://cloudstor.aarnet.edu.au/plus/s/T5httPVo1VndbD4/download) 41 | [FCOS_RT_MS_R_50_4x_bn](FCOS_RT/MS_R_50_4x_syncbn_bn_head.yaml) | 40 FPS | 39.3 | [model](https://cloudstor.aarnet.edu.au/plus/s/dHNUNs0YxVhZAmg/download) 42 | 43 | *Inference time is measured on a NVIDIA 1080Ti with batch size 1. Real-time models use shorter side 512 for inference.* 44 | 45 | # Citing FCOS 46 | If you use FCOS in your research or wish to refer to the baseline results, please use the following BibTeX entries. 47 | ```BibTeX 48 | @inproceedings{tian2019fcos, 49 | title = {{FCOS}: Fully Convolutional One-Stage Object Detection}, 50 | author = {Tian, Zhi and Shen, Chunhua and Chen, Hao and He, Tong}, 51 | booktitle = {Proc. Int. Conf. 
Computer Vision (ICCV)}, 52 | year = {2019} 53 | } 54 | ``` 55 | -------------------------------------------------------------------------------- /configs/FCOS-Detection/RS_50_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-FCOS.yaml" 2 | MODEL: 3 | #WEIGHTS: "https://hangzh.s3-us-west-1.amazonaws.com/encoding/models/resnest50_detectron-255b5649.pth" 4 | #"detectron2://ImageNetPretrained/MSRA/R-50.pkl" 5 | MASK_ON: False 6 | RESNETS: 7 | DEPTH: 50 8 | STRIDE_IN_1X1: False 9 | RADIX: 2 10 | DEEP_STEM: True 11 | AVD: True 12 | AVG_DOWN: True 13 | NORM: "SyncBN" 14 | FPN: 15 | NORM: "SyncBN" 16 | ROI_BOX_HEAD: 17 | NAME: "FastRCNNConvFCHead" 18 | NUM_CONV: 4 19 | NUM_FC: 1 20 | NORM: "SyncBN" 21 | PIXEL_MEAN: [123.68, 116.779, 103.939] 22 | PIXEL_STD: [58.393, 57.12, 57.375] 23 | SOLVER: 24 | IMS_PER_BATCH: 16 #16 25 | BASE_LR: 0.02 #original:0.02 26 | INPUT: 27 | MIN_SIZE_TRAIN: (640, 800) 28 | MIN_SIZE_TRAIN_SAMPLING: "range" 29 | MAX_SIZE_TRAIN: 1333 30 | FORMAT: "RGB" -------------------------------------------------------------------------------- /configs/FCOS-Detection/R_50_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-FCOS.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | RESNETS: 5 | DEPTH: 50 6 | INPUT: 7 | MIN_SIZE_TRAIN: (800,) 8 | SOLVER: 9 | WARMUP_METHOD: "constant" 10 | WARMUP_FACTOR: 0.3333 11 | WARMUP_ITERS: 500 12 | OUTPUT_DIR: "output/fcos/R_50_1x" 13 | -------------------------------------------------------------------------------- /configs/FCOS-Detection/atss_r_50.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-ATSS.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | RESNETS: 5 | DEPTH: 50 6 | INPUT: 7 | MIN_SIZE_TRAIN: (800,) 8 | SOLVER: 9 | WARMUP_METHOD: "constant" 10 | WARMUP_FACTOR: 0.3333 11 | WARMUP_ITERS: 500 12 | OUTPUT_DIR: "output/atss/R_50_1x" -------------------------------------------------------------------------------- /configs/FCOS-Detection/vovnet/MS_V_39_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-FCOS.yaml" 2 | MODEL: 3 | WEIGHTS: "https://www.dropbox.com/s/q98pypf96rhtd8y/vovnet39_ese_detectron2.pth?dl=1" 4 | BACKBONE: 5 | NAME: "build_fcos_vovnet_fpn_backbone" 6 | FREEZE_AT: 0 7 | VOVNET: 8 | CONV_BODY : "V-39-eSE" 9 | OUT_FEATURES: ["stage3", "stage4", "stage5"] 10 | FPN: 11 | IN_FEATURES: ["stage3", "stage4", "stage5"] 12 | SOLVER: 13 | STEPS: (210000, 250000) 14 | MAX_ITER: 270000 15 | OUTPUT_DIR: "output/fcos/V_39_ms_3x" 16 | -------------------------------------------------------------------------------- /configs/FCOS-Detection/vovnet/MS_V_57_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-FCOS.yaml" 2 | MODEL: 3 | WEIGHTS: "https://www.dropbox.com/s/8xl0cb3jj51f45a/vovnet57_ese_detectron2.pth?dl=1" 4 | BACKBONE: 5 | NAME: "build_fcos_vovnet_fpn_backbone" 6 | FREEZE_AT: 0 7 | VOVNET: 8 | CONV_BODY : "V-57-eSE" 9 | OUT_FEATURES: ["stage3", "stage4", "stage5"] 10 | FPN: 11 | IN_FEATURES: ["stage3", "stage4", "stage5"] 12 | SOLVER: 13 | STEPS: (210000, 250000) 14 | MAX_ITER: 270000 15 | OUTPUT_DIR: "output/fcos/V_57_ms_3x" 16 | -------------------------------------------------------------------------------- /configs/FCOS-Detection/vovnet/MS_V_99_3x.yaml: 
-------------------------------------------------------------------------------- 1 | _BASE_: "../Base-FCOS.yaml" 2 | MODEL: 3 | WEIGHTS: "https://www.dropbox.com/s/1mlv31coewx8trd/vovnet99_ese_detectron2.pth?dl=1" 4 | BACKBONE: 5 | NAME: "build_fcos_vovnet_fpn_backbone" 6 | FREEZE_AT: 0 7 | VOVNET: 8 | CONV_BODY : "V-99-eSE" 9 | OUT_FEATURES: ["stage3", "stage4", "stage5"] 10 | FPN: 11 | IN_FEATURES: ["stage3", "stage4", "stage5"] 12 | SOLVER: 13 | STEPS: (210000, 250000) 14 | MAX_ITER: 270000 15 | OUTPUT_DIR: "output/fcos/V_99_ms_3x" 16 | -------------------------------------------------------------------------------- /configs/FCOS-Detection/vovnet/README.md: -------------------------------------------------------------------------------- 1 | # [VoVNet-v2](https://github.com/youngwanLEE/CenterMask) backbone networks in [FCOS](https://github.com/aim-uofa/det) 2 | **Efficient Backbone Network for Object Detection and Segmentation**\ 3 | Youngwan Lee 4 | 5 | 6 | [[`vovnet-detectron2`](https://github.com/youngwanLEE/vovnet-detectron2)][[`CenterMask(code)`](https://github.com/youngwanLEE/CenterMask)] [[`VoVNet-v1(arxiv)`](https://arxiv.org/abs/1904.09730)] [[`VoVNet-v2(arxiv)`](https://arxiv.org/abs/1911.06667)] [[`BibTeX`](#CitingVoVNet)] 7 | 8 | 9 |
12 | 13 | 14 | ## Comparison with Faster R-CNN and ResNet 15 | 16 | ### Note 17 | 18 | We measure the inference time of all models with batch size 1 on the same V100 GPU machine. 19 | 20 | - pytorch1.3.1 21 | - CUDA 10.1 22 | - cuDNN 7.3 23 | 24 | 25 | |Method|Backbone|lr sched|inference time|AP|APs|APm|APl|download| 26 | |---|:--------:|:---:|:--:|--|----|----|---|--------| 27 | |Faster|R-50-FPN|3x|0.047|40.2|24.2|43.5|52.0|model \| metrics 28 | |Faster|**V2-39-FPN**|3x|0.047|42.7|27.1|45.6|54.0|model \| metrics 29 | |**FCOS**|**V2-39-FPN**|3x|0.045|43.5|28.1|47.2|54.5|model \| metrics 30 | || 31 | |Faster|R-101-FPN|3x|0.063|42.0|25.2|45.6|54.6|model \| metrics 32 | |Faster|**V2-57-FPN**|3x|0.054|43.3|27.5|46.7|55.3|model \| metrics 33 | |**FCOS**|**V2-57-FPN**|3x|0.051|44.4|28.8|47.2|56.3|model \| metrics 34 | || 35 | |Faster|X-101-FPN|3x|0.120|43.0|27.2|46.1|54.9|model \| metrics| 36 | |Faster|**V2-99-FPN**|3x|0.073|44.1|28.1|47.0|56.4|model \| metrics| 37 | |**FCOS**|**V2-99-FPN**|3x|0.070|45.2|29.2|48.4|57.3|model \| metrics| 38 | 39 | 40 | 41 | ## Citing VoVNet 42 | 43 | If you use VoVNet, please use the following BibTeX entry. 44 | 45 | ```BibTeX 46 | @inproceedings{lee2019energy, 47 | title = {An Energy and GPU-Computation Efficient Backbone Network for Real-Time Object Detection}, 48 | author = {Lee, Youngwan and Hwang, Joong-won and Lee, Sangrok and Bae, Yuseok and Park, Jongyoul}, 49 | booktitle = {Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition Workshops}, 50 | year = {2019} 51 | } 52 | 53 | @article{lee2019centermask, 54 | title={CenterMask: Real-Time Anchor-Free Instance Segmentation}, 55 | author={Lee, Youngwan and Park, Jongyoul}, 56 | journal={arXiv preprint arXiv:1911.06667}, 57 | year={2019} 58 | } 59 | ``` 60 | -------------------------------------------------------------------------------- /configs/RCNN/550_R_50_FPN_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-RCNN.yaml" 2 | MODEL: 3 | WEIGHTS: "output/mask_rcnn/550_R_50_3x/model_final.pth" 4 | MASK_ON: True 5 | RESNETS: 6 | DEPTH: 50 7 | SOLVER: 8 | STEPS: (210000, 250000) 9 | MAX_ITER: 270000 10 | INPUT: 11 | MIN_SIZE_TRAIN: (440, 462, 484, 506, 528, 550) 12 | MAX_SIZE_TRAIN: 916 13 | MIN_SIZE_TEST: 550 14 | MAX_SIZE_TEST: 916 15 | OUTPUT_DIR: "output/mask_rcnn/550_R_50_3x" 16 | -------------------------------------------------------------------------------- /configs/RCNN/Base-RCNN-FPN.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | BACKBONE: 4 | NAME: "build_resnet_fpn_backbone" 5 | RESNETS: 6 | OUT_FEATURES: ["res2", "res3", "res4", "res5"] 7 | FPN: 8 | IN_FEATURES: ["res2", "res3", "res4", "res5"] 9 | ANCHOR_GENERATOR: 10 | SIZES: [[32], [64], [128], [256], [512]] # One size for each in feature map 11 | ASPECT_RATIOS: [[0.5, 1.0, 2.0]] # Three aspect ratios (same for all in feature maps) 12 | RPN: 13 | IN_FEATURES: ["p2", "p3", "p4", "p5", "p6"] 14 | PRE_NMS_TOPK_TRAIN: 2000 # Per FPN level 15 | PRE_NMS_TOPK_TEST: 1000 # Per FPN level 16 | # Detectron1 uses 2000 proposals per-batch, 17 | # (See "modeling/rpn/rpn_outputs.py" for details of this legacy issue) 18 | # which is approximately 1000 proposals per-image since the default batch size for FPN is 2. 
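    # In detectron2 the POST_NMS_TOPK values below are applied per image, not per batch.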
19 | POST_NMS_TOPK_TRAIN: 1000 20 | POST_NMS_TOPK_TEST: 1000 21 | ROI_HEADS: 22 | NAME: "StandardROIHeads" 23 | IN_FEATURES: ["p2", "p3", "p4", "p5"] 24 | ROI_BOX_HEAD: 25 | NAME: "FastRCNNConvFCHead" 26 | NUM_FC: 2 27 | POOLER_RESOLUTION: 7 28 | ROI_MASK_HEAD: 29 | NAME: "MaskRCNNConvUpsampleHead" 30 | NUM_CONV: 4 31 | POOLER_RESOLUTION: 14 32 | DATASETS: 33 | TRAIN: ("coco_2017_train",) 34 | TEST: ("coco_2017_val",) 35 | SOLVER: 36 | IMS_PER_BATCH: 16 37 | BASE_LR: 0.02 38 | STEPS: (60000, 80000) 39 | MAX_ITER: 90000 40 | INPUT: 41 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 42 | VERSION: 2 43 | -------------------------------------------------------------------------------- /configs/RCNN/Base-RCNN.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | BACKBONE: 4 | NAME: "build_resnet_fpn_backbone" 5 | RESNETS: 6 | OUT_FEATURES: ["res2", "res3", "res4", "res5"] 7 | FPN: 8 | IN_FEATURES: ["res2", "res3", "res4", "res5"] 9 | ANCHOR_GENERATOR: 10 | SIZES: [[32], [64], [128], [256], [512]] # One size for each in feature map 11 | ASPECT_RATIOS: [[0.5, 1.0, 2.0]] # Three aspect ratios (same for all in feature maps) 12 | RPN: 13 | IN_FEATURES: ["p2", "p3", "p4", "p5", "p6"] 14 | PRE_NMS_TOPK_TRAIN: 2000 # Per FPN level 15 | PRE_NMS_TOPK_TEST: 1000 # Per FPN level 16 | # Detectron1 uses 2000 proposals per-batch, 17 | # (See "modeling/rpn/rpn_outputs.py" for details of this legacy issue) 18 | # which is approximately 1000 proposals per-image since the default batch size for FPN is 2. 19 | POST_NMS_TOPK_TRAIN: 1000 20 | POST_NMS_TOPK_TEST: 1000 21 | ROI_HEADS: 22 | NAME: "StandardROIHeads" 23 | IN_FEATURES: ["p2", "p3", "p4", "p5"] 24 | ROI_BOX_HEAD: 25 | NAME: "FastRCNNConvFCHead" 26 | NUM_FC: 2 27 | POOLER_RESOLUTION: 7 28 | ROI_MASK_HEAD: 29 | NAME: "MaskRCNNConvUpsampleHead" 30 | NUM_CONV: 4 31 | POOLER_RESOLUTION: 14 32 | DATASETS: 33 | TRAIN: ("coco_2017_train",) 34 | TEST: ("coco_2017_val",) 35 | SOLVER: 36 | IMS_PER_BATCH: 16 37 | BASE_LR: 0.02 38 | STEPS: (60000, 80000) 39 | MAX_ITER: 90000 40 | INPUT: 41 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 42 | VERSION: 2 43 | -------------------------------------------------------------------------------- /configs/RCNN/LVIS/R_50_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-LVIS.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | RESNETS: 5 | DEPTH: 50 6 | OUTPUT_DIR: "output/lvis/mask_rcnn/R_50_1x" 7 | -------------------------------------------------------------------------------- /configs/RCNN/R_101_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-RCNN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" 4 | MASK_ON: True 5 | RESNETS: 6 | DEPTH: 101 7 | SOLVER: 8 | STEPS: (210000, 250000) 9 | MAX_ITER: 270000 10 | -------------------------------------------------------------------------------- /configs/RCNN/faster_rcnn_RS_50_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-RCNN-FPN.yaml" 2 | MODEL: 3 | #WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | RESNEST: True 5 | BACKBONE: 6 | NAME: "build_resnest_fpn_backbone" 7 | MASK_ON: False 8 | RESNETS: 9 | DEPTH: 50 10 | STRIDE_IN_1X1: False 11 | RADIX: 2 12 | DEEP_STEM: True 13 | AVD: True 14 | AVG_DOWN: True 15 | NORM: "GN" 16 | 
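The RCNN configs in this directory are ordinary detectron2/yacs YAML files. A minimal sketch of loading and training one of them, assuming the repo's `det.config.get_cfg` (defined in `det/config/config.py` below) carries the extra keys these configs reference and that training follows the standard detectron2 `DefaultTrainer` pattern used by `tools/train_net.py`:

```python
# Sketch only: the actual tools/train_net.py entry point may differ in details.
from det.config import get_cfg            # repo config: detectron2 defaults plus FCOS/RESNEST/... keys
from detectron2.engine import DefaultTrainer

def setup(config_file, opts=()):
    cfg = get_cfg()
    cfg.merge_from_file(config_file)       # e.g. "configs/RCNN/faster_rcnn_R_50_FPN_1x.yaml"
    cfg.merge_from_list(list(opts))        # command-line overrides, e.g. ["SOLVER.IMS_PER_BATCH", "8"]
    cfg.freeze()
    return cfg

if __name__ == "__main__":
    cfg = setup("configs/RCNN/faster_rcnn_R_50_FPN_1x.yaml")
    trainer = DefaultTrainer(cfg)          # builds the model, optimizer and COCO dataloaders from cfg
    trainer.resume_or_load(resume=False)
    trainer.train()
```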
-------------------------------------------------------------------------------- /configs/RCNN/faster_rcnn_R_50_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: False 5 | RESNETS: 6 | DEPTH: 50 7 | -------------------------------------------------------------------------------- /configs/RCNN/faster_rcnn_R_50_FPN_1x_tta.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: False 5 | RESNETS: 6 | DEPTH: 50 7 | TEST: 8 | AUG: 9 | ENABLED: True -------------------------------------------------------------------------------- /configs/RCNN/libra_rcnn/r_50_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: False 5 | RESNETS: 6 | DEPTH: 50 7 | ROI_HEADS: 8 | NAME: "LibraRCNNROIHeads" 9 | IN_FEATURES: ["p2", "p3", "p4", "p5"] -------------------------------------------------------------------------------- /det/_C.cpython-36m-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TengFeiHan0/Object-Detection.pytorch/5c2000ba08e903255a146f9e003c87a2fb1cb5aa/det/_C.cpython-36m-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /det/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from det import modeling 3 | 4 | __version__ = "0.1.1" -------------------------------------------------------------------------------- /det/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TengFeiHan0/Object-Detection.pytorch/5c2000ba08e903255a146f9e003c87a2fb1cb5aa/det/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /det/checkpoint/__init__.py: -------------------------------------------------------------------------------- 1 | from .det_checkpoint import detCheckpointer 2 | 3 | __all__ = ["detCheckpointer"] 4 | -------------------------------------------------------------------------------- /det/checkpoint/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TengFeiHan0/Object-Detection.pytorch/5c2000ba08e903255a146f9e003c87a2fb1cb5aa/det/checkpoint/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /det/checkpoint/__pycache__/det_checkpoint.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TengFeiHan0/Object-Detection.pytorch/5c2000ba08e903255a146f9e003c87a2fb1cb5aa/det/checkpoint/__pycache__/det_checkpoint.cpython-36.pyc -------------------------------------------------------------------------------- /det/checkpoint/det_checkpoint.py: -------------------------------------------------------------------------------- 1 | import pickle, os 2 | from fvcore.common.file_io import PathManager 3 | from detectron2.checkpoint import DetectionCheckpointer 4 | 5 | 6 | class 
detCheckpointer(DetectionCheckpointer): 7 | """ 8 | Same as :class:`DetectronCheckpointer`, but is able to convert models 9 | in AdelaiDet, such as LPF backbone. 10 | """ 11 | def _load_file(self, filename): 12 | if filename.endswith(".pkl"): 13 | with PathManager.open(filename, "rb") as f: 14 | data = pickle.load(f, encoding="latin1") 15 | if "model" in data and "__author__" in data: 16 | # file is in Detectron2 model zoo format 17 | self.logger.info("Reading a file from '{}'".format(data["__author__"])) 18 | return data 19 | else: 20 | # assume file is from Caffe2 / Detectron1 model zoo 21 | if "blobs" in data: 22 | # Detection models have "blobs", but ImageNet models don't 23 | data = data["blobs"] 24 | data = {k: v for k, v in data.items() if not k.endswith("_momentum")} 25 | if "weight_order" in data: 26 | del data["weight_order"] 27 | return {"model": data, "__author__": "Caffe2", "matching_heuristics": True} 28 | 29 | loaded = super()._load_file(filename) # load native pth checkpoint 30 | if "model" not in loaded: 31 | loaded = {"model": loaded} 32 | 33 | basename = os.path.basename(filename).lower() 34 | if "lpf" in basename or "dla" in basename: 35 | loaded["matching_heuristics"] = True 36 | return loaded 37 | -------------------------------------------------------------------------------- /det/config/__init__.py: -------------------------------------------------------------------------------- 1 | from .config import get_cfg 2 | 3 | __all__ = [ 4 | "get_cfg", 5 | ] 6 | -------------------------------------------------------------------------------- /det/config/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TengFeiHan0/Object-Detection.pytorch/5c2000ba08e903255a146f9e003c87a2fb1cb5aa/det/config/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /det/config/__pycache__/config.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TengFeiHan0/Object-Detection.pytorch/5c2000ba08e903255a146f9e003c87a2fb1cb5aa/det/config/__pycache__/config.cpython-36.pyc -------------------------------------------------------------------------------- /det/config/__pycache__/defaults.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TengFeiHan0/Object-Detection.pytorch/5c2000ba08e903255a146f9e003c87a2fb1cb5aa/det/config/__pycache__/defaults.cpython-36.pyc -------------------------------------------------------------------------------- /det/config/config.py: -------------------------------------------------------------------------------- 1 | from detectron2.config import CfgNode 2 | 3 | 4 | def get_cfg() -> CfgNode: 5 | """ 6 | Get a copy of the default config. 7 | 8 | Returns: 9 | a detectron2 CfgNode instance. 10 | """ 11 | from .defaults import _C 12 | 13 | return _C.clone() 14 | -------------------------------------------------------------------------------- /det/data/__init__.py: -------------------------------------------------------------------------------- 1 | from . 
import builtin # ensure the builtin datasets are registered 2 | from .dataset_mapper import DatasetMapperWithBasis 3 | 4 | 5 | __all__ = ["DatasetMapperWithBasis"] 6 | #grid mask trick 7 | #https://github.com/Jia-Research-Lab/GridMask/blob/master/detection_grid/maskrcnn_benchmark/data/transforms/grid.py -------------------------------------------------------------------------------- /det/data/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TengFeiHan0/Object-Detection.pytorch/5c2000ba08e903255a146f9e003c87a2fb1cb5aa/det/data/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /det/data/__pycache__/builtin.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TengFeiHan0/Object-Detection.pytorch/5c2000ba08e903255a146f9e003c87a2fb1cb5aa/det/data/__pycache__/builtin.cpython-36.pyc -------------------------------------------------------------------------------- /det/data/__pycache__/dataset_mapper.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TengFeiHan0/Object-Detection.pytorch/5c2000ba08e903255a146f9e003c87a2fb1cb5aa/det/data/__pycache__/dataset_mapper.cpython-36.pyc -------------------------------------------------------------------------------- /det/data/__pycache__/detection_utils.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TengFeiHan0/Object-Detection.pytorch/5c2000ba08e903255a146f9e003c87a2fb1cb5aa/det/data/__pycache__/detection_utils.cpython-36.pyc -------------------------------------------------------------------------------- /det/data/builtin.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from detectron2.data.datasets.register_coco import register_coco_instances 4 | from detectron2.data.datasets.builtin_meta import _get_builtin_metadata 5 | 6 | from .datasets.text import register_text_instances 7 | 8 | # register plane reconstruction 9 | 10 | _PREDEFINED_SPLITS_PIC = { 11 | "pic_person_train": ("pic/image/train", "pic/annotations/train_person.json"), 12 | "pic_person_val": ("pic/image/val", "pic/annotations/val_person.json"), 13 | } 14 | 15 | metadata_pic = { 16 | "thing_classes": ["person"] 17 | } 18 | 19 | _PREDEFINED_SPLITS_TEXT = { 20 | "totaltext_train": ("totaltext/train_images", "totaltext/train.json"), 21 | "totaltext_val": ("totaltext/test_images", "totaltext/test.json"), 22 | "ctw1500_word_train": ("CTW1500/ctwtrain_text_image", "CTW1500/annotations/train_ctw1500_maxlen100_v2.json"), 23 | "ctw1500_word_test": ("CTW1500/ctwtest_text_image","CTW1500/annotations/test_ctw1500_maxlen100.json"), 24 | "syntext1_train": ("syntext1/images", "syntext1/annotations/train.json"), 25 | "syntext2_train": ("syntext2/images", "syntext2/annotations/train.json"), 26 | "mltbezier_word_train": ("mlt2017/images","mlt2017/annotations/train.json"), 27 | } 28 | 29 | metadata_text = { 30 | "thing_classes": ["text"] 31 | } 32 | 33 | 34 | def register_all_coco(root="datasets"): 35 | for key, (image_root, json_file) in _PREDEFINED_SPLITS_PIC.items(): 36 | # Assume pre-defined datasets live in `./datasets`. 
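        # register_coco_instances(name, metadata, json_file, image_root) exposes each split under
        # `name`, so configs can reference it through DATASETS.TRAIN / DATASETS.TEST; adding a new
        # COCO-format split only requires another entry in _PREDEFINED_SPLITS_PIC above.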
37 | register_coco_instances( 38 | key, 39 | metadata_pic, 40 | os.path.join(root, json_file) if "://" not in json_file else json_file, 41 | os.path.join(root, image_root), 42 | ) 43 | for key, (image_root, json_file) in _PREDEFINED_SPLITS_TEXT.items(): 44 | # Assume pre-defined datasets live in `./datasets`. 45 | register_text_instances( 46 | key, 47 | metadata_text, 48 | os.path.join(root, json_file) if "://" not in json_file else json_file, 49 | os.path.join(root, image_root), 50 | ) 51 | 52 | 53 | register_all_coco() -------------------------------------------------------------------------------- /det/data/dataset_mapper.py: -------------------------------------------------------------------------------- 1 | import copy 2 | import numpy as np 3 | import os.path as osp 4 | import torch 5 | from fvcore.common.file_io import PathManager 6 | from PIL import Image 7 | import logging 8 | 9 | from detectron2.data.dataset_mapper import DatasetMapper 10 | from detectron2.data.detection_utils import SizeMismatchError 11 | from detectron2.data import detection_utils as utils 12 | from detectron2.data import transforms as T 13 | 14 | from .detection_utils import ( 15 | build_transform_gen, 16 | transform_instance_annotations, 17 | annotations_to_instances, 18 | gen_crop_transform_with_instance, 19 | ) 20 | 21 | """ 22 | This file contains the default mapping that's applied to "dataset dicts". 23 | """ 24 | 25 | __all__ = ["DatasetMapperWithBasis"] 26 | 27 | logger = logging.getLogger(__name__) 28 | 29 | 30 | class DatasetMapperWithBasis(DatasetMapper): 31 | """ 32 | This caller enables the default Detectron2 mapper to read an additional basis semantic label 33 | """ 34 | 35 | def __init__(self, cfg, is_train=True): 36 | super().__init__(cfg, is_train) 37 | 38 | # Rebuild transform gen 39 | logger.info("Rebuilding the transform generators. The previous generators will be overridden.") 40 | self.tfm_gens = build_transform_gen(cfg, is_train) 41 | 42 | # fmt: off 43 | self.basis_loss_on = cfg.MODEL.BASIS_MODULE.LOSS_ON 44 | self.ann_set = cfg.MODEL.BASIS_MODULE.ANN_SET 45 | self.crop_box = cfg.INPUT.CROP.CROP_INSTANCE 46 | # fmt: on 47 | 48 | def __call__(self, dataset_dict): 49 | """ 50 | Args: 51 | dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format. 52 | 53 | Returns: 54 | dict: a format that builtin models in detectron2 accept 55 | """ 56 | dataset_dict = copy.deepcopy(dataset_dict) # it will be modified by code below 57 | # USER: Write your own image loading if it's not from a file 58 | try: 59 | image = utils.read_image(dataset_dict["file_name"], format=self.img_format) 60 | except Exception as e: 61 | print(dataset_dict["file_name"]) 62 | print(e) 63 | raise e 64 | try: 65 | utils.check_image_size(dataset_dict, image) 66 | except SizeMismatchError as e: 67 | expected_wh = (dataset_dict["width"], dataset_dict["height"]) 68 | image_wh = (image.shape[1], image.shape[0]) 69 | if (image_wh[1], image_wh[0]) == expected_wh: 70 | print("transposing image {}".format(dataset_dict["file_name"])) 71 | image = image.transpose(1, 0, 2) 72 | else: 73 | raise e 74 | 75 | if "annotations" not in dataset_dict or len(dataset_dict["annotations"]) == 0: 76 | image, transforms = T.apply_transform_gens( 77 | ([self.crop_gen] if self.crop_gen else []) + self.tfm_gens, image 78 | ) 79 | else: 80 | # Crop around an instance if there are instances in the image. 
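            # (gen_crop_transform_with_instance, defined in det/data/detection_utils.py below,
            # centers the crop on a randomly chosen annotated instance.)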
81 | # USER: Remove if you don't use cropping 82 | if self.crop_gen: 83 | crop_tfm = gen_crop_transform_with_instance( 84 | self.crop_gen.get_crop_size(image.shape[:2]), 85 | image.shape[:2], 86 | dataset_dict["annotations"], 87 | crop_box=self.crop_box, 88 | ) 89 | image = crop_tfm.apply_image(image) 90 | try: 91 | image, transforms = T.apply_transform_gens(self.tfm_gens, image) 92 | except ValueError as e: 93 | print(dataset_dict["file_name"]) 94 | raise e 95 | if self.crop_gen: 96 | transforms = crop_tfm + transforms 97 | 98 | image_shape = image.shape[:2] # h, w 99 | 100 | # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory, 101 | # but not efficient on large generic data structures due to the use of pickle & mp.Queue. 102 | # Therefore it's important to use torch.Tensor. 103 | dataset_dict["image"] = torch.as_tensor(image.transpose(2, 0, 1).astype("float32")) 104 | # Can use uint8 if it turns out to be slow some day 105 | 106 | # USER: Remove if you don't use pre-computed proposals. 107 | if self.load_proposals: 108 | utils.transform_proposals( 109 | dataset_dict, image_shape, transforms, self.min_box_side_len, self.proposal_topk 110 | ) 111 | 112 | if not self.is_train: 113 | dataset_dict.pop("annotations", None) 114 | dataset_dict.pop("sem_seg_file_name", None) 115 | dataset_dict.pop("pano_seg_file_name", None) 116 | return dataset_dict 117 | 118 | if "annotations" in dataset_dict: 119 | # USER: Modify this if you want to keep them for some reason. 120 | for anno in dataset_dict["annotations"]: 121 | if not self.mask_on: 122 | anno.pop("segmentation", None) 123 | if not self.keypoint_on: 124 | anno.pop("keypoints", None) 125 | 126 | # USER: Implement additional transformations if you have other types of data 127 | annos = [ 128 | transform_instance_annotations( 129 | obj, transforms, image_shape, keypoint_hflip_indices=self.keypoint_hflip_indices 130 | ) 131 | for obj in dataset_dict.pop("annotations") 132 | if obj.get("iscrowd", 0) == 0 133 | ] 134 | instances = annotations_to_instances( 135 | annos, image_shape, mask_format=self.mask_format 136 | ) 137 | # Create a tight bounding box from masks, useful when image is cropped 138 | if self.crop_gen and instances.has("gt_masks"): 139 | instances.gt_boxes = instances.gt_masks.get_bounding_boxes() 140 | dataset_dict["instances"] = utils.filter_empty_instances(instances) 141 | 142 | # USER: Remove if you don't do semantic/panoptic segmentation. 
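        # The block below reads the semantic-segmentation GT, applies the same `transforms`
        # as the image, and stores it as a long tensor under "sem_seg".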
143 | if "sem_seg_file_name" in dataset_dict: 144 | with PathManager.open(dataset_dict.pop("sem_seg_file_name"), "rb") as f: 145 | sem_seg_gt = Image.open(f) 146 | sem_seg_gt = np.asarray(sem_seg_gt, dtype="uint8") 147 | sem_seg_gt = transforms.apply_segmentation(sem_seg_gt) 148 | sem_seg_gt = torch.as_tensor(sem_seg_gt.astype("long")) 149 | dataset_dict["sem_seg"] = sem_seg_gt 150 | 151 | if self.basis_loss_on and self.is_train: 152 | # load basis supervisions 153 | if self.ann_set == "coco": 154 | basis_sem_path = dataset_dict["file_name"].replace('train2017', 'thing_train2017').replace('image/train', 'thing_train') 155 | else: 156 | basis_sem_path = dataset_dict["file_name"].replace('coco', 'lvis').replace('train2017', 'thing_train') 157 | # change extension to npz 158 | basis_sem_path = osp.splitext(basis_sem_path)[0] + ".npz" 159 | basis_sem_gt = np.load(basis_sem_path)["mask"] 160 | basis_sem_gt = transforms.apply_segmentation(basis_sem_gt) 161 | basis_sem_gt = torch.as_tensor(basis_sem_gt.astype("long")) 162 | dataset_dict["basis_sem"] = basis_sem_gt 163 | return dataset_dict 164 | -------------------------------------------------------------------------------- /det/data/datasets/__pycache__/text.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TengFeiHan0/Object-Detection.pytorch/5c2000ba08e903255a146f9e003c87a2fb1cb5aa/det/data/datasets/__pycache__/text.cpython-36.pyc -------------------------------------------------------------------------------- /det/data/datasets/fast_augment.py: -------------------------------------------------------------------------------- 1 | from PIL import Image 2 | 3 | import torch 4 | 5 | from .augment_lists import * 6 | 7 | class RandAugment(object): 8 | def __init__(self, n, m): 9 | self.n = n 10 | self.m = m 11 | self.augment_list = rand_augment_list() 12 | self.topil = ToPIL() 13 | 14 | def __call__(self, img): 15 | img = self.topil(img) 16 | ops = random.choices(self.augment_list, k=self.n) 17 | for op, minval, maxval in ops: 18 | if random.random() > random.uniform(0.2, 0.8): 19 | continue 20 | val = (float(self.m) / 30) * float(maxval - minval) + minval 21 | img = op(img, val) 22 | return img 23 | 24 | 25 | class ToPIL(object): 26 | """Convert image from ndarray format to PIL 27 | """ 28 | def __call__(self, img): 29 | x = Image.fromarray(img.asnumpy()) 30 | return x 31 | 32 | # class ToNDArray(object): 33 | # def __call__(self, img): 34 | # x = mx.nd.array(np.array(img), .cpu(0)) 35 | # return x 36 | 37 | class AugmentationBlock(object): 38 | r""" 39 | AutoAugment Block 40 | Example 41 | ------- 42 | >>> aa_transform = AugmentationBlock(autoaug_imagenet_policies()) 43 | """ 44 | def __init__(self, policies): 45 | """ 46 | plicies : list of (name, pr, level) 47 | """ 48 | super().__init__() 49 | self.policies = policies 50 | self.topil = ToPIL() 51 | #self.tond = ToNDArray() 52 | 53 | def forward(self, img): 54 | img = self.topil(img) 55 | policy = random.choice(self.policies) 56 | for name, pr, level in policy: 57 | if random.random() > pr: 58 | continue 59 | img = apply_augment(img, name, level) 60 | #img = self.tond(img) 61 | return img 62 | -------------------------------------------------------------------------------- /det/data/detection_utils.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import numpy as np 3 | 4 | import torch 5 | 6 | from detectron2.data import transforms as T 7 | from 
detectron2.data.detection_utils import transform_instance_annotations as d2_transform_inst_anno 8 | from detectron2.data.detection_utils import annotations_to_instances as d2_anno_to_inst 9 | from detectron2.structures import BoxMode 10 | 11 | 12 | def gen_crop_transform_with_instance(crop_size, image_size, instances, crop_box=True): 13 | """ 14 | Generate a CropTransform so that the cropping region contains 15 | the center of the given instance. 16 | 17 | Args: 18 | crop_size (tuple): h, w in pixels 19 | image_size (tuple): h, w 20 | instance (dict): an annotation dict of one instance, in Detectron2's 21 | dataset format. 22 | """ 23 | instance = np.random.choice(instances), 24 | instance = instance[0] 25 | crop_size = np.asarray(crop_size, dtype=np.int32) 26 | bbox = BoxMode.convert(instance["bbox"], instance["bbox_mode"], BoxMode.XYXY_ABS) 27 | center_yx = (bbox[1] + bbox[3]) * 0.5, (bbox[0] + bbox[2]) * 0.5 28 | assert ( 29 | image_size[0] >= center_yx[0] and image_size[1] >= center_yx[1] 30 | ), "The annotation bounding box is outside of the image!" 31 | assert ( 32 | image_size[0] >= crop_size[0] and image_size[1] >= crop_size[1] 33 | ), "Crop size is larger than image size!" 34 | 35 | min_yx = np.maximum(np.floor(center_yx).astype(np.int32) - crop_size, 0) 36 | max_yx = np.maximum(np.asarray(image_size, dtype=np.int32) - crop_size, 0) 37 | max_yx = np.minimum(max_yx, np.ceil(center_yx).astype(np.int32)) 38 | 39 | y0 = np.random.randint(min_yx[0], max_yx[0] + 1) 40 | x0 = np.random.randint(min_yx[1], max_yx[1] + 1) 41 | 42 | # if some instance is cropped extend the box 43 | if not crop_box: 44 | modified = True 45 | while modified: 46 | modified, x0, y0, crop_size = adjust_crop(x0, y0, crop_size, instances) 47 | 48 | return T.CropTransform(*map(int, (x0, y0, crop_size[1], crop_size[0]))) 49 | 50 | 51 | def adjust_crop(x0, y0, crop_size, instances): 52 | modified = False 53 | 54 | x1 = x0 + crop_size[1] 55 | y1 = y0 + crop_size[0] 56 | 57 | for instance in instances: 58 | bbox = BoxMode.convert(instance["bbox"], instance["bbox_mode"], BoxMode.XYXY_ABS) 59 | 60 | if bbox[0] < x0 and bbox[2] > x0: 61 | crop_size[1] += x0 - bbox[0] 62 | x0 = bbox[0] 63 | modified = True 64 | 65 | if bbox[0] < x1 and bbox[2] > x1: 66 | crop_size[1] += bbox[2] - x1 67 | x1 = bbox[2] 68 | modified = True 69 | 70 | if bbox[1] < y0 and bbox[3] > y0: 71 | crop_size[0] += y0 - bbox[1] 72 | y0 = bbox[1] 73 | modified = True 74 | 75 | if bbox[1] < y1 and bbox[3] > y1: 76 | crop_size[0] += bbox[3] - y1 77 | y1 = bbox[3] 78 | modified = True 79 | 80 | return modified, x0, y0, crop_size 81 | 82 | 83 | def transform_instance_annotations( 84 | annotation, transforms, image_size, *, keypoint_hflip_indices=None 85 | ): 86 | 87 | annotation = d2_transform_inst_anno( 88 | annotation, transforms, image_size, 89 | keypoint_hflip_indices=keypoint_hflip_indices) 90 | 91 | if "beziers" in annotation: 92 | beziers = transform_beziers_annotations( 93 | annotation["beziers"], transforms 94 | ) 95 | annotation["beziers"] = beziers 96 | return annotation 97 | 98 | 99 | def transform_beziers_annotations(beziers, transforms): 100 | """ 101 | Transform keypoint annotations of an image. 102 | 103 | Args: 104 | beziers (list[float]): Nx16 float in Detectron2 Dataset format. 
105 | transforms (TransformList): 106 | """ 107 | # (N*2,) -> (N, 2) 108 | beziers = np.asarray(beziers, dtype="float64").reshape(-1, 2) 109 | beziers = transforms.apply_coords(beziers).reshape(-1) 110 | 111 | # This assumes that HorizFlipTransform is the only one that does flip 112 | do_hflip = sum(isinstance(t, T.HFlipTransform) for t in transforms.transforms) % 2 == 1 113 | if do_hflip: 114 | raise ValueError("Flipping text data is not supported (also disencouraged).") 115 | 116 | return beziers 117 | 118 | 119 | def annotations_to_instances(annos, image_size, mask_format="polygon"): 120 | instance = d2_anno_to_inst(annos, image_size, mask_format) 121 | 122 | # add attributes 123 | if "beziers" in annos[0]: 124 | beziers = [obj.get("beziers", []) for obj in annos] 125 | instance.beziers = torch.as_tensor( 126 | beziers, dtype=torch.float32) 127 | 128 | if "rec" in annos[0]: 129 | text = [obj.get("rec", []) for obj in annos] 130 | instance.text = torch.as_tensor( 131 | text, dtype=torch.int32) 132 | 133 | return instance 134 | 135 | 136 | def build_transform_gen(cfg, is_train): 137 | """ 138 | With option to don't use hflip 139 | 140 | Returns: 141 | list[TransformGen] 142 | """ 143 | if is_train: 144 | min_size = cfg.INPUT.MIN_SIZE_TRAIN 145 | max_size = cfg.INPUT.MAX_SIZE_TRAIN 146 | sample_style = cfg.INPUT.MIN_SIZE_TRAIN_SAMPLING 147 | else: 148 | min_size = cfg.INPUT.MIN_SIZE_TEST 149 | max_size = cfg.INPUT.MAX_SIZE_TEST 150 | sample_style = "choice" 151 | if sample_style == "range": 152 | assert len(min_size) == 2, "more than 2 ({}) min_size(s) are provided for ranges".format( 153 | len(min_size) 154 | ) 155 | 156 | logger = logging.getLogger(__name__) 157 | tfm_gens = [] 158 | tfm_gens.append(T.ResizeShortestEdge(min_size, max_size, sample_style)) 159 | if is_train: 160 | if cfg.INPUT.HFLIP_TRAIN: 161 | tfm_gens.append(T.RandomFlip()) 162 | logger.info("TransformGens used in training: " + str(tfm_gens)) 163 | return tfm_gens 164 | -------------------------------------------------------------------------------- /det/layers/__init__.py: -------------------------------------------------------------------------------- 1 | from .deform_conv import DFConv2d 2 | from .ml_nms import ml_nms 3 | from .iou_loss import IOULoss 4 | from .balanced_l1_loss import BalancedL1Loss, BoundedIoULoss 5 | from .conv_with_kaiming_uniform import conv_with_kaiming_uniform 6 | from .bezier_align import BezierAlign 7 | from .def_roi_align import DefROIAlign 8 | from .naive_group_norm import NaiveGroupNorm 9 | from .gcn import GCN 10 | from .soft_nms import batched_soft_nms 11 | __all__ = [k for k in globals().keys() if not k.startswith("_")] -------------------------------------------------------------------------------- /det/layers/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TengFeiHan0/Object-Detection.pytorch/5c2000ba08e903255a146f9e003c87a2fb1cb5aa/det/layers/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /det/layers/__pycache__/balanced_l1_loss.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TengFeiHan0/Object-Detection.pytorch/5c2000ba08e903255a146f9e003c87a2fb1cb5aa/det/layers/__pycache__/balanced_l1_loss.cpython-36.pyc -------------------------------------------------------------------------------- 
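The data pipeline shown above (`DatasetMapperWithBasis` plus `build_transform_gen`) can be exercised on its own. A minimal sketch, assuming the repo's config defaults define `INPUT.HFLIP_TRAIN` as in AdelaiDet and that the legacy `T.apply_transform_gens` API (the same call the mapper makes) is available in the installed detectron2:

```python
import numpy as np
from detectron2.data import transforms as T
from det.config import get_cfg
from det.data.detection_utils import build_transform_gen

cfg = get_cfg()
cfg.merge_from_file("configs/FCOS-Detection/R_50_1x.yaml")    # MIN_SIZE_TRAIN: (800,)
tfm_gens = build_transform_gen(cfg, is_train=True)            # ResizeShortestEdge (+ RandomFlip if HFLIP_TRAIN)
dummy = np.zeros((480, 640, 3), dtype=np.uint8)               # stand-in for utils.read_image(...)
image, transforms = T.apply_transform_gens(tfm_gens, dummy)   # resized image and the TransformList applied
print(image.shape, transforms)
```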
/det/layers/__pycache__/bezier_align.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TengFeiHan0/Object-Detection.pytorch/5c2000ba08e903255a146f9e003c87a2fb1cb5aa/det/layers/__pycache__/bezier_align.cpython-36.pyc -------------------------------------------------------------------------------- /det/layers/__pycache__/conv_with_kaiming_uniform.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TengFeiHan0/Object-Detection.pytorch/5c2000ba08e903255a146f9e003c87a2fb1cb5aa/det/layers/__pycache__/conv_with_kaiming_uniform.cpython-36.pyc -------------------------------------------------------------------------------- /det/layers/__pycache__/def_roi_align.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TengFeiHan0/Object-Detection.pytorch/5c2000ba08e903255a146f9e003c87a2fb1cb5aa/det/layers/__pycache__/def_roi_align.cpython-36.pyc -------------------------------------------------------------------------------- /det/layers/__pycache__/deform_conv.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TengFeiHan0/Object-Detection.pytorch/5c2000ba08e903255a146f9e003c87a2fb1cb5aa/det/layers/__pycache__/deform_conv.cpython-36.pyc -------------------------------------------------------------------------------- /det/layers/__pycache__/gcn.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TengFeiHan0/Object-Detection.pytorch/5c2000ba08e903255a146f9e003c87a2fb1cb5aa/det/layers/__pycache__/gcn.cpython-36.pyc -------------------------------------------------------------------------------- /det/layers/__pycache__/iou_loss.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TengFeiHan0/Object-Detection.pytorch/5c2000ba08e903255a146f9e003c87a2fb1cb5aa/det/layers/__pycache__/iou_loss.cpython-36.pyc -------------------------------------------------------------------------------- /det/layers/__pycache__/ml_nms.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TengFeiHan0/Object-Detection.pytorch/5c2000ba08e903255a146f9e003c87a2fb1cb5aa/det/layers/__pycache__/ml_nms.cpython-36.pyc -------------------------------------------------------------------------------- /det/layers/__pycache__/naive_group_norm.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TengFeiHan0/Object-Detection.pytorch/5c2000ba08e903255a146f9e003c87a2fb1cb5aa/det/layers/__pycache__/naive_group_norm.cpython-36.pyc -------------------------------------------------------------------------------- /det/layers/__pycache__/soft_nms.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TengFeiHan0/Object-Detection.pytorch/5c2000ba08e903255a146f9e003c87a2fb1cb5aa/det/layers/__pycache__/soft_nms.cpython-36.pyc -------------------------------------------------------------------------------- /det/layers/balanced_l1_loss.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import 
torch.nn as nn 4 | 5 | import functools 6 | 7 | import torch.nn.functional as F 8 | 9 | 10 | def reduce_loss(loss, reduction): 11 | """Reduce loss as specified. 12 | Args: 13 | loss (Tensor): Elementwise loss tensor. 14 | reduction (str): Options are "none", "mean" and "sum". 15 | Return: 16 | Tensor: Reduced loss tensor. 17 | """ 18 | reduction_enum = F._Reduction.get_enum(reduction) 19 | # none: 0, elementwise_mean:1, sum: 2 20 | if reduction_enum == 0: 21 | return loss 22 | elif reduction_enum == 1: 23 | return loss.mean() 24 | elif reduction_enum == 2: 25 | return loss.sum() 26 | 27 | 28 | def weight_reduce_loss(loss, weight=None, reduction='mean', avg_factor=None): 29 | """Apply element-wise weight and reduce loss. 30 | Args: 31 | loss (Tensor): Element-wise loss. 32 | weight (Tensor): Element-wise weights. 33 | reduction (str): Same as built-in losses of PyTorch. 34 | avg_factor (float): Avarage factor when computing the mean of losses. 35 | Returns: 36 | Tensor: Processed loss values. 37 | """ 38 | # if weight is specified, apply element-wise weight 39 | if weight is not None: 40 | loss = loss * weight 41 | 42 | # if avg_factor is not specified, just reduce the loss 43 | if avg_factor is None: 44 | loss = reduce_loss(loss, reduction) 45 | else: 46 | # if reduction is mean, then average the loss by avg_factor 47 | if reduction == 'mean': 48 | loss = loss.sum() / avg_factor 49 | # if reduction is 'none', then do nothing, otherwise raise an error 50 | elif reduction != 'none': 51 | raise ValueError('avg_factor can not be used with reduction="sum"') 52 | return loss 53 | 54 | 55 | def weighted_loss(loss_func): 56 | """Create a weighted version of a given loss function. 57 | To use this decorator, the loss function must have the signature like 58 | `loss_func(pred, target, **kwargs)`. The function only needs to compute 59 | element-wise loss without any reduction. This decorator will add weight 60 | and reduction arguments to the function. The decorated function will have 61 | the signature like `loss_func(pred, target, weight=None, reduction='mean', 62 | avg_factor=None, **kwargs)`. 63 | :Example: 64 | >>> @weighted_loss 65 | >>> def l1_loss(pred, target): 66 | >>> return (pred - target).abs() 67 | >>> pred = torch.Tensor([0, 2, 3]) 68 | >>> target = torch.Tensor([1, 1, 1]) 69 | >>> weight = torch.Tensor([1, 0, 1]) 70 | >>> l1_loss(pred, target) 71 | tensor(1.3333) 72 | >>> l1_loss(pred, target, weight) 73 | tensor(1.) 
74 | >>> l1_loss(pred, target, reduction='none') 75 | tensor([1., 1., 2.]) 76 | >>> l1_loss(pred, target, weight, avg_factor=2) 77 | tensor(1.5000) 78 | """ 79 | 80 | @functools.wraps(loss_func) 81 | def wrapper(pred, 82 | target, 83 | weight=None, 84 | reduction='mean', 85 | avg_factor=None, 86 | **kwargs): 87 | # get element-wise loss 88 | loss = loss_func(pred, target, **kwargs) 89 | loss = weight_reduce_loss(loss, weight, reduction, avg_factor) 90 | return loss 91 | 92 | return wrapper 93 | 94 | 95 | 96 | @weighted_loss 97 | def balanced_l1_loss(pred, 98 | target, 99 | beta=1.0, 100 | alpha=0.5, 101 | gamma=1.5, 102 | reduction='mean'): 103 | assert beta > 0 104 | assert pred.size() == target.size() and target.numel() > 0 105 | 106 | diff = torch.abs(pred - target) 107 | b = np.e**(gamma / alpha) - 1 108 | loss = torch.where( 109 | diff < beta, alpha / b * 110 | (b * diff + 1) * torch.log(b * diff / beta + 1) - alpha * diff, 111 | gamma * diff + gamma / b - alpha * beta) 112 | 113 | return loss 114 | 115 | @weighted_loss 116 | def bounded_iou_loss(pred, target, beta=0.2, eps=1e-3): 117 | """Improving Object Localization with Fitness NMS and Bounded IoU Loss, 118 | https://arxiv.org/abs/1711.00164. 119 | 120 | Args: 121 | pred (tensor): Predicted bboxes. 122 | target (tensor): Target bboxes. 123 | beta (float): beta parameter in smoothl1. 124 | eps (float): eps to avoid NaN. 125 | """ 126 | pred_ctrx = (pred[:, 0] + pred[:, 2]) * 0.5 127 | pred_ctry = (pred[:, 1] + pred[:, 3]) * 0.5 128 | pred_w = pred[:, 2] - pred[:, 0] 129 | pred_h = pred[:, 3] - pred[:, 1] 130 | with torch.no_grad(): 131 | target_ctrx = (target[:, 0] + target[:, 2]) * 0.5 132 | target_ctry = (target[:, 1] + target[:, 3]) * 0.5 133 | target_w = target[:, 2] - target[:, 0] 134 | target_h = target[:, 3] - target[:, 1] 135 | 136 | dx = target_ctrx - pred_ctrx 137 | dy = target_ctry - pred_ctry 138 | 139 | loss_dx = 1 - torch.max( 140 | (target_w - 2 * dx.abs()) / 141 | (target_w + 2 * dx.abs() + eps), torch.zeros_like(dx)) 142 | loss_dy = 1 - torch.max( 143 | (target_h - 2 * dy.abs()) / 144 | (target_h + 2 * dy.abs() + eps), torch.zeros_like(dy)) 145 | loss_dw = 1 - torch.min(target_w / (pred_w + eps), pred_w / 146 | (target_w + eps)) 147 | loss_dh = 1 - torch.min(target_h / (pred_h + eps), pred_h / 148 | (target_h + eps)) 149 | loss_comb = torch.stack([loss_dx, loss_dy, loss_dw, loss_dh], 150 | dim=-1).view(loss_dx.size(0), -1) 151 | 152 | loss = torch.where(loss_comb < beta, 0.5 * loss_comb * loss_comb / beta, 153 | loss_comb - 0.5 * beta) 154 | return loss 155 | 156 | class BalancedL1Loss(nn.Module): 157 | """Balanced L1 Loss 158 | arXiv: https://arxiv.org/pdf/1904.02701.pdf (CVPR 2019) 159 | """ 160 | 161 | def __init__(self, 162 | alpha=0.5, 163 | gamma=1.5, 164 | beta=1.0, 165 | reduction='mean', 166 | loss_weight=1.0): 167 | super(BalancedL1Loss, self).__init__() 168 | self.alpha = alpha 169 | self.gamma = gamma 170 | self.beta = beta 171 | self.reduction = reduction 172 | self.loss_weight = loss_weight 173 | 174 | def forward(self, 175 | pred, 176 | target, 177 | weight=None, 178 | avg_factor=None, 179 | reduction_override=None, 180 | **kwargs): 181 | assert reduction_override in (None, 'none', 'mean', 'sum') 182 | reduction = ( 183 | reduction_override if reduction_override else self.reduction) 184 | loss_bbox = self.loss_weight * balanced_l1_loss( 185 | pred, 186 | target, 187 | weight, 188 | alpha=self.alpha, 189 | gamma=self.gamma, 190 | beta=self.beta, 191 | reduction=reduction, 192 | avg_factor=avg_factor, 
193 | **kwargs) 194 | return loss_bbox 195 | 196 | 197 | class BoundedIoULoss(nn.Module): 198 | 199 | def __init__(self, beta=0.2, eps=1e-3, reduction='mean', loss_weight=1.0): 200 | super(BoundedIoULoss, self).__init__() 201 | self.beta = beta 202 | self.eps = eps 203 | self.reduction = reduction 204 | self.loss_weight = loss_weight 205 | 206 | def forward(self, 207 | pred, 208 | target, 209 | weight=None, 210 | avg_factor=None, 211 | reduction_override=None, 212 | **kwargs): 213 | if weight is not None and not torch.any(weight > 0): 214 | return (pred * weight).sum() # 0 215 | assert reduction_override in (None, 'none', 'mean', 'sum') 216 | reduction = ( 217 | reduction_override if reduction_override else self.reduction) 218 | loss = self.loss_weight * bounded_iou_loss( 219 | pred, 220 | target, 221 | weight, 222 | beta=self.beta, 223 | eps=self.eps, 224 | reduction=reduction, 225 | avg_factor=avg_factor, 226 | **kwargs) 227 | return loss 228 | -------------------------------------------------------------------------------- /det/layers/bezier_align.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | from torch import nn 3 | from torch.autograd import Function 4 | from torch.autograd.function import once_differentiable 5 | from torch.nn.modules.utils import _pair 6 | 7 | from det import _C 8 | 9 | 10 | class _BezierAlign(Function): 11 | @staticmethod 12 | def forward(ctx, input, roi, output_size, spatial_scale, sampling_ratio, aligned): 13 | ctx.save_for_backward(roi) 14 | ctx.output_size = _pair(output_size) 15 | ctx.spatial_scale = spatial_scale 16 | ctx.sampling_ratio = sampling_ratio 17 | ctx.input_shape = input.size() 18 | ctx.aligned = aligned 19 | output = _C.bezier_align_forward( 20 | input, roi, spatial_scale, output_size[0], output_size[1], sampling_ratio, aligned 21 | ) 22 | return output 23 | 24 | @staticmethod 25 | @once_differentiable 26 | def backward(ctx, grad_output): 27 | rois, = ctx.saved_tensors 28 | output_size = ctx.output_size 29 | spatial_scale = ctx.spatial_scale 30 | sampling_ratio = ctx.sampling_ratio 31 | bs, ch, h, w = ctx.input_shape 32 | grad_input = _C.bezier_align_backward( 33 | grad_output, 34 | rois, 35 | spatial_scale, 36 | output_size[0], 37 | output_size[1], 38 | bs, 39 | ch, 40 | h, 41 | w, 42 | sampling_ratio, 43 | ctx.aligned, 44 | ) 45 | return grad_input, None, None, None, None, None 46 | 47 | 48 | bezier_align = _BezierAlign.apply 49 | 50 | 51 | class BezierAlign(nn.Module): 52 | def __init__(self, output_size, spatial_scale, sampling_ratio, aligned=True): 53 | """ 54 | Args: 55 | output_size (tuple): h, w 56 | spatial_scale (float): scale the input boxes by this number 57 | sampling_ratio (int): number of inputs samples to take for each output 58 | sample. 0 to take samples densely. 59 | aligned (bool): if False, use the legacy implementation in 60 | Detectron. If True, align the results more perfectly. 61 | 62 | Note: 63 | The meaning of aligned=True: 64 | 65 | With `aligned=True`, 66 | we first appropriately scale the ROI and then shift it by -0.5 67 | prior to calling bezier_align. This produces the correct neighbors; see 68 | det/tests/test_bezier_align.py for verification. 69 | 70 | The difference does not make a difference to the model's performance if 71 | ROIAlign is used together with conv layers. 
72 | """ 73 | super(BezierAlign, self).__init__() 74 | self.output_size = output_size 75 | self.spatial_scale = spatial_scale 76 | self.sampling_ratio = sampling_ratio 77 | self.aligned = aligned 78 | 79 | def forward(self, input, rois): 80 | """ 81 | Args: 82 | input: NCHW images 83 | rois: Bx17 boxes. First column is the index into N. The other 16 columns are [xy]x8. 84 | """ 85 | assert rois.dim() == 2 and rois.size(1) == 17 86 | return bezier_align( 87 | input, rois, self.output_size, self.spatial_scale, self.sampling_ratio, self.aligned 88 | ) 89 | 90 | def __repr__(self): 91 | tmpstr = self.__class__.__name__ + "(" 92 | tmpstr += "output_size=" + str(self.output_size) 93 | tmpstr += ", spatial_scale=" + str(self.spatial_scale) 94 | tmpstr += ", sampling_ratio=" + str(self.sampling_ratio) 95 | tmpstr += ", aligned=" + str(self.aligned) 96 | tmpstr += ")" 97 | return tmpstr 98 | -------------------------------------------------------------------------------- /det/layers/conv_with_kaiming_uniform.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | 3 | from detectron2.layers import Conv2d 4 | from .deform_conv import DFConv2d 5 | from detectron2.layers.batch_norm import get_norm 6 | 7 | 8 | def conv_with_kaiming_uniform( 9 | norm=None, activation=None, 10 | use_deformable=False, use_sep=False): 11 | def make_conv( 12 | in_channels, out_channels, kernel_size, stride=1, dilation=1 13 | ): 14 | if use_deformable: 15 | conv_func = DFConv2d 16 | else: 17 | conv_func = Conv2d 18 | if use_sep: 19 | assert in_channels == out_channels 20 | groups = in_channels 21 | else: 22 | groups = 1 23 | conv = conv_func( 24 | in_channels, 25 | out_channels, 26 | kernel_size=kernel_size, 27 | stride=stride, 28 | padding=dilation * (kernel_size - 1) // 2, 29 | dilation=dilation, 30 | groups=groups, 31 | bias=(norm is None) 32 | ) 33 | if not use_deformable: 34 | # Caffe2 implementation uses XavierFill, which in fact 35 | # corresponds to kaiming_uniform_ in PyTorch 36 | nn.init.kaiming_uniform_(conv.weight, a=1) 37 | if norm is None: 38 | nn.init.constant_(conv.bias, 0) 39 | module = [conv,] 40 | if norm is not None and len(norm) > 0: 41 | if norm == "GN": 42 | norm_module = nn.GroupNorm(32, out_channels) 43 | else: 44 | norm_module = get_norm(norm, out_channels) 45 | module.append(norm_module) 46 | if activation is not None: 47 | module.append(nn.ReLU(inplace=True)) 48 | if len(module) > 1: 49 | return nn.Sequential(*module) 50 | return conv 51 | 52 | return make_conv 53 | -------------------------------------------------------------------------------- /det/layers/csrc/BezierAlign/BezierAlign.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved 2 | #pragma once 3 | #include 4 | 5 | namespace det { 6 | 7 | at::Tensor BezierAlign_forward_cpu( 8 | const at::Tensor& input, 9 | const at::Tensor& rois, 10 | const float spatial_scale, 11 | const int pooled_height, 12 | const int pooled_width, 13 | const int sampling_ratio, 14 | bool aligned); 15 | 16 | at::Tensor BezierAlign_backward_cpu( 17 | const at::Tensor& grad, 18 | const at::Tensor& rois, 19 | const float spatial_scale, 20 | const int pooled_height, 21 | const int pooled_width, 22 | const int batch_size, 23 | const int channels, 24 | const int height, 25 | const int width, 26 | const int sampling_ratio, 27 | bool aligned); 28 | 29 | #ifdef WITH_CUDA 30 | at::Tensor BezierAlign_forward_cuda( 31 | const at::Tensor& input, 32 | const at::Tensor& rois, 33 | const float spatial_scale, 34 | const int pooled_height, 35 | const int pooled_width, 36 | const int sampling_ratio, 37 | bool aligned); 38 | 39 | at::Tensor BezierAlign_backward_cuda( 40 | const at::Tensor& grad, 41 | const at::Tensor& rois, 42 | const float spatial_scale, 43 | const int pooled_height, 44 | const int pooled_width, 45 | const int batch_size, 46 | const int channels, 47 | const int height, 48 | const int width, 49 | const int sampling_ratio, 50 | bool aligned); 51 | #endif 52 | 53 | // Interface for Python 54 | inline at::Tensor BezierAlign_forward( 55 | const at::Tensor& input, 56 | const at::Tensor& rois, 57 | const float spatial_scale, 58 | const int pooled_height, 59 | const int pooled_width, 60 | const int sampling_ratio, 61 | bool aligned) { 62 | if (input.type().is_cuda()) { 63 | #ifdef WITH_CUDA 64 | return BezierAlign_forward_cuda( 65 | input, 66 | rois, 67 | spatial_scale, 68 | pooled_height, 69 | pooled_width, 70 | sampling_ratio, 71 | aligned); 72 | #else 73 | AT_ERROR("Not compiled with GPU support"); 74 | #endif 75 | } 76 | return BezierAlign_forward_cpu( 77 | input, 78 | rois, 79 | spatial_scale, 80 | pooled_height, 81 | pooled_width, 82 | sampling_ratio, 83 | aligned); 84 | } 85 | 86 | inline at::Tensor BezierAlign_backward( 87 | const at::Tensor& grad, 88 | const at::Tensor& rois, 89 | const float spatial_scale, 90 | const int pooled_height, 91 | const int pooled_width, 92 | const int batch_size, 93 | const int channels, 94 | const int height, 95 | const int width, 96 | const int sampling_ratio, 97 | bool aligned) { 98 | if (grad.type().is_cuda()) { 99 | #ifdef WITH_CUDA 100 | return BezierAlign_backward_cuda( 101 | grad, 102 | rois, 103 | spatial_scale, 104 | pooled_height, 105 | pooled_width, 106 | batch_size, 107 | channels, 108 | height, 109 | width, 110 | sampling_ratio, 111 | aligned); 112 | #else 113 | AT_ERROR("Not compiled with GPU support"); 114 | #endif 115 | } 116 | return BezierAlign_backward_cpu( 117 | grad, 118 | rois, 119 | spatial_scale, 120 | pooled_height, 121 | pooled_width, 122 | batch_size, 123 | channels, 124 | height, 125 | width, 126 | sampling_ratio, 127 | aligned); 128 | } 129 | 130 | } // namespace detectron2 131 | -------------------------------------------------------------------------------- /det/layers/csrc/DefROIAlign/DefROIAlign.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | 4 | namespace det { 5 | 6 | #ifdef WITH_CUDA 7 | at::Tensor DefROIAlign_forward_cuda( 8 | const at::Tensor& input, 9 | const at::Tensor& rois, 10 | const at::Tensor& offsets, // def added 11 | const float spatial_scale, 12 | const int pooled_height, 13 | const int pooled_width, 14 | const int 
sampling_ratio, 15 | const float trans_std, // def added 16 | bool aligned); 17 | 18 | at::Tensor DefROIAlign_backward_cuda( 19 | const at::Tensor& input, // def added 20 | const at::Tensor& grad, 21 | const at::Tensor& rois, 22 | const at::Tensor& offsets, // def added 23 | const at::Tensor& grad_offsets, // def added 24 | const float spatial_scale, 25 | const int pooled_height, 26 | const int pooled_width, 27 | const int batch_size, 28 | const int channels, 29 | const int height, 30 | const int width, 31 | const int sampling_ratio, 32 | const float trans_std, // def added 33 | bool aligned); 34 | #endif 35 | 36 | // Interface for Python 37 | inline at::Tensor DefROIAlign_forward( 38 | const at::Tensor& input, 39 | const at::Tensor& rois, 40 | const at::Tensor& offsets, // def added 41 | const float spatial_scale, 42 | const int pooled_height, 43 | const int pooled_width, 44 | const int sampling_ratio, 45 | const float trans_std, // def added 46 | bool aligned) { 47 | if (input.type().is_cuda()) { 48 | #ifdef WITH_CUDA 49 | return DefROIAlign_forward_cuda( 50 | input, 51 | rois, 52 | offsets, 53 | spatial_scale, 54 | pooled_height, 55 | pooled_width, 56 | sampling_ratio, 57 | trans_std, 58 | aligned); 59 | #else 60 | AT_ERROR("Not compiled with GPU support"); 61 | #endif 62 | } 63 | AT_ERROR("CPU version not supported"); 64 | } 65 | 66 | inline at::Tensor DefROIAlign_backward( 67 | const at::Tensor& input, // def added 68 | const at::Tensor& grad, 69 | const at::Tensor& rois, 70 | const at::Tensor& offsets, // def added 71 | const at::Tensor& grad_offsets, // def added 72 | const float spatial_scale, 73 | const int pooled_height, 74 | const int pooled_width, 75 | const int batch_size, 76 | const int channels, 77 | const int height, 78 | const int width, 79 | const int sampling_ratio, 80 | const float trans_std, // def added 81 | bool aligned) { 82 | if (grad.type().is_cuda()) { 83 | #ifdef WITH_CUDA 84 | return DefROIAlign_backward_cuda( 85 | input, // def added 86 | grad, 87 | rois, 88 | offsets, // def added 89 | grad_offsets, // def added 90 | spatial_scale, 91 | pooled_height, 92 | pooled_width, 93 | batch_size, 94 | channels, 95 | height, 96 | width, 97 | sampling_ratio, 98 | trans_std, // def added 99 | aligned); 100 | #else 101 | AT_ERROR("Not compiled with GPU support"); 102 | #endif 103 | } 104 | AT_ERROR("CPU version not supported"); 105 | } 106 | 107 | } // namespace det 108 | -------------------------------------------------------------------------------- /det/layers/csrc/cuda_version.cu: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | namespace det { 4 | int get_cudart_version() { 5 | return CUDART_VERSION; 6 | } 7 | } // namespace det 8 | -------------------------------------------------------------------------------- /det/layers/csrc/ml_nms/ml_nms.cu: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #include 8 | #include 9 | 10 | int const threadsPerBlock = sizeof(unsigned long long) * 8; 11 | 12 | __device__ inline float devIoU(float const * const a, float const * const b) { 13 | if (a[5] != b[5]) { 14 | return 0.0; 15 | } 16 | float left = max(a[0], b[0]), right = min(a[2], b[2]); 17 | float top = max(a[1], b[1]), bottom = min(a[3], b[3]); 18 | float width = max(right - left + 1, 0.f), height = max(bottom - top + 1, 0.f); 19 | float interS = width * height; 20 | float Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1); 21 | float Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1); 22 | return interS / (Sa + Sb - interS); 23 | } 24 | 25 | __global__ void ml_nms_kernel(const int n_boxes, const float nms_overlap_thresh, 26 | const float *dev_boxes, unsigned long long *dev_mask) { 27 | const int row_start = blockIdx.y; 28 | const int col_start = blockIdx.x; 29 | 30 | // if (row_start > col_start) return; 31 | 32 | const int row_size = 33 | min(n_boxes - row_start * threadsPerBlock, threadsPerBlock); 34 | const int col_size = 35 | min(n_boxes - col_start * threadsPerBlock, threadsPerBlock); 36 | 37 | __shared__ float block_boxes[threadsPerBlock * 6]; 38 | if (threadIdx.x < col_size) { 39 | block_boxes[threadIdx.x * 6 + 0] = 40 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 6 + 0]; 41 | block_boxes[threadIdx.x * 6 + 1] = 42 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 6 + 1]; 43 | block_boxes[threadIdx.x * 6 + 2] = 44 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 6 + 2]; 45 | block_boxes[threadIdx.x * 6 + 3] = 46 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 6 + 3]; 47 | block_boxes[threadIdx.x * 6 + 4] = 48 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 6 + 4]; 49 | block_boxes[threadIdx.x * 6 + 5] = 50 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 6 + 5]; 51 | } 52 | __syncthreads(); 53 | 54 | if (threadIdx.x < row_size) { 55 | const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x; 56 | const float *cur_box = dev_boxes + cur_box_idx * 6; 57 | int i = 0; 58 | unsigned long long t = 0; 59 | int start = 0; 60 | if (row_start == col_start) { 61 | start = threadIdx.x + 1; 62 | } 63 | for (i = start; i < col_size; i++) { 64 | if (devIoU(cur_box, block_boxes + i * 6) > nms_overlap_thresh) { 65 | t |= 1ULL << i; 66 | } 67 | } 68 | const int col_blocks = THCCeilDiv(n_boxes, threadsPerBlock); 69 | dev_mask[cur_box_idx * col_blocks + col_start] = t; 70 | } 71 | } 72 | 73 | namespace det { 74 | 75 | // boxes is a N x 6 tensor 76 | at::Tensor ml_nms_cuda(const at::Tensor boxes, const float nms_overlap_thresh) { 77 | using scalar_t = float; 78 | AT_ASSERTM(boxes.type().is_cuda(), "boxes must be a CUDA tensor"); 79 | auto scores = boxes.select(1, 4); 80 | auto order_t = std::get<1>(scores.sort(0, /* descending=*/true)); 81 | auto boxes_sorted = boxes.index_select(0, order_t); 82 | 83 | int boxes_num = boxes.size(0); 84 | 85 | const int col_blocks = THCCeilDiv(boxes_num, threadsPerBlock); 86 | 87 | scalar_t* boxes_dev = boxes_sorted.data(); 88 | 89 | THCState *state = at::globalContext().lazyInitCUDA(); // TODO replace with getTHCState 90 | 91 | unsigned long long* mask_dev = NULL; 92 | //THCudaCheck(THCudaMalloc(state, (void**) &mask_dev, 93 | // boxes_num * col_blocks * sizeof(unsigned long long))); 94 | 95 | mask_dev = (unsigned long long*) THCudaMalloc(state, boxes_num * col_blocks * sizeof(unsigned long long)); 96 | 97 | dim3 blocks(THCCeilDiv(boxes_num, threadsPerBlock), 
98 | THCCeilDiv(boxes_num, threadsPerBlock)); 99 | dim3 threads(threadsPerBlock); 100 | ml_nms_kernel<<>>(boxes_num, 101 | nms_overlap_thresh, 102 | boxes_dev, 103 | mask_dev); 104 | 105 | std::vector mask_host(boxes_num * col_blocks); 106 | THCudaCheck(cudaMemcpy(&mask_host[0], 107 | mask_dev, 108 | sizeof(unsigned long long) * boxes_num * col_blocks, 109 | cudaMemcpyDeviceToHost)); 110 | 111 | std::vector remv(col_blocks); 112 | memset(&remv[0], 0, sizeof(unsigned long long) * col_blocks); 113 | 114 | at::Tensor keep = at::empty({boxes_num}, boxes.options().dtype(at::kLong).device(at::kCPU)); 115 | int64_t* keep_out = keep.data(); 116 | 117 | int num_to_keep = 0; 118 | for (int i = 0; i < boxes_num; i++) { 119 | int nblock = i / threadsPerBlock; 120 | int inblock = i % threadsPerBlock; 121 | 122 | if (!(remv[nblock] & (1ULL << inblock))) { 123 | keep_out[num_to_keep++] = i; 124 | unsigned long long *p = &mask_host[0] + i * col_blocks; 125 | for (int j = nblock; j < col_blocks; j++) { 126 | remv[j] |= p[j]; 127 | } 128 | } 129 | } 130 | 131 | THCudaFree(state, mask_dev); 132 | // TODO improve this part 133 | return std::get<0>(order_t.index({ 134 | keep.narrow(/*dim=*/0, /*start=*/0, /*length=*/num_to_keep).to( 135 | order_t.device(), keep.scalar_type()) 136 | }).sort(0, false)); 137 | } 138 | 139 | } // namespace det -------------------------------------------------------------------------------- /det/layers/csrc/ml_nms/ml_nms.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | 4 | namespace det { 5 | 6 | 7 | #ifdef WITH_CUDA 8 | at::Tensor ml_nms_cuda( 9 | const at::Tensor dets, 10 | const float threshold); 11 | #endif 12 | 13 | at::Tensor ml_nms(const at::Tensor& dets, 14 | const at::Tensor& scores, 15 | const at::Tensor& labels, 16 | const float threshold) { 17 | 18 | if (dets.type().is_cuda()) { 19 | #ifdef WITH_CUDA 20 | // TODO raise error if not compiled with CUDA 21 | if (dets.numel() == 0) 22 | return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU)); 23 | auto b = at::cat({dets, scores.unsqueeze(1), labels.unsqueeze(1)}, 1); 24 | return ml_nms_cuda(b, threshold); 25 | #else 26 | AT_ERROR("Not compiled with GPU support"); 27 | #endif 28 | } 29 | AT_ERROR("CPU version not implemented"); 30 | } 31 | 32 | } // namespace det 33 | -------------------------------------------------------------------------------- /det/layers/csrc/vision.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | 3 | #include "ml_nms/ml_nms.h" 4 | #include "DefROIAlign/DefROIAlign.h" 5 | #include "BezierAlign/BezierAlign.h" 6 | 7 | namespace det { 8 | 9 | #ifdef WITH_CUDA 10 | extern int get_cudart_version(); 11 | #endif 12 | 13 | std::string get_cuda_version() { 14 | #ifdef WITH_CUDA 15 | std::ostringstream oss; 16 | 17 | // copied from 18 | // https://github.com/pytorch/pytorch/blob/master/aten/src/ATen/cuda/detail/CUDAHooks.cpp#L231 19 | auto printCudaStyleVersion = [&](int v) { 20 | oss << (v / 1000) << "." << (v / 10 % 100); 21 | if (v % 10 != 0) { 22 | oss << "." 
<< (v % 10); 23 | } 24 | }; 25 | printCudaStyleVersion(get_cudart_version()); 26 | return oss.str(); 27 | #else 28 | return std::string("not available"); 29 | #endif 30 | } 31 | 32 | // similar to 33 | // https://github.com/pytorch/pytorch/blob/master/aten/src/ATen/Version.cpp 34 | std::string get_compiler_version() { 35 | std::ostringstream ss; 36 | #if defined(__GNUC__) 37 | #ifndef __clang__ 38 | { ss << "GCC " << __GNUC__ << "." << __GNUC_MINOR__; } 39 | #endif 40 | #endif 41 | 42 | #if defined(__clang_major__) 43 | { 44 | ss << "clang " << __clang_major__ << "." << __clang_minor__ << "." 45 | << __clang_patchlevel__; 46 | } 47 | #endif 48 | 49 | #if defined(_MSC_VER) 50 | { ss << "MSVC " << _MSC_FULL_VER; } 51 | #endif 52 | return ss.str(); 53 | } 54 | 55 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 56 | m.def("ml_nms", &ml_nms, "Multi-Label NMS"); 57 | m.def("def_roi_align_forward", &DefROIAlign_forward, "def_roi_align_forward"); 58 | m.def("def_roi_align_backward", &DefROIAlign_backward, "def_roi_align_backward"); 59 | m.def("bezier_align_forward", &BezierAlign_forward, "bezier_align_forward"); 60 | m.def("bezier_align_backward", &BezierAlign_backward, "bezier_align_backward"); 61 | } 62 | 63 | } // namespace det 64 | -------------------------------------------------------------------------------- /det/layers/def_roi_align.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | from torch.autograd import Function 4 | from torch.autograd.function import once_differentiable 5 | from torch.nn.modules.utils import _pair 6 | 7 | from det import _C 8 | 9 | 10 | class _DefROIAlign(Function): 11 | @staticmethod 12 | def forward(ctx, input, roi, offsets, output_size, spatial_scale, sampling_ratio, trans_std, aligned): 13 | ctx.save_for_backward(input, roi, offsets) 14 | ctx.output_size = _pair(output_size) 15 | ctx.spatial_scale = spatial_scale 16 | ctx.sampling_ratio = sampling_ratio 17 | ctx.trans_std = trans_std 18 | ctx.input_shape = input.size() 19 | ctx.aligned = aligned 20 | output = _C.def_roi_align_forward( 21 | input, roi, offsets, spatial_scale, output_size[0], output_size[1], 22 | sampling_ratio, trans_std, aligned 23 | ) 24 | return output 25 | 26 | @staticmethod 27 | @once_differentiable 28 | def backward(ctx, grad_output): 29 | data, rois, offsets = ctx.saved_tensors 30 | output_size = ctx.output_size 31 | spatial_scale = ctx.spatial_scale 32 | sampling_ratio = ctx.sampling_ratio 33 | trans_std = ctx.trans_std 34 | bs, ch, h, w = ctx.input_shape 35 | grad_offsets = torch.zeros_like(offsets) 36 | 37 | grad_input = _C.def_roi_align_backward( 38 | data, 39 | grad_output, 40 | rois, 41 | offsets, 42 | grad_offsets, 43 | spatial_scale, 44 | output_size[0], 45 | output_size[1], 46 | bs, 47 | ch, 48 | h, 49 | w, 50 | sampling_ratio, 51 | trans_std, 52 | ctx.aligned, 53 | ) 54 | return grad_input, None, grad_offsets, None, None, None, None, None 55 | 56 | 57 | def_roi_align = _DefROIAlign.apply 58 | 59 | 60 | class DefROIAlign(nn.Module): 61 | def __init__(self, output_size, spatial_scale, 62 | sampling_ratio, trans_std, aligned=True): 63 | """ 64 | Args: 65 | output_size (tuple): h, w 66 | spatial_scale (float): scale the input boxes by this number 67 | sampling_ratio (int): number of inputs samples to take for each output 68 | sample. 0 to take samples densely. 
69 | trans_std (float): offset scale according to the normalized roi size 70 | aligned (bool): if False, use the legacy implementation in 71 | Detectron. If True, align the results more perfectly. 72 | """ 73 | super(DefROIAlign, self).__init__() 74 | self.output_size = output_size 75 | self.spatial_scale = spatial_scale 76 | self.sampling_ratio = sampling_ratio 77 | self.trans_std = trans_std 78 | self.aligned = aligned 79 | 80 | def forward(self, input, rois, offsets): 81 | """ 82 | Args: 83 | input: NCHW images 84 | rois: Bx5 boxes. First column is the index into N. The other 4 columns are xyxy. 85 | """ 86 | assert rois.dim() == 2 and rois.size(1) == 5 87 | return def_roi_align( 88 | input, rois, offsets, self.output_size, 89 | self.spatial_scale, self.sampling_ratio, 90 | self.trans_std, self.aligned 91 | ) 92 | 93 | def __repr__(self): 94 | tmpstr = self.__class__.__name__ + "(" 95 | tmpstr += "output_size=" + str(self.output_size) 96 | tmpstr += ", spatial_scale=" + str(self.spatial_scale) 97 | tmpstr += ", sampling_ratio=" + str(self.sampling_ratio) 98 | tmpstr += ", trans_std=" + str(self.trans_std) 99 | tmpstr += ", aligned=" + str(self.aligned) 100 | tmpstr += ")" 101 | return tmpstr 102 | -------------------------------------------------------------------------------- /det/layers/deform_conv.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | 4 | from detectron2.layers import Conv2d 5 | 6 | 7 | class _NewEmptyTensorOp(torch.autograd.Function): 8 | @staticmethod 9 | def forward(ctx, x, new_shape): 10 | ctx.shape = x.shape 11 | return x.new_empty(new_shape) 12 | 13 | @staticmethod 14 | def backward(ctx, grad): 15 | shape = ctx.shape 16 | return _NewEmptyTensorOp.apply(grad, shape), None 17 | 18 | 19 | class DFConv2d(nn.Module): 20 | """ 21 | Deformable convolutional layer with configurable 22 | deformable groups, dilations and groups. 
23 | 24 | Code is from: 25 | https://github.com/facebookresearch/maskrcnn-benchmark/blob/master/maskrcnn_benchmark/layers/misc.py 26 | 27 | 28 | """ 29 | def __init__( 30 | self, 31 | in_channels, 32 | out_channels, 33 | with_modulated_dcn=True, 34 | kernel_size=3, 35 | stride=1, 36 | groups=1, 37 | dilation=1, 38 | deformable_groups=1, 39 | bias=False, 40 | padding=None 41 | ): 42 | super(DFConv2d, self).__init__() 43 | if isinstance(kernel_size, (list, tuple)): 44 | assert isinstance(stride, (list, tuple)) 45 | assert isinstance(dilation, (list, tuple)) 46 | assert len(kernel_size) == 2 47 | assert len(stride) == 2 48 | assert len(dilation) == 2 49 | padding = ( 50 | dilation[0] * (kernel_size[0] - 1) // 2, 51 | dilation[1] * (kernel_size[1] - 1) // 2 52 | ) 53 | offset_base_channels = kernel_size[0] * kernel_size[1] 54 | else: 55 | padding = dilation * (kernel_size - 1) // 2 56 | offset_base_channels = kernel_size * kernel_size 57 | if with_modulated_dcn: 58 | from detectron2.layers.deform_conv import ModulatedDeformConv 59 | offset_channels = offset_base_channels * 3 # default: 27 60 | conv_block = ModulatedDeformConv 61 | else: 62 | from detectron2.layers.deform_conv import DeformConv 63 | offset_channels = offset_base_channels * 2 # default: 18 64 | conv_block = DeformConv 65 | self.offset = Conv2d( 66 | in_channels, 67 | deformable_groups * offset_channels, 68 | kernel_size=kernel_size, 69 | stride=stride, 70 | padding=padding, 71 | groups=1, 72 | dilation=dilation 73 | ) 74 | for l in [self.offset, ]: 75 | nn.init.kaiming_uniform_(l.weight, a=1) 76 | torch.nn.init.constant_(l.bias, 0.) 77 | self.conv = conv_block( 78 | in_channels, 79 | out_channels, 80 | kernel_size=kernel_size, 81 | stride=stride, 82 | padding=padding, 83 | dilation=dilation, 84 | groups=groups, 85 | deformable_groups=deformable_groups, 86 | bias=bias 87 | ) 88 | self.with_modulated_dcn = with_modulated_dcn 89 | self.kernel_size = kernel_size 90 | self.stride = stride 91 | self.padding = padding 92 | self.dilation = dilation 93 | self.offset_split = offset_base_channels * deformable_groups * 2 94 | 95 | def forward(self, x, return_offset=False): 96 | if x.numel() > 0: 97 | if not self.with_modulated_dcn: 98 | offset_mask = self.offset(x) 99 | x = self.conv(x, offset_mask) 100 | else: 101 | offset_mask = self.offset(x) 102 | offset = offset_mask[:, :self.offset_split, :, :] 103 | mask = offset_mask[:, self.offset_split:, :, :].sigmoid() 104 | x = self.conv(x, offset, mask) 105 | if return_offset: 106 | return x, offset_mask 107 | return x 108 | # get output shape 109 | output_shape = [ 110 | (i + 2 * p - (di * (k - 1) + 1)) // d + 1 111 | for i, p, di, k, d in zip( 112 | x.shape[-2:], 113 | self.padding, 114 | self.dilation, 115 | self.kernel_size, 116 | self.stride 117 | ) 118 | ] 119 | output_shape = [x.shape[0], self.conv.weight.shape[0]] + output_shape 120 | return _NewEmptyTensorOp.apply(x, output_shape) 121 | -------------------------------------------------------------------------------- /det/layers/gcn.py: -------------------------------------------------------------------------------- 1 | # coding:utf-8 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | 6 | class Conv2D(nn.Module): 7 | def __init__(self, in_channels, out_channels, kernel_size, padding='same', 8 | stride=1, dilation=1, groups=1): 9 | super(Conv2D, self).__init__() 10 | 11 | assert type(kernel_size) in [int, tuple], "Allowed kernel type [int or tuple], not {}".format(type(kernel_size)) 12 | assert padding == 'same', 
"Allowed padding type {}, not {}".format('same', padding) 13 | 14 | self.kernel_size = kernel_size 15 | if isinstance(kernel_size, tuple): 16 | self.h_kernel = kernel_size[0] 17 | self.w_kernel = kernel_size[1] 18 | else: 19 | self.h_kernel = kernel_size 20 | self.w_kernel = kernel_size 21 | 22 | self.padding = padding 23 | self.stride = stride 24 | self.dilation = dilation 25 | self.groups = groups 26 | self.conv = nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size, 27 | stride=self.stride, dilation=self.dilation, groups=self.groups) 28 | 29 | def forward(self, x): 30 | 31 | if self.padding == 'same': 32 | 33 | height, width = x.shape[2:] 34 | 35 | h_pad_need = max(0, (height - 1) * self.stride + self.h_kernel - height) 36 | w_pad_need = max(0, (width - 1) * self.stride + self.w_kernel - width) 37 | 38 | pad_left = w_pad_need // 2 39 | pad_right = w_pad_need - pad_left 40 | pad_top = h_pad_need // 2 41 | pad_bottom = h_pad_need - pad_top 42 | 43 | padding = (pad_left, pad_right, pad_top, pad_bottom) 44 | 45 | x = F.pad(x, padding, 'constant', 0) 46 | 47 | x = self.conv(x) 48 | 49 | return x 50 | 51 | 52 | class GCN(nn.Module): 53 | """ 54 | Large Kernel Matters -- https://arxiv.org/abs/1703.02719 55 | """ 56 | def __init__(self, in_channels, out_channels, k=3): 57 | super(GCN, self).__init__() 58 | 59 | self.conv_l1 = Conv2D(in_channels=in_channels, out_channels=out_channels, kernel_size=(k, 1), padding='same') 60 | self.conv_l2 = Conv2D(in_channels=out_channels, out_channels=out_channels, kernel_size=(1, k), padding='same') 61 | 62 | self.conv_r1 = Conv2D(in_channels=in_channels, out_channels=out_channels, kernel_size=(1, k), padding='same') 63 | self.conv_r2 = Conv2D(in_channels=out_channels, out_channels=out_channels, kernel_size=(k, 1), padding='same') 64 | 65 | def forward(self, x): 66 | x1 = self.conv_l1(x) 67 | x1 = self.conv_l2(x1) 68 | 69 | x2 = self.conv_r1(x) 70 | x2 = self.conv_r2(x2) 71 | 72 | out = x1 + x2 73 | 74 | return out 75 | -------------------------------------------------------------------------------- /det/layers/iou_loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | 4 | 5 | class IOULoss(nn.Module): 6 | """ 7 | Intersetion Over Union (IoU) loss which supports three 8 | different IoU computations: 9 | 10 | * IoU 11 | * Linear IoU 12 | * gIoU 13 | """ 14 | def __init__(self, loc_loss_type='iou'): 15 | super(IOULoss, self).__init__() 16 | self.loc_loss_type = loc_loss_type 17 | 18 | def forward(self, pred, target, weight=None): 19 | """ 20 | Args: 21 | pred: Nx4 predicted bounding boxes 22 | target: Nx4 target bounding boxes 23 | weight: N loss weight for each instance 24 | """ 25 | pred_left = pred[:, 0] 26 | pred_top = pred[:, 1] 27 | pred_right = pred[:, 2] 28 | pred_bottom = pred[:, 3] 29 | 30 | target_left = target[:, 0] 31 | target_top = target[:, 1] 32 | target_right = target[:, 2] 33 | target_bottom = target[:, 3] 34 | 35 | target_aera = (target_left + target_right) * \ 36 | (target_top + target_bottom) 37 | pred_aera = (pred_left + pred_right) * \ 38 | (pred_top + pred_bottom) 39 | 40 | w_intersect = torch.min(pred_left, target_left) + \ 41 | torch.min(pred_right, target_right) 42 | h_intersect = torch.min(pred_bottom, target_bottom) + \ 43 | torch.min(pred_top, target_top) 44 | 45 | g_w_intersect = torch.max(pred_left, target_left) + \ 46 | torch.max(pred_right, target_right) 47 | g_h_intersect = torch.max(pred_bottom, target_bottom) + \ 
48 | torch.max(pred_top, target_top) 49 | ac_uion = g_w_intersect * g_h_intersect 50 | 51 | area_intersect = w_intersect * h_intersect 52 | area_union = target_aera + pred_aera - area_intersect 53 | 54 | ious = (area_intersect + 1.0) / (area_union + 1.0) 55 | gious = ious - (ac_uion - area_union) / ac_uion 56 | if self.loc_loss_type == 'iou': 57 | losses = -torch.log(ious) 58 | elif self.loc_loss_type == 'linear_iou': 59 | losses = 1 - ious 60 | elif self.loc_loss_type == 'giou': 61 | losses = 1 - gious 62 | else: 63 | raise NotImplementedError 64 | 65 | if weight is not None: 66 | return (losses * weight).sum() 67 | else: 68 | return losses.sum() 69 | -------------------------------------------------------------------------------- /det/layers/ml_nms.py: -------------------------------------------------------------------------------- 1 | from detectron2.layers import batched_nms 2 | from .soft_nms import batched_soft_nms 3 | 4 | def ml_nms(boxlist, nms_thresh, max_proposals=-1, 5 | score_field="scores", label_field="labels"): 6 | """ 7 | Performs non-maximum suppression on a boxlist, with scores specified 8 | in a boxlist field via score_field. 9 | 10 | Args: 11 | boxlist (detectron2.structures.Boxes): 12 | nms_thresh (float): 13 | max_proposals (int): if > 0, then only the top max_proposals are kept 14 | after non-maximum suppression 15 | score_field (str): 16 | """ 17 | if nms_thresh <= 0: 18 | return boxlist 19 | boxes = boxlist.pred_boxes.tensor 20 | scores = boxlist.scores 21 | labels = boxlist.pred_classes 22 | keep = batched_nms(boxes, scores, labels, nms_thresh) 23 | if max_proposals > 0: 24 | keep = keep[: max_proposals] 25 | boxlist = boxlist[keep] 26 | return boxlist 27 | -------------------------------------------------------------------------------- /det/layers/naive_group_norm.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.nn import Module, Parameter 3 | from torch.nn import init 4 | 5 | 6 | class NaiveGroupNorm(Module): 7 | r"""NaiveGroupNorm implements Group Normalization with the high-level matrix operations in PyTorch. 8 | It is a temporary solution to export GN by ONNX before the official GN can be exported by ONNX. 9 | The usage of NaiveGroupNorm is exactly the same as the official :class:`torch.nn.GroupNorm`. 10 | Args: 11 | num_groups (int): number of groups to separate the channels into 12 | num_channels (int): number of channels expected in input 13 | eps: a value added to the denominator for numerical stability. Default: 1e-5 14 | affine: a boolean value that when set to ``True``, this module 15 | has learnable per-channel affine parameters initialized to ones (for weights) 16 | and zeros (for biases). Default: ``True``. 17 | 18 | Shape: 19 | - Input: :math:`(N, C, *)` where :math:`C=\text{num\_channels}` 20 | - Output: :math:`(N, C, *)` (same shape as input) 21 | 22 | Examples:: 23 | 24 | >>> input = torch.randn(20, 6, 10, 10) 25 | >>> # Separate 6 channels into 3 groups 26 | >>> m = NaiveGroupNorm(3, 6) 27 | >>> # Separate 6 channels into 6 groups (equivalent with InstanceNorm) 28 | >>> m = NaiveGroupNorm(6, 6) 29 | >>> # Put all 6 channels into a single group (equivalent with LayerNorm) 30 | >>> m = NaiveGroupNorm(1, 6) 31 | >>> # Activating the module 32 | >>> output = m(input) 33 | 34 | ..
_`Group Normalization`: https://arxiv.org/abs/1803.08494 35 | """ 36 | __constants__ = ['num_groups', 'num_channels', 'eps', 'affine', 'weight', 37 | 'bias'] 38 | 39 | def __init__(self, num_groups, num_channels, eps=1e-5, affine=True): 40 | super(NaiveGroupNorm, self).__init__() 41 | self.num_groups = num_groups 42 | self.num_channels = num_channels 43 | self.eps = eps 44 | self.affine = affine 45 | if self.affine: 46 | self.weight = Parameter(torch.Tensor(num_channels)) 47 | self.bias = Parameter(torch.Tensor(num_channels)) 48 | else: 49 | self.register_parameter('weight', None) 50 | self.register_parameter('bias', None) 51 | self.reset_parameters() 52 | 53 | def reset_parameters(self): 54 | if self.affine: 55 | init.ones_(self.weight) 56 | init.zeros_(self.bias) 57 | 58 | def forward(self, input): 59 | N, C, H, W = input.size() 60 | assert C % self.num_groups == 0 61 | input = input.reshape(N, self.num_groups, -1) 62 | mean = input.mean(dim=-1, keepdim=True) 63 | var = (input ** 2).mean(dim=-1, keepdim=True) - mean ** 2 64 | std = torch.sqrt(var + self.eps) 65 | 66 | input = (input - mean) / std 67 | input = input.reshape(N, C, H, W) 68 | if self.affine: 69 | input = input * self.weight.reshape(1, C, 1, 1) + self.bias.reshape(1, C, 1, 1) 70 | return input 71 | 72 | def extra_repr(self): 73 | return '{num_groups}, {num_channels}, eps={eps}, ' \ 74 | 'affine={affine}'.format(**self.__dict__) 75 | -------------------------------------------------------------------------------- /det/modeling/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | from .fcos import FCOS 3 | from .atss import ATSS 4 | from .backbone import build_fcos_resnet_fpn_backbone 5 | from .one_stage_detector import OneStageDetector, OneStageRCNN 6 | from .roi_heads import LibraRCNNROIHeads 7 | 8 | from .rpn_utils import ModuleListDial, Scale, BoxCoder, permute_and_flatten, concat_box_prediction_layers, ATSSAnchorGenerator 9 | 10 | _EXCLUDE = {"torch", "ShapeSpec"} 11 | __all__ = [k for k in globals().keys() if k not in _EXCLUDE and not k.startswith("_")] -------------------------------------------------------------------------------- /det/modeling/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TengFeiHan0/Object-Detection.pytorch/5c2000ba08e903255a146f9e003c87a2fb1cb5aa/det/modeling/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /det/modeling/__pycache__/one_stage_detector.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TengFeiHan0/Object-Detection.pytorch/5c2000ba08e903255a146f9e003c87a2fb1cb5aa/det/modeling/__pycache__/one_stage_detector.cpython-36.pyc -------------------------------------------------------------------------------- /det/modeling/__pycache__/rpn_utils.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TengFeiHan0/Object-Detection.pytorch/5c2000ba08e903255a146f9e003c87a2fb1cb5aa/det/modeling/__pycache__/rpn_utils.cpython-36.pyc -------------------------------------------------------------------------------- /det/modeling/atss/__init__.py: -------------------------------------------------------------------------------- 1 | from .atss import ATSS 
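These package __init__ modules exist mostly for their side effects: importing det.modeling is what executes the registration decorators (for example @BACKBONE_REGISTRY.register() on build_fcos_resnet_fpn_backbone in det/modeling/backbone/fpn.py below), after which a config can refer to the registered components purely by name. Below is a minimal editorial sketch of that flow; it assumes the CfgNode has already been extended with the custom keys the builders read (MODEL.FCOS.TOP_LEVELS, MODEL.RESNEST, ...), which plain detectron2 defaults do not provide.

from detectron2.modeling import build_model

import det.modeling  # noqa: F401  side-effect import: registers backbones, FCOS, ATSS, ...


def build_detector(cfg):
    # MODEL.BACKBONE.NAME is looked up in detectron2's BACKBONE_REGISTRY, so
    # "build_fcos_resnet_fpn_backbone" (registered in fpn.py below) resolves here
    # without an explicit import; MODEL.META_ARCHITECTURE is presumably registered
    # and resolved the same way through META_ARCH_REGISTRY.
    return build_model(cfg)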
-------------------------------------------------------------------------------- /det/modeling/atss/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TengFeiHan0/Object-Detection.pytorch/5c2000ba08e903255a146f9e003c87a2fb1cb5aa/det/modeling/atss/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /det/modeling/atss/__pycache__/atss.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TengFeiHan0/Object-Detection.pytorch/5c2000ba08e903255a146f9e003c87a2fb1cb5aa/det/modeling/atss/__pycache__/atss.cpython-36.pyc -------------------------------------------------------------------------------- /det/modeling/atss/__pycache__/atss_outputs.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TengFeiHan0/Object-Detection.pytorch/5c2000ba08e903255a146f9e003c87a2fb1cb5aa/det/modeling/atss/__pycache__/atss_outputs.cpython-36.pyc -------------------------------------------------------------------------------- /det/modeling/backbone/__init__.py: -------------------------------------------------------------------------------- 1 | from .fpn import build_fcos_resnet_fpn_backbone, build_resnest_fpn_backbone 2 | from .vovnet import build_vovnet_fpn_backbone, build_vovnet_backbone 3 | from .dla import build_fcos_dla_fpn_backbone 4 | from .resnet_lpf import build_resnet_lpf_backbone 5 | from .resnet import build_resnest_backbone -------------------------------------------------------------------------------- /det/modeling/backbone/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TengFeiHan0/Object-Detection.pytorch/5c2000ba08e903255a146f9e003c87a2fb1cb5aa/det/modeling/backbone/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /det/modeling/backbone/__pycache__/dla.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TengFeiHan0/Object-Detection.pytorch/5c2000ba08e903255a146f9e003c87a2fb1cb5aa/det/modeling/backbone/__pycache__/dla.cpython-36.pyc -------------------------------------------------------------------------------- /det/modeling/backbone/__pycache__/fpn.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TengFeiHan0/Object-Detection.pytorch/5c2000ba08e903255a146f9e003c87a2fb1cb5aa/det/modeling/backbone/__pycache__/fpn.cpython-36.pyc -------------------------------------------------------------------------------- /det/modeling/backbone/__pycache__/lpf.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TengFeiHan0/Object-Detection.pytorch/5c2000ba08e903255a146f9e003c87a2fb1cb5aa/det/modeling/backbone/__pycache__/lpf.cpython-36.pyc -------------------------------------------------------------------------------- /det/modeling/backbone/__pycache__/mobilenet.cpython-36.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/TengFeiHan0/Object-Detection.pytorch/5c2000ba08e903255a146f9e003c87a2fb1cb5aa/det/modeling/backbone/__pycache__/mobilenet.cpython-36.pyc -------------------------------------------------------------------------------- /det/modeling/backbone/__pycache__/resnet.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TengFeiHan0/Object-Detection.pytorch/5c2000ba08e903255a146f9e003c87a2fb1cb5aa/det/modeling/backbone/__pycache__/resnet.cpython-36.pyc -------------------------------------------------------------------------------- /det/modeling/backbone/__pycache__/resnet_interval.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TengFeiHan0/Object-Detection.pytorch/5c2000ba08e903255a146f9e003c87a2fb1cb5aa/det/modeling/backbone/__pycache__/resnet_interval.cpython-36.pyc -------------------------------------------------------------------------------- /det/modeling/backbone/__pycache__/resnet_lpf.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TengFeiHan0/Object-Detection.pytorch/5c2000ba08e903255a146f9e003c87a2fb1cb5aa/det/modeling/backbone/__pycache__/resnet_lpf.cpython-36.pyc -------------------------------------------------------------------------------- /det/modeling/backbone/__pycache__/splat.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TengFeiHan0/Object-Detection.pytorch/5c2000ba08e903255a146f9e003c87a2fb1cb5aa/det/modeling/backbone/__pycache__/splat.cpython-36.pyc -------------------------------------------------------------------------------- /det/modeling/backbone/__pycache__/vovnet.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TengFeiHan0/Object-Detection.pytorch/5c2000ba08e903255a146f9e003c87a2fb1cb5aa/det/modeling/backbone/__pycache__/vovnet.cpython-36.pyc -------------------------------------------------------------------------------- /det/modeling/backbone/fpn.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | import torch.nn.functional as F 3 | import fvcore.nn.weight_init as weight_init 4 | 5 | from detectron2.modeling.backbone import FPN, build_resnet_backbone 6 | from detectron2.layers import ShapeSpec 7 | from detectron2.modeling.backbone.build import BACKBONE_REGISTRY 8 | 9 | from .resnet_lpf import build_resnet_lpf_backbone 10 | from .resnet_interval import build_resnet_interval_backbone 11 | from .mobilenet import build_mnv2_backbone 12 | from .resnet import build_resnest_backbone 13 | 14 | class LastLevelP6P7(nn.Module): 15 | """ 16 | This module is used in RetinaNet and FCOS to generate extra layers, P6 and P7 from 17 | C5 or P5 feature. 
18 | """ 19 | 20 | def __init__(self, in_channels, out_channels, in_features="res5"): 21 | super().__init__() 22 | self.num_levels = 2 23 | self.in_feature = in_features 24 | self.p6 = nn.Conv2d(in_channels, out_channels, 3, 2, 1) 25 | self.p7 = nn.Conv2d(out_channels, out_channels, 3, 2, 1) 26 | for module in [self.p6, self.p7]: 27 | weight_init.c2_xavier_fill(module) 28 | 29 | def forward(self, x): 30 | p6 = self.p6(x) 31 | p7 = self.p7(F.relu(p6)) 32 | return [p6, p7] 33 | 34 | 35 | class LastLevelP6(nn.Module): 36 | """ 37 | This module is used in FCOS to generate extra layers 38 | """ 39 | 40 | def __init__(self, in_channels, out_channels, in_features="res5"): 41 | super().__init__() 42 | self.num_levels = 1 43 | self.in_feature = in_features 44 | self.p6 = nn.Conv2d(in_channels, out_channels, 3, 2, 1) 45 | for module in [self.p6]: 46 | weight_init.c2_xavier_fill(module) 47 | 48 | def forward(self, x): 49 | p6 = self.p6(x) 50 | return [p6] 51 | 52 | 53 | @BACKBONE_REGISTRY.register() 54 | def build_fcos_resnet_fpn_backbone(cfg, input_shape: ShapeSpec): 55 | """ 56 | Args: 57 | cfg: a detectron2 CfgNode 58 | 59 | Returns: 60 | backbone (Backbone): backbone module, must be a subclass of :class:`Backbone`. 61 | """ 62 | if cfg.MODEL.BACKBONE.ANTI_ALIAS: 63 | bottom_up = build_resnet_lpf_backbone(cfg, input_shape) 64 | elif cfg.MODEL.MOBILENET: 65 | bottom_up = build_mnv2_backbone(cfg, input_shape) 66 | elif cfg.MODEL.RESNEST: 67 | bottom_up = build_resnest_backbone(cfg, input_shape) 68 | else: 69 | bottom_up = build_resnet_backbone(cfg, input_shape) 70 | in_features = cfg.MODEL.FPN.IN_FEATURES 71 | out_channels = cfg.MODEL.FPN.OUT_CHANNELS 72 | top_levels = cfg.MODEL.FCOS.TOP_LEVELS 73 | in_channels_top = out_channels 74 | if top_levels == 2: 75 | top_block = LastLevelP6P7(in_channels_top, out_channels, "p5") 76 | if top_levels == 1: 77 | top_block = LastLevelP6(in_channels_top, out_channels, "p5") 78 | elif top_levels == 0: 79 | top_block = None 80 | backbone = FPN( 81 | bottom_up=bottom_up, 82 | in_features=in_features, 83 | out_channels=out_channels, 84 | norm=cfg.MODEL.FPN.NORM, 85 | top_block=top_block, 86 | fuse_type=cfg.MODEL.FPN.FUSE_TYPE, 87 | ) 88 | return backbone 89 | 90 | @BACKBONE_REGISTRY.register() 91 | def build_resnest_fpn_backbone(cfg, input_shape: ShapeSpec): 92 | if cfg.MODEL.RESNEST: 93 | bottom_up = build_resnest_backbone(cfg, input_shape) 94 | else: 95 | bottom_up = build_resnet_backbone(cfg, input_shape) 96 | in_features = cfg.MODEL.FPN.IN_FEATURES 97 | out_channels = cfg.MODEL.FPN.OUT_CHANNELS 98 | top_levels = cfg.MODEL.FCOS.TOP_LEVELS 99 | in_channels_top = out_channels 100 | if top_levels == 2: 101 | top_block = LastLevelP6P7(in_channels_top, out_channels, "p5") 102 | if top_levels == 1: 103 | top_block = LastLevelP6(in_channels_top, out_channels, "p5") 104 | elif top_levels == 0: 105 | top_block = None 106 | backbone = FPN( 107 | bottom_up=bottom_up, 108 | in_features=in_features, 109 | out_channels=out_channels, 110 | norm=cfg.MODEL.FPN.NORM, 111 | top_block=top_block, 112 | fuse_type=cfg.MODEL.FPN.FUSE_TYPE, 113 | ) 114 | return backbone 115 | 116 | 117 | #BiFPN 118 | #https://github.com/sxhxliang/detectron2_backbone/blob/master/detectron2_backbone/backbone/bifpn.py -------------------------------------------------------------------------------- /det/modeling/backbone/lpf.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.parallel 3 | import numpy as np 4 | import torch.nn as nn 5 | 
import torch.nn.functional as F 6 | from IPython import embed 7 | 8 | 9 | class Downsample(nn.Module): 10 | def __init__(self, pad_type='reflect', filt_size=3, stride=2, channels=None, pad_off=0): 11 | super(Downsample, self).__init__() 12 | self.filt_size = filt_size 13 | self.pad_off = pad_off 14 | self.pad_sizes = [int(1.*(filt_size-1)/2), int(np.ceil(1.*(filt_size-1)/2)), int(1.*(filt_size-1)/2), int(np.ceil(1.*(filt_size-1)/2))] 15 | self.pad_sizes = [pad_size+pad_off for pad_size in self.pad_sizes] 16 | self.stride = stride 17 | self.off = int((self.stride-1)/2.) 18 | self.channels = channels 19 | 20 | # print('Filter size [%i]'%filt_size) 21 | if(self.filt_size==1): 22 | a = np.array([1.,]) 23 | elif(self.filt_size==2): 24 | a = np.array([1., 1.]) 25 | elif(self.filt_size==3): 26 | a = np.array([1., 2., 1.]) 27 | elif(self.filt_size==4): 28 | a = np.array([1., 3., 3., 1.]) 29 | elif(self.filt_size==5): 30 | a = np.array([1., 4., 6., 4., 1.]) 31 | elif(self.filt_size==6): 32 | a = np.array([1., 5., 10., 10., 5., 1.]) 33 | elif(self.filt_size==7): 34 | a = np.array([1., 6., 15., 20., 15., 6., 1.]) 35 | 36 | filt = torch.Tensor(a[:,None]*a[None,:]) 37 | filt = filt/torch.sum(filt) 38 | self.register_buffer('filt', filt[None,None,:,:].repeat((self.channels,1,1,1))) 39 | 40 | self.pad = get_pad_layer(pad_type)(self.pad_sizes) 41 | 42 | def forward(self, inp): 43 | if(self.filt_size==1): 44 | if(self.pad_off==0): 45 | return inp[:,:,::self.stride,::self.stride] 46 | else: 47 | return self.pad(inp)[:,:,::self.stride,::self.stride] 48 | else: 49 | return F.conv2d(self.pad(inp), self.filt, stride=self.stride, groups=inp.shape[1]) 50 | 51 | def get_pad_layer(pad_type): 52 | if(pad_type in ['refl','reflect']): 53 | PadLayer = nn.ReflectionPad2d 54 | elif(pad_type in ['repl','replicate']): 55 | PadLayer = nn.ReplicationPad2d 56 | elif(pad_type=='zero'): 57 | PadLayer = nn.ZeroPad2d 58 | else: 59 | print('Pad type [%s] not recognized'%pad_type) 60 | return PadLayer 61 | 62 | 63 | class Downsample1D(nn.Module): 64 | def __init__(self, pad_type='reflect', filt_size=3, stride=2, channels=None, pad_off=0): 65 | super(Downsample1D, self).__init__() 66 | self.filt_size = filt_size 67 | self.pad_off = pad_off 68 | self.pad_sizes = [int(1. * (filt_size - 1) / 2), int(np.ceil(1. * (filt_size - 1) / 2))] 69 | self.pad_sizes = [pad_size + pad_off for pad_size in self.pad_sizes] 70 | self.stride = stride 71 | self.off = int((self.stride - 1) / 2.) 
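        # The filter taps built just below are binomial coefficients (rows of Pascal's
        # triangle), a discrete approximation of a Gaussian low-pass kernel; blurring
        # with this kernel before the strided subsampling is what makes the layer an
        # anti-aliased ("blur pool") downsampling step.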
72 | self.channels = channels 73 | 74 | # print('Filter size [%i]' % filt_size) 75 | if(self.filt_size == 1): 76 | a = np.array([1., ]) 77 | elif(self.filt_size == 2): 78 | a = np.array([1., 1.]) 79 | elif(self.filt_size == 3): 80 | a = np.array([1., 2., 1.]) 81 | elif(self.filt_size == 4): 82 | a = np.array([1., 3., 3., 1.]) 83 | elif(self.filt_size == 5): 84 | a = np.array([1., 4., 6., 4., 1.]) 85 | elif(self.filt_size == 6): 86 | a = np.array([1., 5., 10., 10., 5., 1.]) 87 | elif(self.filt_size == 7): 88 | a = np.array([1., 6., 15., 20., 15., 6., 1.]) 89 | 90 | filt = torch.Tensor(a) 91 | filt = filt / torch.sum(filt) 92 | self.register_buffer('filt', filt[None, None, :].repeat((self.channels, 1, 1))) 93 | 94 | self.pad = get_pad_layer_1d(pad_type)(self.pad_sizes) 95 | 96 | def forward(self, inp): 97 | if(self.filt_size == 1): 98 | if(self.pad_off == 0): 99 | return inp[:, :, ::self.stride] 100 | else: 101 | return self.pad(inp)[:, :, ::self.stride] 102 | else: 103 | return F.conv1d(self.pad(inp), self.filt, stride=self.stride, groups=inp.shape[1]) 104 | 105 | 106 | def get_pad_layer_1d(pad_type): 107 | if(pad_type in ['refl', 'reflect']): 108 | PadLayer = nn.ReflectionPad1d 109 | elif(pad_type in ['repl', 'replicate']): 110 | PadLayer = nn.ReplicationPad1d 111 | elif(pad_type == 'zero'): 112 | PadLayer = nn.ZeroPad1d 113 | else: 114 | print('Pad type [%s] not recognized' % pad_type) 115 | return PadLayer 116 | -------------------------------------------------------------------------------- /det/modeling/backbone/mobilenet.py: -------------------------------------------------------------------------------- 1 | # taken from https://github.com/tonylins/pytorch-mobilenet-v2/ 2 | # Published by Ji Lin, tonylins 3 | # licensed under the Apache License, Version 2.0, January 2004 4 | 5 | from torch import nn 6 | from torch.nn import BatchNorm2d 7 | #from detectron2.layers.batch_norm import NaiveSyncBatchNorm as BatchNorm2d 8 | from detectron2.layers import Conv2d 9 | from detectron2.modeling.backbone.build import BACKBONE_REGISTRY 10 | from detectron2.modeling.backbone import Backbone 11 | 12 | 13 | def conv_bn(inp, oup, stride): 14 | return nn.Sequential( 15 | Conv2d(inp, oup, 3, stride, 1, bias=False), 16 | BatchNorm2d(oup), 17 | nn.ReLU6(inplace=True) 18 | ) 19 | 20 | 21 | def conv_1x1_bn(inp, oup): 22 | return nn.Sequential( 23 | Conv2d(inp, oup, 1, 1, 0, bias=False), 24 | BatchNorm2d(oup), 25 | nn.ReLU6(inplace=True) 26 | ) 27 | 28 | 29 | class InvertedResidual(nn.Module): 30 | def __init__(self, inp, oup, stride, expand_ratio): 31 | super(InvertedResidual, self).__init__() 32 | self.stride = stride 33 | assert stride in [1, 2] 34 | 35 | hidden_dim = int(round(inp * expand_ratio)) 36 | self.use_res_connect = self.stride == 1 and inp == oup 37 | 38 | if expand_ratio == 1: 39 | self.conv = nn.Sequential( 40 | # dw 41 | Conv2d(hidden_dim, hidden_dim, 3, stride, 1, groups=hidden_dim, bias=False), 42 | BatchNorm2d(hidden_dim), 43 | nn.ReLU6(inplace=True), 44 | # pw-linear 45 | Conv2d(hidden_dim, oup, 1, 1, 0, bias=False), 46 | BatchNorm2d(oup), 47 | ) 48 | else: 49 | self.conv = nn.Sequential( 50 | # pw 51 | Conv2d(inp, hidden_dim, 1, 1, 0, bias=False), 52 | BatchNorm2d(hidden_dim), 53 | nn.ReLU6(inplace=True), 54 | # dw 55 | Conv2d(hidden_dim, hidden_dim, 3, stride, 1, groups=hidden_dim, bias=False), 56 | BatchNorm2d(hidden_dim), 57 | nn.ReLU6(inplace=True), 58 | # pw-linear 59 | Conv2d(hidden_dim, oup, 1, 1, 0, bias=False), 60 | BatchNorm2d(oup), 61 | ) 62 | 63 | def forward(self, x): 64 | 
if self.use_res_connect: 65 | return x + self.conv(x) 66 | else: 67 | return self.conv(x) 68 | 69 | 70 | class MobileNetV2(Backbone): 71 | """ 72 | Should freeze bn 73 | """ 74 | def __init__(self, cfg, n_class=1000, input_size=224, width_mult=1.): 75 | super(MobileNetV2, self).__init__() 76 | block = InvertedResidual 77 | input_channel = 32 78 | interverted_residual_setting = [ 79 | # t, c, n, s 80 | [1, 16, 1, 1], 81 | [6, 24, 2, 2], 82 | [6, 32, 3, 2], 83 | [6, 64, 4, 2], 84 | [6, 96, 3, 1], 85 | [6, 160, 3, 2], 86 | [6, 320, 1, 1], 87 | ] 88 | 89 | # building first layer 90 | assert input_size % 32 == 0 91 | input_channel = int(input_channel * width_mult) 92 | self.return_features_indices = [3, 6, 13, 17] 93 | self.return_features_num_channels = [] 94 | self.features = nn.ModuleList([conv_bn(3, input_channel, 2)]) 95 | # building inverted residual blocks 96 | for t, c, n, s in interverted_residual_setting: 97 | output_channel = int(c * width_mult) 98 | for i in range(n): 99 | if i == 0: 100 | self.features.append(block(input_channel, output_channel, s, expand_ratio=t)) 101 | else: 102 | self.features.append(block(input_channel, output_channel, 1, expand_ratio=t)) 103 | input_channel = output_channel 104 | if len(self.features) - 1 in self.return_features_indices: 105 | self.return_features_num_channels.append(output_channel) 106 | 107 | self._initialize_weights() 108 | self._freeze_backbone(cfg.MODEL.BACKBONE.FREEZE_AT) 109 | 110 | def _freeze_backbone(self, freeze_at): 111 | for layer_index in range(freeze_at): 112 | for p in self.features[layer_index].parameters(): 113 | p.requires_grad = False 114 | 115 | def forward(self, x): 116 | res = [] 117 | for i, m in enumerate(self.features): 118 | x = m(x) 119 | if i in self.return_features_indices: 120 | res.append(x) 121 | return {'res{}'.format(i + 2): r for i, r in enumerate(res)} 122 | 123 | def _initialize_weights(self): 124 | for m in self.modules(): 125 | if isinstance(m, Conv2d): 126 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 127 | m.weight.data.normal_(0, (2. / n) ** 0.5) 128 | if m.bias is not None: 129 | m.bias.data.zero_() 130 | elif isinstance(m, BatchNorm2d): 131 | m.weight.data.fill_(1) 132 | m.bias.data.zero_() 133 | elif isinstance(m, nn.Linear): 134 | n = m.weight.size(1) 135 | m.weight.data.normal_(0, 0.01) 136 | m.bias.data.zero_() 137 | 138 | @BACKBONE_REGISTRY.register() 139 | def build_mnv2_backbone(cfg, input_shape): 140 | """ 141 | Create a ResNet instance from config. 142 | 143 | Returns: 144 | ResNet: a :class:`ResNet` instance. 145 | """ 146 | out_features = cfg.MODEL.RESNETS.OUT_FEATURES 147 | 148 | out_feature_channels = {"res2": 24, "res3": 32, 149 | "res4": 96, "res5": 320} 150 | out_feature_strides = {"res2": 4, "res3": 8, "res4": 16, "res5": 32} 151 | model = MobileNetV2(cfg) 152 | model._out_features = out_features 153 | model._out_feature_channels = out_feature_channels 154 | model._out_feature_strides = out_feature_strides 155 | return model 156 | -------------------------------------------------------------------------------- /det/modeling/backbone/resnet_interval.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved 2 | from detectron2.layers import FrozenBatchNorm2d 3 | from detectron2.modeling.backbone import BACKBONE_REGISTRY 4 | from detectron2.modeling.backbone.resnet import ( 5 | BasicStem, 6 | DeformBottleneckBlock, 7 | BottleneckBlock, 8 | ResNet, 9 | ) 10 | 11 | 12 | def make_stage_intervals(block_class, num_blocks, first_stride, **kwargs): 13 | """ 14 | Create a resnet stage by creating many blocks. 15 | Args: 16 | block_class (class): a subclass of ResNetBlockBase 17 | num_blocks (int): 18 | first_stride (int): the stride of the first block. The other blocks will have stride=1. 19 | A `stride` argument will be passed to the block constructor. 20 | kwargs: other arguments passed to the block constructor. 21 | 22 | Returns: 23 | list[nn.Module]: a list of block module. 24 | """ 25 | blocks = [] 26 | conv_kwargs = {key: kwargs[key] for key in kwargs if "deform" not in key} 27 | deform_kwargs = {key: kwargs[key] for key in kwargs if key != "deform_interval"} 28 | deform_interval = kwargs.get("deform_interval", None) 29 | for i in range(num_blocks): 30 | if deform_interval and i % deform_interval == 0: 31 | blocks.append(block_class(stride=first_stride if i == 0 else 1, **deform_kwargs)) 32 | else: 33 | blocks.append(BottleneckBlock(stride=first_stride if i == 0 else 1, **conv_kwargs)) 34 | conv_kwargs["in_channels"] = conv_kwargs["out_channels"] 35 | deform_kwargs["in_channels"] = deform_kwargs["out_channels"] 36 | return blocks 37 | 38 | 39 | @BACKBONE_REGISTRY.register() 40 | def build_resnet_interval_backbone(cfg, input_shape): 41 | """ 42 | Create a ResNet instance from config. 43 | 44 | Returns: 45 | ResNet: a :class:`ResNet` instance. 46 | """ 47 | # need registration of new blocks/stems? 48 | norm = cfg.MODEL.RESNETS.NORM 49 | stem = BasicStem( 50 | in_channels=input_shape.channels, 51 | out_channels=cfg.MODEL.RESNETS.STEM_OUT_CHANNELS, 52 | norm=norm, 53 | ) 54 | freeze_at = cfg.MODEL.BACKBONE.FREEZE_AT 55 | 56 | if freeze_at >= 1: 57 | for p in stem.parameters(): 58 | p.requires_grad = False 59 | stem = FrozenBatchNorm2d.convert_frozen_batchnorm(stem) 60 | 61 | # fmt: off 62 | out_features = cfg.MODEL.RESNETS.OUT_FEATURES 63 | depth = cfg.MODEL.RESNETS.DEPTH 64 | num_groups = cfg.MODEL.RESNETS.NUM_GROUPS 65 | width_per_group = cfg.MODEL.RESNETS.WIDTH_PER_GROUP 66 | bottleneck_channels = num_groups * width_per_group 67 | in_channels = cfg.MODEL.RESNETS.STEM_OUT_CHANNELS 68 | out_channels = cfg.MODEL.RESNETS.RES2_OUT_CHANNELS 69 | stride_in_1x1 = cfg.MODEL.RESNETS.STRIDE_IN_1X1 70 | res5_dilation = cfg.MODEL.RESNETS.RES5_DILATION 71 | deform_on_per_stage = cfg.MODEL.RESNETS.DEFORM_ON_PER_STAGE 72 | deform_modulated = cfg.MODEL.RESNETS.DEFORM_MODULATED 73 | deform_num_groups = cfg.MODEL.RESNETS.DEFORM_NUM_GROUPS 74 | deform_interval = cfg.MODEL.RESNETS.DEFORM_INTERVAL 75 | # fmt: on 76 | assert res5_dilation in {1, 2}, "res5_dilation cannot be {}.".format(res5_dilation) 77 | 78 | num_blocks_per_stage = {50: [3, 4, 6, 3], 101: [3, 4, 23, 3], 152: [3, 8, 36, 3]}[depth] 79 | 80 | stages = [] 81 | 82 | # Avoid creating variables without gradients 83 | # It consumes extra memory and may cause allreduce to fail 84 | out_stage_idx = [{"res2": 2, "res3": 3, "res4": 4, "res5": 5}[f] for f in out_features] 85 | max_stage_idx = max(out_stage_idx) 86 | for idx, stage_idx in enumerate(range(2, max_stage_idx + 1)): 87 | dilation = res5_dilation if stage_idx == 5 else 1 88 | first_stride = 1 if idx == 0 or (stage_idx == 5 and dilation == 2) else 2 89 | stage_kargs = { 90 | 
"num_blocks": num_blocks_per_stage[idx], 91 | "first_stride": first_stride, 92 | "in_channels": in_channels, 93 | "bottleneck_channels": bottleneck_channels, 94 | "out_channels": out_channels, 95 | "num_groups": num_groups, 96 | "norm": norm, 97 | "stride_in_1x1": stride_in_1x1, 98 | "dilation": dilation, 99 | } 100 | if deform_on_per_stage[idx]: 101 | stage_kargs["block_class"] = DeformBottleneckBlock 102 | stage_kargs["deform_modulated"] = deform_modulated 103 | stage_kargs["deform_num_groups"] = deform_num_groups 104 | stage_kargs["deform_interval"] = deform_interval 105 | else: 106 | stage_kargs["block_class"] = BottleneckBlock 107 | blocks = make_stage_intervals(**stage_kargs) 108 | in_channels = out_channels 109 | out_channels *= 2 110 | bottleneck_channels *= 2 111 | 112 | if freeze_at >= stage_idx: 113 | for block in blocks: 114 | block.freeze() 115 | stages.append(blocks) 116 | return ResNet(stem, stages, out_features=out_features) 117 | -------------------------------------------------------------------------------- /det/modeling/backbone/splat.py: -------------------------------------------------------------------------------- 1 | """Split-Attention""" 2 | 3 | import torch 4 | from torch import nn 5 | import torch.nn.functional as F 6 | from torch.nn import Module, Linear, BatchNorm2d, ReLU 7 | from torch.nn.modules.utils import _pair 8 | 9 | from detectron2.layers import ( 10 | Conv2d, 11 | DeformConv, 12 | FrozenBatchNorm2d, 13 | ModulatedDeformConv, 14 | ShapeSpec, 15 | get_norm, 16 | ) 17 | 18 | __all__ = ['SplAtConv2d', 'SplAtConv2d_dcn'] 19 | 20 | class SplAtConv2d(Module): 21 | """Split-Attention Conv2d 22 | """ 23 | def __init__(self, in_channels, channels, kernel_size, stride=(1, 1), padding=(0, 0), 24 | dilation=(1, 1), groups=1, bias=True, 25 | radix=2, reduction_factor=4, 26 | rectify=False, rectify_avg=False, norm=None, 27 | dropblock_prob=0.0, **kwargs): 28 | super(SplAtConv2d, self).__init__() 29 | padding = _pair(padding) 30 | self.rectify = rectify and (padding[0] > 0 or padding[1] > 0) 31 | self.rectify_avg = rectify_avg 32 | inter_channels = max(in_channels*radix//reduction_factor, 32) 33 | self.radix = radix 34 | self.cardinality = groups 35 | self.channels = channels 36 | self.dropblock_prob = dropblock_prob 37 | if self.rectify: 38 | from rfconv import RFConv2d 39 | self.conv = RFConv2d(in_channels, channels*radix, kernel_size, stride, padding, dilation, 40 | groups=groups*radix, bias=bias, average_mode=rectify_avg, **kwargs) 41 | else: 42 | self.conv = Conv2d(in_channels, channels*radix, kernel_size, stride, padding, dilation, 43 | groups=groups*radix, bias=bias, **kwargs) 44 | self.use_bn = norm is not None 45 | if self.use_bn: 46 | self.bn0 = get_norm(norm, channels*radix) 47 | self.relu = ReLU(inplace=True) 48 | self.fc1 = Conv2d(channels, inter_channels, 1, groups=self.cardinality) 49 | if self.use_bn: 50 | self.bn1 = get_norm(norm, inter_channels) 51 | self.fc2 = Conv2d(inter_channels, channels*radix, 1, groups=self.cardinality) 52 | if dropblock_prob > 0.0: 53 | self.dropblock = DropBlock2D(dropblock_prob, 3) 54 | self.rsoftmax = rSoftMax(radix, groups) 55 | 56 | def forward(self, x): 57 | x = self.conv(x) 58 | if self.use_bn: 59 | x = self.bn0(x) 60 | if self.dropblock_prob > 0.0: 61 | x = self.dropblock(x) 62 | x = self.relu(x) 63 | 64 | batch, rchannel = x.shape[:2] 65 | if self.radix > 1: 66 | splited = torch.split(x, rchannel//self.radix, dim=1) 67 | gap = sum(splited) 68 | else: 69 | gap = x 70 | gap = F.adaptive_avg_pool2d(gap, 1) 71 | gap = 
self.fc1(gap) 72 | 73 | if self.use_bn: 74 | gap = self.bn1(gap) 75 | gap = self.relu(gap) 76 | 77 | atten = self.fc2(gap) 78 | atten = self.rsoftmax(atten).view(batch, -1, 1, 1) 79 | 80 | if self.radix > 1: 81 | attens = torch.split(atten, rchannel//self.radix, dim=1) 82 | out = sum([att*split for (att, split) in zip(attens, splited)]) 83 | else: 84 | out = atten * x 85 | return out.contiguous() 86 | 87 | class rSoftMax(nn.Module): 88 | def __init__(self, radix, cardinality): 89 | super().__init__() 90 | self.radix = radix 91 | self.cardinality = cardinality 92 | 93 | def forward(self, x): 94 | batch = x.size(0) 95 | if self.radix > 1: 96 | x = x.view(batch, self.cardinality, self.radix, -1).transpose(1, 2) 97 | x = F.softmax(x, dim=1) 98 | x = x.reshape(batch, -1) 99 | else: 100 | x = torch.sigmoid(x) 101 | return x 102 | 103 | 104 | class SplAtConv2d_dcn(Module): 105 | """Split-Attention Conv2d with dcn 106 | """ 107 | def __init__(self, in_channels, channels, kernel_size, stride=(1, 1), padding=(0, 0), 108 | dilation=(1, 1), groups=1, bias=True, 109 | radix=2, reduction_factor=4, 110 | rectify=False, rectify_avg=False, norm=None, 111 | dropblock_prob=0.0, 112 | deform_conv_op=None, 113 | deformable_groups=1, 114 | deform_modulated=False, 115 | **kwargs): 116 | super(SplAtConv2d_dcn, self).__init__() 117 | self.deform_modulated = deform_modulated 118 | 119 | padding = _pair(padding) 120 | self.rectify = rectify and (padding[0] > 0 or padding[1] > 0) 121 | self.rectify_avg = rectify_avg 122 | inter_channels = max(in_channels*radix//reduction_factor, 32) 123 | self.radix = radix 124 | self.cardinality = groups 125 | self.channels = channels 126 | self.dropblock_prob = dropblock_prob 127 | if self.rectify: 128 | from rfconv import RFConv2d 129 | self.conv = RFConv2d(in_channels, channels*radix, kernel_size, stride, padding, dilation, 130 | groups=groups*radix, bias=bias, average_mode=rectify_avg, **kwargs) 131 | else: 132 | self.conv = deform_conv_op(in_channels, channels*radix, kernel_size, stride, padding[0], dilation, 133 | groups=groups*radix, bias=bias, deformable_groups=deformable_groups, **kwargs) 134 | self.use_bn = norm is not None 135 | if self.use_bn: 136 | self.bn0 = get_norm(norm, channels*radix) 137 | self.relu = ReLU(inplace=True) 138 | self.fc1 = Conv2d(channels, inter_channels, 1, groups=self.cardinality) 139 | if self.use_bn: 140 | self.bn1 = get_norm(norm, inter_channels) 141 | self.fc2 = Conv2d(inter_channels, channels*radix, 1, groups=self.cardinality) 142 | if dropblock_prob > 0.0: 143 | self.dropblock = DropBlock2D(dropblock_prob, 3) 144 | self.rsoftmax = rSoftMax(radix, groups) 145 | 146 | def forward(self, x, offset_input): 147 | 148 | if self.deform_modulated: 149 | offset_x, offset_y, mask = torch.chunk(offset_input, 3, dim=1) 150 | offset = torch.cat((offset_x, offset_y), dim=1) 151 | mask = mask.sigmoid() 152 | x = self.conv(x, offset, mask) 153 | else: 154 | x = self.conv(x, offset_input) 155 | 156 | if self.use_bn: 157 | x = self.bn0(x) 158 | if self.dropblock_prob > 0.0: 159 | x = self.dropblock(x) 160 | x = self.relu(x) 161 | 162 | batch, rchannel = x.shape[:2] 163 | if self.radix > 1: 164 | splited = torch.split(x, rchannel//self.radix, dim=1) 165 | gap = sum(splited) 166 | else: 167 | gap = x 168 | gap = F.adaptive_avg_pool2d(gap, 1) 169 | gap = self.fc1(gap) 170 | 171 | if self.use_bn: 172 | gap = self.bn1(gap) 173 | gap = self.relu(gap) 174 | 175 | atten = self.fc2(gap) 176 | atten = self.rsoftmax(atten).view(batch, -1, 1, 1) 177 | 178 | if self.radix > 
1: 179 | attens = torch.split(atten, rchannel//self.radix, dim=1) 180 | out = sum([att*split for (att, split) in zip(attens, splited)]) 181 | else: 182 | out = atten * x 183 | return out.contiguous() -------------------------------------------------------------------------------- /det/modeling/fcos/__init__.py: -------------------------------------------------------------------------------- 1 | from .fcos import FCOS 2 | -------------------------------------------------------------------------------- /det/modeling/fcos/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TengFeiHan0/Object-Detection.pytorch/5c2000ba08e903255a146f9e003c87a2fb1cb5aa/det/modeling/fcos/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /det/modeling/fcos/__pycache__/fcos.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TengFeiHan0/Object-Detection.pytorch/5c2000ba08e903255a146f9e003c87a2fb1cb5aa/det/modeling/fcos/__pycache__/fcos.cpython-36.pyc -------------------------------------------------------------------------------- /det/modeling/fcos/__pycache__/fcos_outputs.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TengFeiHan0/Object-Detection.pytorch/5c2000ba08e903255a146f9e003c87a2fb1cb5aa/det/modeling/fcos/__pycache__/fcos_outputs.cpython-36.pyc -------------------------------------------------------------------------------- /det/modeling/one_stage_detector.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from torch import nn 3 | 4 | from detectron2.modeling.meta_arch.build import META_ARCH_REGISTRY 5 | from detectron2.modeling import ProposalNetwork, GeneralizedRCNN 6 | from detectron2.utils.events import get_event_storage 7 | from detectron2.utils.logger import log_first_n 8 | from detectron2.modeling.postprocessing import detector_postprocess as d2_postprocesss 9 | 10 | 11 | def detector_postprocess(results, output_height, output_width, mask_threshold=0.5): 12 | """ 13 | In addition to the post processing of detectron2, we add scalign for 14 | bezier control points. 15 | """ 16 | scale_x, scale_y = (output_width / results.image_size[1], output_height / results.image_size[0]) 17 | results = d2_postprocesss(results, output_height, output_width, mask_threshold) 18 | 19 | # scale bezier points 20 | if results.has("beziers"): 21 | beziers = results.beziers 22 | # scale and clip in place 23 | beziers[:, 0::2] *= scale_x 24 | beziers[:, 1::2] *= scale_y 25 | h, w = results.image_size 26 | beziers[:, 0].clamp_(min=0, max=w) 27 | beziers[:, 1].clamp_(min=0, max=h) 28 | beziers[:, 6].clamp_(min=0, max=w) 29 | beziers[:, 7].clamp_(min=0, max=h) 30 | beziers[:, 8].clamp_(min=0, max=w) 31 | beziers[:, 9].clamp_(min=0, max=h) 32 | beziers[:, 14].clamp_(min=0, max=w) 33 | beziers[:, 15].clamp_(min=0, max=h) 34 | 35 | return results 36 | 37 | 38 | @META_ARCH_REGISTRY.register() 39 | class OneStageDetector(ProposalNetwork): 40 | """ 41 | Same as :class:`detectron2.modeling.ProposalNetwork`. 42 | Uses "instances" as the return key instead of using "proposal". 
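In practice the only difference a caller sees is the output key. A minimal inference sketch (assumptions: `model` is an already-built OneStageDetector in eval mode and `img` is a CHW image tensor; neither is defined in this file):

        import torch
        with torch.no_grad():
            outputs = model([{"image": img, "height": 800, "width": 1333}])
        detections = outputs[0]["instances"]   # re-keyed from the "proposals" returned by ProposalNetwork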
43 | """ 44 | def forward(self, batched_inputs): 45 | if self.training: 46 | return super().forward(batched_inputs) 47 | processed_results = super().forward(batched_inputs) 48 | processed_results = [{"instances": r["proposals"]} for r in processed_results] 49 | return processed_results 50 | 51 | 52 | def build_top_module(cfg): 53 | top_type = cfg.MODEL.TOP_MODULE.NAME 54 | if top_type == "conv": 55 | inp = cfg.MODEL.FPN.OUT_CHANNELS 56 | oup = cfg.MODEL.TOP_MODULE.DIM 57 | top_module = nn.Conv2d( 58 | inp, oup, 59 | kernel_size=3, stride=1, padding=1) 60 | else: 61 | top_module = None 62 | return top_module 63 | 64 | 65 | @META_ARCH_REGISTRY.register() 66 | class OneStageRCNN(GeneralizedRCNN): 67 | """ 68 | Same as :class:`detectron2.modeling.ProposalNetwork`. 69 | Use one stage detector and a second stage for instance-wise prediction. 70 | """ 71 | def __init__(self, cfg): 72 | super().__init__(cfg) 73 | self.top_module = build_top_module(cfg) 74 | self.to(self.device) 75 | 76 | def forward(self, batched_inputs): 77 | """ 78 | Args: 79 | batched_inputs: a list, batched outputs of :class:`DatasetMapper` . 80 | Each item in the list contains the inputs for one image. 81 | For now, each item in the list is a dict that contains: 82 | 83 | * image: Tensor, image in (C, H, W) format. 84 | * instances (optional): groundtruth :class:`Instances` 85 | * proposals (optional): :class:`Instances`, precomputed proposals. 86 | 87 | Other information that's included in the original dicts, such as: 88 | 89 | * "height", "width" (int): the output resolution of the model, used in inference. 90 | See :meth:`postprocess` for details. 91 | 92 | Returns: 93 | list[dict]: 94 | Each dict is the output for one input image. 95 | The dict contains one key "instances" whose value is a :class:`Instances`. 96 | The :class:`Instances` object has the following keys: 97 | "pred_boxes", "pred_classes", "scores", "pred_masks", "pred_keypoints" 98 | """ 99 | if not self.training: 100 | return self.inference(batched_inputs) 101 | 102 | images = self.preprocess_image(batched_inputs) 103 | if "instances" in batched_inputs[0]: 104 | gt_instances = [x["instances"].to(self.device) for x in batched_inputs] 105 | elif "targets" in batched_inputs[0]: 106 | log_first_n( 107 | logging.WARN, "'targets' in the model inputs is now renamed to 'instances'!", n=10 108 | ) 109 | gt_instances = [x["targets"].to(self.device) for x in batched_inputs] 110 | else: 111 | gt_instances = None 112 | 113 | features = self.backbone(images.tensor) 114 | 115 | if self.proposal_generator: 116 | proposals, proposal_losses = self.proposal_generator( 117 | images, features, gt_instances, self.top_module) 118 | else: 119 | assert "proposals" in batched_inputs[0] 120 | proposals = [x["proposals"].to(self.device) for x in batched_inputs] 121 | proposal_losses = {} 122 | 123 | _, detector_losses = self.roi_heads(images, features, proposals, gt_instances) 124 | if self.vis_period > 0: 125 | storage = get_event_storage() 126 | if storage.iter % self.vis_period == 0: 127 | self.visualize_training(batched_inputs, proposals) 128 | 129 | losses = {} 130 | losses.update(detector_losses) 131 | losses.update(proposal_losses) 132 | return losses 133 | 134 | def inference(self, batched_inputs, detected_instances=None, do_postprocess=True): 135 | """ 136 | Run inference on the given inputs. 
137 | 138 | Args: 139 | batched_inputs (list[dict]): same as in :meth:`forward` 140 | detected_instances (None or list[Instances]): if not None, it 141 | contains an `Instances` object per image. The `Instances` 142 | object contains "pred_boxes" and "pred_classes" which are 143 | known boxes in the image. 144 | The inference will then skip the detection of bounding boxes, 145 | and only predict other per-ROI outputs. 146 | do_postprocess (bool): whether to apply post-processing on the outputs. 147 | 148 | Returns: 149 | same as in :meth:`forward`. 150 | """ 151 | assert not self.training 152 | 153 | images = self.preprocess_image(batched_inputs) 154 | features = self.backbone(images.tensor) 155 | 156 | if detected_instances is None: 157 | if self.proposal_generator: 158 | proposals, _ = self.proposal_generator( 159 | images, features, None, self.top_module) 160 | else: 161 | assert "proposals" in batched_inputs[0] 162 | proposals = [x["proposals"].to(self.device) for x in batched_inputs] 163 | 164 | results, _ = self.roi_heads(images, features, proposals, None) 165 | else: 166 | detected_instances = [x.to(self.device) for x in detected_instances] 167 | results = self.roi_heads.forward_with_given_boxes(features, detected_instances) 168 | 169 | if do_postprocess: 170 | return OneStageRCNN._postprocess(results, batched_inputs, images.image_sizes) 171 | else: 172 | return results 173 | 174 | @staticmethod 175 | def _postprocess(instances, batched_inputs, image_sizes): 176 | """ 177 | Rescale the output instances to the target size. 178 | """ 179 | # note: private function; subject to changes 180 | processed_results = [] 181 | for results_per_image, input_per_image, image_size in zip( 182 | instances, batched_inputs, image_sizes 183 | ): 184 | height = input_per_image.get("height", image_size[0]) 185 | width = input_per_image.get("width", image_size[1]) 186 | r = detector_postprocess(results_per_image, height, width) 187 | processed_results.append({"instances": r}) 188 | return processed_results -------------------------------------------------------------------------------- /det/modeling/poolers.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import torch 3 | from torch import nn 4 | from detectron2.layers import cat 5 | 6 | from detectron2.modeling.poolers import ( 7 | ROIPooler, convert_boxes_to_pooler_format, assign_boxes_to_levels 8 | ) 9 | 10 | from det.layers import BezierAlign 11 | from det.structures import Beziers 12 | 13 | __all__ = ["TopPooler"] 14 | 15 | 16 | def _box_max_size(boxes): 17 | box = boxes.tensor 18 | max_size = torch.max(box[:, 2] - box[:, 0], box[:, 3] - box[:, 1]) 19 | return max_size 20 | 21 | 22 | def _bezier_height(beziers): 23 | beziers = beziers.tensor 24 | # compute the distance between the first and last control point 25 | p1 = beziers[:, :2] 26 | p2 = beziers[:, 14:] 27 | height = ((p1 - p2) ** 2).sum(dim=1).sqrt() 28 | return height 29 | 30 | 31 | def assign_boxes_to_levels_by_metric( 32 | box_lists, min_level, max_level, canonical_box_size, 33 | canonical_level, metric_fn=_box_max_size): 34 | """ 35 | Map each box in `box_lists` to a feature map level index and return the assignment 36 | vector. 37 | 38 | Args: 39 | box_lists (list[detectron2.structures.Boxes]): A list of N Boxes or N RotatedBoxes, 40 | where N is the number of images in the batch. 41 | min_level (int): Smallest feature map level index. The input is considered index 0, 42 | the output of stage 1 is index 1, and so. 
43 | max_level (int): Largest feature map level index. 44 | canonical_box_size (int): A canonical box size in pixels (shorter side). 45 | canonical_level (int): The feature map level index on which a canonically-sized box 46 | should be placed. 47 | 48 | Returns: 49 | A tensor of length M, where M is the total number of boxes aggregated over all 50 | N batch images. The memory layout corresponds to the concatenation of boxes 51 | from all images. Each element is the feature map index, as an offset from 52 | `self.min_level`, for the corresponding box (so value i means the box is at 53 | `self.min_level + i`). 54 | """ 55 | eps = sys.float_info.epsilon 56 | box_sizes = cat([metric_fn(boxes) for boxes in box_lists]) 57 | # Eqn.(1) in FPN paper 58 | level_assignments = torch.floor( 59 | canonical_level + torch.log2(box_sizes / canonical_box_size + eps) 60 | ) 61 | level_assignments = torch.clamp(level_assignments, min=min_level, max=max_level) 62 | return level_assignments.to(torch.int64) - min_level 63 | 64 | 65 | def assign_boxes_to_levels_max( 66 | box_lists, min_level, max_level, canonical_box_size, 67 | canonical_level): 68 | return assign_boxes_to_levels_by_metric( 69 | box_lists, min_level, max_level, canonical_box_size, 70 | canonical_level, metric_fn=_box_max_size 71 | ) 72 | 73 | 74 | def assign_boxes_to_levels_bezier( 75 | box_lists, min_level, max_level, canonical_box_size, 76 | canonical_level): 77 | return assign_boxes_to_levels_by_metric( 78 | box_lists, min_level, max_level, canonical_box_size, 79 | canonical_level, metric_fn=_bezier_height 80 | ) 81 | 82 | 83 | class TopPooler(ROIPooler): 84 | """ 85 | ROIPooler with option to assign level by max length. Used by top modules. 86 | """ 87 | def __init__(self, 88 | output_size, 89 | scales, 90 | sampling_ratio, 91 | pooler_type, 92 | canonical_box_size=224, 93 | canonical_level=4, 94 | assign_crit="area",): 95 | # to reuse the parent initialization, handle unsupported pooler types 96 | parent_pooler_type = "ROIAlign" if pooler_type == "BezierAlign" else pooler_type 97 | super().__init__(output_size, scales, sampling_ratio, parent_pooler_type, 98 | canonical_box_size=canonical_box_size, 99 | canonical_level=canonical_level) 100 | if parent_pooler_type != pooler_type: 101 | # reinit the level_poolers here 102 | self.level_poolers = nn.ModuleList( 103 | BezierAlign( 104 | output_size, spatial_scale=scale, 105 | sampling_ratio=sampling_ratio) for scale in scales 106 | ) 107 | self.assign_crit = assign_crit 108 | 109 | def forward(self, x, box_lists): 110 | """ 111 | see 112 | """ 113 | num_level_assignments = len(self.level_poolers) 114 | 115 | assert isinstance(x, list) and isinstance( 116 | box_lists, list 117 | ), "Arguments to pooler must be lists" 118 | assert ( 119 | len(x) == num_level_assignments 120 | ), "unequal value, num_level_assignments={}, but x is list of {} Tensors".format( 121 | num_level_assignments, len(x) 122 | ) 123 | 124 | assert len(box_lists) == x[0].size( 125 | 0 126 | ), "unequal value, x[0] batch dim 0 is {}, but box_list has length {}".format( 127 | x[0].size(0), len(box_lists) 128 | ) 129 | 130 | if isinstance(box_lists[0], torch.Tensor): 131 | # TODO: use Beziers for data_mapper 132 | box_lists = [Beziers(x) for x in box_lists] 133 | pooler_fmt_boxes = convert_boxes_to_pooler_format(box_lists) 134 | 135 | if num_level_assignments == 1: 136 | return self.level_poolers[0](x[0], pooler_fmt_boxes) 137 | 138 | if self.assign_crit == "max": 139 | assign_method = assign_boxes_to_levels_max 140 | elif 
self.assign_crit == "bezier": 141 | assign_method = assign_boxes_to_levels_bezier 142 | else: 143 | assign_method = assign_boxes_to_levels 144 | 145 | level_assignments = assign_method( 146 | box_lists, self.min_level, self.max_level, 147 | self.canonical_box_size, self.canonical_level) 148 | 149 | num_boxes = len(pooler_fmt_boxes) 150 | num_channels = x[0].shape[1] 151 | output_size = self.output_size 152 | 153 | dtype, device = x[0].dtype, x[0].device 154 | output = torch.zeros( 155 | (num_boxes, num_channels, output_size[0], output_size[1]), dtype=dtype, device=device 156 | ) 157 | 158 | for level, (x_level, pooler) in enumerate(zip(x, self.level_poolers)): 159 | inds = torch.nonzero(level_assignments == level).squeeze(1) 160 | pooler_fmt_boxes_level = pooler_fmt_boxes[inds] 161 | output[inds] = pooler(x_level, pooler_fmt_boxes_level) 162 | 163 | return output 164 | -------------------------------------------------------------------------------- /det/modeling/roi_heads/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | from .libra_rcnn import LibraRCNNROIHeads 3 | -------------------------------------------------------------------------------- /det/modeling/roi_heads/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TengFeiHan0/Object-Detection.pytorch/5c2000ba08e903255a146f9e003c87a2fb1cb5aa/det/modeling/roi_heads/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /det/modeling/roi_heads/__pycache__/libra_rcnn.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TengFeiHan0/Object-Detection.pytorch/5c2000ba08e903255a146f9e003c87a2fb1cb5aa/det/modeling/roi_heads/__pycache__/libra_rcnn.cpython-36.pyc -------------------------------------------------------------------------------- /det/modeling/roi_heads/libra_rcnn.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved 2 | import logging 3 | import numpy as np 4 | import torch 5 | from torch import nn 6 | from torch.nn import functional as F 7 | 8 | from detectron2.layers import ShapeSpec 9 | from detectron2.structures import Boxes, Instances, pairwise_iou 10 | from detectron2.modeling.roi_heads.fast_rcnn import FastRCNNOutputs 11 | from detectron2.layers import batched_nms, cat 12 | from det.layers import BalancedL1Loss 13 | from detectron2.modeling.roi_heads import (StandardROIHeads, ROI_HEADS_REGISTRY) 14 | from detectron2.modeling.sampling import subsample_labels 15 | 16 | 17 | logger = logging.getLogger(__name__) 18 | 19 | 20 | 21 | class LibraCNNOutputs(FastRCNNOutputs): 22 | def __init__( 23 | self, box2box_transform, pred_class_logits, pred_proposal_deltas, proposals, baclanced_l1_beta 24 | ): 25 | self.box2box_transform = box2box_transform 26 | self.num_preds_per_image = [len(p) for p in proposals] 27 | self.pred_class_logits = pred_class_logits 28 | self.pred_proposal_deltas = pred_proposal_deltas 29 | self.baclanced_l1_beta = baclanced_l1_beta 30 | 31 | box_type = type(proposals[0].proposal_boxes) 32 | # cat(..., dim=0) concatenates over all images in the batch 33 | self.proposals = box_type.cat([p.proposal_boxes for p in proposals]) 34 | assert not self.proposals.tensor.requires_grad, "Proposals should not require gradients!" 35 | self.image_shapes = [x.image_size for x in proposals] 36 | 37 | self.ba_l1_loss = BalancedL1Loss(alpha=0.5, 38 | gamma=1.5, 39 | beta=1.0, 40 | reduction='mean', 41 | loss_weight=1.0) 42 | 43 | # The following fields should exist only when training. 44 | if proposals[0].has("gt_boxes"): 45 | self.gt_boxes = box_type.cat([p.gt_boxes for p in proposals]) 46 | assert proposals[0].has("gt_classes") 47 | self.gt_classes = cat([p.gt_classes for p in proposals], dim=0) 48 | 49 | def baclanced_l1_loss(self): 50 | """ 51 | Compute the baclanced L1 loss for box regression. 52 | 53 | Returns: 54 | scalar Tensor 55 | """ 56 | gt_proposal_deltas = self.box2box_transform.get_deltas( 57 | self.proposals.tensor, self.gt_boxes.tensor 58 | ) 59 | box_dim = gt_proposal_deltas.size(1) # 4 or 5 60 | cls_agnostic_bbox_reg = self.pred_proposal_deltas.size(1) == box_dim 61 | device = self.pred_proposal_deltas.device 62 | 63 | bg_class_ind = self.pred_class_logits.shape[1] - 1 64 | 65 | fg_inds = torch.nonzero((self.gt_classes >= 0) & (self.gt_classes < bg_class_ind)).squeeze( 66 | 1 67 | ) 68 | if cls_agnostic_bbox_reg: 69 | # pred_proposal_deltas only corresponds to foreground class for agnostic 70 | gt_class_cols = torch.arange(box_dim, device=device) 71 | else: 72 | fg_gt_classes = self.gt_classes[fg_inds] 73 | # pred_proposal_deltas for class k are located in columns [b * k : b * k + b], 74 | # where b is the dimension of box representation (4 or 5) 75 | # Note that compared to Detectron1, 76 | # we do not perform bounding box regression for background classes. 77 | gt_class_cols = box_dim * fg_gt_classes[:, None] + torch.arange(box_dim, device=device) 78 | 79 | 80 | loss_box_reg = self.ba_l1_loss( 81 | self.pred_proposal_deltas[fg_inds[:, None], gt_class_cols], 82 | gt_proposal_deltas[fg_inds], 83 | self.baclanced_l1_beta, 84 | reduction="mean", 85 | ) 86 | 87 | loss_box_reg = loss_box_reg / self.gt_classes.numel() 88 | return loss_box_reg 89 | 90 | def losses(self): 91 | """ 92 | Compute the default losses for box head in Fast(er) R-CNN, 93 | with softmax cross entropy loss and smooth L1 loss. 
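Here the box-regression term is actually the balanced L1 loss of Libra R-CNN rather than plain smooth L1 (see `losses()` below, which returns it under "loss_box_reg_ba"). The `BalancedL1Loss` imported from `det.layers` is not shown in this file; as a reference, a common element-wise formulation (a sketch following the Libra R-CNN paper and the mmdetection implementation, not necessarily this repository's exact code) is:

        import math
        import torch

        def balanced_l1(pred, target, alpha=0.5, gamma=1.5, beta=1.0):
            # piecewise loss that promotes the gradient contribution of inliers (|diff| < beta)
            diff = torch.abs(pred - target)
            b = math.exp(gamma / alpha) - 1          # chosen so the two branches meet at diff == beta
            return torch.where(
                diff < beta,
                alpha / b * (b * diff + 1) * torch.log(b * diff / beta + 1) - alpha * diff,
                gamma * diff + gamma / b - alpha * beta,
            )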
94 | 95 | Returns: 96 | A dict of losses (scalar tensors) containing keys "loss_cls" and "loss_box_reg". 97 | """ 98 | return { 99 | "loss_cls": self.softmax_cross_entropy_loss(), 100 | "loss_box_reg_ba": self.baclanced_l1_loss(), 101 | } 102 | 103 | @ROI_HEADS_REGISTRY.register() 104 | class LibraRCNNROIHeads(StandardROIHeads): 105 | 106 | def _sample_proposals(self, matched_idxs, matched_labels, gt_classes): 107 | """ 108 | modified from roi_heads 109 | """ 110 | has_gt = gt_classes.numel() > 0 111 | # Get the corresponding GT for each proposal 112 | if has_gt: 113 | gt_classes = gt_classes[matched_idxs] 114 | # Label unmatched proposals (0 label from matcher) as background (label=num_classes) 115 | gt_classes[matched_labels == 0] = self.num_classes 116 | # Label ignore proposals (-1 label) 117 | gt_classes[matched_labels == -1] = -1 118 | else: 119 | gt_classes = torch.zeros_like(matched_idxs) + self.num_classes 120 | 121 | sampled_fg_idxs, sampled_bg_idxs = subsample_labels( 122 | gt_classes, self.batch_size_per_image, self.positive_sample_fraction, self.num_classes 123 | ) 124 | 125 | sampled_idxs = torch.cat([sampled_fg_idxs, sampled_bg_idxs], dim=0) 126 | return sampled_idxs, gt_classes[sampled_idxs] 127 | 128 | 129 | def __forward_box(self, features, proposals): 130 | box_features = self.box_pooler(features, [x.proposal_boxes for x in proposals]) 131 | box_features = self.box_head(box_features) 132 | pred_class_logits, pred_proposal_deltas = self.box_predictor(box_features) 133 | del box_features 134 | 135 | outputs = LibraRCNNOutputs( 136 | self.box2box_transform, 137 | pred_class_logits, 138 | pred_proposal_deltas, 139 | proposals, 140 | self.smooth_l1_beta, 141 | ) 142 | if self.training: 143 | if self.train_on_pred_boxes: 144 | pred_boxes = outputs.predict_boxes_for_gt_classes() 145 | for proposals_per_image, pred_boxes_per_image in zip(proposals, pred_boxes): 146 | proposals_per_image.proposal_boxes = Boxes(pred_boxes_per_image) 147 | return outputs.losses() 148 | else: 149 | pred_instances, _ = outputs.inference( 150 | self.test_score_thresh, self.test_nms_thresh, self.test_detections_per_img 151 | ) 152 | return pred_instances 153 | 154 | 155 | 156 | 157 | 158 | -------------------------------------------------------------------------------- /det/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TengFeiHan0/Object-Detection.pytorch/5c2000ba08e903255a146f9e003c87a2fb1cb5aa/det/utils/__init__.py -------------------------------------------------------------------------------- /det/utils/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TengFeiHan0/Object-Detection.pytorch/5c2000ba08e903255a146f9e003c87a2fb1cb5aa/det/utils/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /det/utils/__pycache__/comm.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TengFeiHan0/Object-Detection.pytorch/5c2000ba08e903255a146f9e003c87a2fb1cb5aa/det/utils/__pycache__/comm.cpython-36.pyc -------------------------------------------------------------------------------- /det/utils/comm.py: -------------------------------------------------------------------------------- 1 | import torch.distributed as dist 2 | from detectron2.utils.comm import get_world_size 3 | 4 | 5 | def 
reduce_sum(tensor): 6 | world_size = get_world_size() 7 | if world_size < 2: 8 | return tensor 9 | tensor = tensor.clone() 10 | dist.all_reduce(tensor, op=dist.ReduceOp.SUM) 11 | return tensor 12 | -------------------------------------------------------------------------------- /det/utils/measures.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | # Adapted from https://github.com/ShichenLiu/CondenseNet/blob/master/utils.py 3 | from __future__ import absolute_import 4 | from __future__ import unicode_literals 5 | from __future__ import print_function 6 | from __future__ import division 7 | 8 | import operator 9 | 10 | from functools import reduce 11 | 12 | 13 | def get_num_gen(gen): 14 | return sum(1 for x in gen) 15 | 16 | 17 | def is_pruned(layer): 18 | try: 19 | layer.mask 20 | return True 21 | except AttributeError: 22 | return False 23 | 24 | 25 | def is_leaf(model): 26 | return get_num_gen(model.children()) == 0 27 | 28 | 29 | def get_layer_info(layer): 30 | layer_str = str(layer) 31 | type_name = layer_str[:layer_str.find('(')].strip() 32 | return type_name 33 | 34 | 35 | def get_layer_param(model): 36 | return sum([reduce(operator.mul, i.size(), 1) for i in model.parameters()]) 37 | 38 | 39 | ### The input batch size should be 1 to call this function 40 | def measure_layer(layer, *args): 41 | global count_ops, count_params 42 | 43 | for x in args: 44 | delta_ops = 0 45 | delta_params = 0 46 | multi_add = 1 47 | type_name = get_layer_info(layer) 48 | 49 | ### ops_conv 50 | if type_name in ['Conv2d']: 51 | out_h = int((x.size()[2] + 2 * layer.padding[0] / layer.dilation[0] - layer.kernel_size[0]) / 52 | layer.stride[0] + 1) 53 | out_w = int((x.size()[3] + 2 * layer.padding[1] / layer.dilation[1] - layer.kernel_size[1]) / 54 | layer.stride[1] + 1) 55 | delta_ops = layer.in_channels * layer.out_channels * layer.kernel_size[0] * layer.kernel_size[1] * out_h * out_w / layer.groups * multi_add 56 | delta_params = get_layer_param(layer) 57 | 58 | elif type_name in ['ConvTranspose2d']: 59 | _, _, in_h, in_w = x.size() 60 | out_h = int((in_h-1)*layer.stride[0] - 2 * layer.padding[0] + layer.kernel_size[0] + layer.output_padding[0]) 61 | out_w = int((in_w-1)*layer.stride[1] - 2 * layer.padding[1] + layer.kernel_size[1] + layer.output_padding[1]) 62 | delta_ops = layer.in_channels * layer.out_channels * layer.kernel_size[0] * \ 63 | layer.kernel_size[1] * out_h * out_w / layer.groups * multi_add 64 | delta_params = get_layer_param(layer) 65 | 66 | ### ops_learned_conv 67 | elif type_name in ['LearnedGroupConv']: 68 | measure_layer(layer.relu, x) 69 | measure_layer(layer.norm, x) 70 | conv = layer.conv 71 | out_h = int((x.size()[2] + 2 * conv.padding[0] - conv.kernel_size[0]) / 72 | conv.stride[0] + 1) 73 | out_w = int((x.size()[3] + 2 * conv.padding[1] - conv.kernel_size[1]) / 74 | conv.stride[1] + 1) 75 | delta_ops = conv.in_channels * conv.out_channels * conv.kernel_size[0] * conv.kernel_size[1] * out_h * out_w / layer.condense_factor * multi_add 76 | delta_params = get_layer_param(conv) / layer.condense_factor 77 | 78 | ### ops_nonlinearity 79 | elif type_name in ['ReLU', 'ReLU6']: 80 | delta_ops = x.numel() 81 | delta_params = get_layer_param(layer) 82 | 83 | ### ops_pooling 84 | elif type_name in ['AvgPool2d', 'MaxPool2d']: 85 | in_w = x.size()[2] 86 | kernel_ops = layer.kernel_size * layer.kernel_size 87 | out_w = int((in_w + 2 * layer.padding - layer.kernel_size) / layer.stride + 1) 88 | out_h = int((in_w + 2 * layer.padding - 
layer.kernel_size) / layer.stride + 1) 89 | delta_ops = x.size()[0] * x.size()[1] * out_w * out_h * kernel_ops 90 | delta_params = get_layer_param(layer) 91 | 92 | elif type_name in ['LastLevelMaxPool']: 93 | pass 94 | 95 | elif type_name in ['AdaptiveAvgPool2d']: 96 | delta_ops = x.size()[0] * x.size()[1] * x.size()[2] * x.size()[3] 97 | delta_params = get_layer_param(layer) 98 | 99 | elif type_name in ['ZeroPad2d', 'RetinaNetPostProcessor']: 100 | pass 101 | #delta_ops = x.size()[0] * x.size()[1] * x.size()[2] * x.size()[3] 102 | #delta_params = get_layer_param(layer) 103 | 104 | ### ops_linear 105 | elif type_name in ['Linear']: 106 | weight_ops = layer.weight.numel() * multi_add 107 | bias_ops = layer.bias.numel() 108 | delta_ops = x.size()[0] * (weight_ops + bias_ops) 109 | delta_params = get_layer_param(layer) 110 | 111 | ### ops_nothing 112 | elif type_name in ['BatchNorm2d', 'Dropout2d', 'DropChannel', 'Dropout', 'FrozenBatchNorm2d', 'GroupNorm']: 113 | delta_params = get_layer_param(layer) 114 | 115 | elif type_name in ['SumTwo']: 116 | delta_ops = x.numel() 117 | 118 | elif type_name in ['AggregateCell']: 119 | if not layer.pre_transform: 120 | delta_ops = 2 * x.numel() # twice for each input 121 | else: 122 | measure_layer(layer.branch_1, x) 123 | measure_layer(layer.branch_2, x) 124 | delta_params = get_layer_param(layer) 125 | 126 | elif type_name in ['Identity', 'Zero']: 127 | pass 128 | 129 | elif type_name in ['Scale']: 130 | delta_params = get_layer_param(layer) 131 | delta_ops = x.numel() 132 | 133 | elif type_name in ['FCOSPostProcessor', 'RPNPostProcessor', 'KeypointPostProcessor', 134 | 'ROIAlign', 'PostProcessor', 'KeypointRCNNPredictor', 135 | 'NaiveSyncBatchNorm', 'Upsample', 'Sequential']: 136 | pass 137 | 138 | elif type_name in ['DeformConv']: 139 | # don't count bilinear 140 | offset_conv = list(layer.parameters())[0] 141 | delta_ops = reduce(operator.mul, offset_conv.size(), x.size()[2] * x.size()[3]) 142 | out_h = int((x.size()[2] + 2 * layer.padding[0] / layer.dilation[0] 143 | - layer.kernel_size[0]) / layer.stride[0] + 1) 144 | out_w = int((x.size()[3] + 2 * layer.padding[1] / layer.dilation[1] 145 | - layer.kernel_size[1]) / layer.stride[1] + 1) 146 | delta_ops += layer.in_channels * layer.out_channels * layer.kernel_size[0] * layer.kernel_size[1] * out_h * out_w / layer.groups * multi_add 147 | delta_params = get_layer_param(layer) 148 | 149 | ### unknown layer type 150 | else: 151 | raise TypeError('unknown layer type: %s' % type_name) 152 | 153 | count_ops += delta_ops 154 | count_params += delta_params 155 | return 156 | 157 | 158 | def measure_model(model, x): 159 | global count_ops, count_params 160 | count_ops = 0 161 | count_params = 0 162 | 163 | def should_measure(x): 164 | return is_leaf(x) or is_pruned(x) 165 | 166 | def modify_forward(model): 167 | for child in model.children(): 168 | if should_measure(child): 169 | def new_forward(m): 170 | def lambda_forward(*args): 171 | measure_layer(m, *args) 172 | return m.old_forward(*args) 173 | return lambda_forward 174 | child.old_forward = child.forward 175 | child.forward = new_forward(child) 176 | else: 177 | modify_forward(child) 178 | 179 | def restore_forward(model): 180 | for child in model.children(): 181 | # leaf node 182 | if is_leaf(child) and hasattr(child, 'old_forward'): 183 | child.forward = child.old_forward 184 | child.old_forward = None 185 | else: 186 | restore_forward(child) 187 | 188 | modify_forward(model) 189 | out = model.forward(x) 190 | restore_forward(model) 191 | 192 | 
return out, count_ops, count_params 193 | -------------------------------------------------------------------------------- /det/utils/visualizer.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from detectron2.utils.visualizer import Visualizer 4 | 5 | 6 | class TextVisualizer(Visualizer): 7 | def draw_instance_predictions(self, predictions): 8 | beziers = predictions.beziers.numpy() 9 | scores = predictions.scores.tolist() 10 | recs = predictions.recs 11 | 12 | self.overlay_instances(beziers, recs, scores) 13 | 14 | return self.output 15 | 16 | def _bezier_to_poly(self, bezier): 17 | # bezier to polygon 18 | u = np.linspace(0, 1, 20) 19 | bezier = bezier.reshape(2, 4, 2).transpose(0, 2, 1).reshape(4, 4) 20 | points = np.outer((1 - u) ** 3, bezier[:, 0]) \ 21 | + np.outer(3 * u * ((1 - u) ** 2), bezier[:, 1]) \ 22 | + np.outer(3 * (u ** 2) * (1 - u), bezier[:, 2]) \ 23 | + np.outer(u ** 3, bezier[:, 3]) 24 | points = np.concatenate((points[:, :2], points[:, 2:]), axis=0) 25 | 26 | return points 27 | 28 | def _decode_recognition(self, rec): 29 | CTLABELS = [' ','!','"','#','$','%','&','\'','(',')','*','+',',','-','.','/','0','1','2','3','4','5','6','7','8','9',':',';','<','=','>','?','@','A','B','C','D','E','F','G','H','I','J','K','L','M','N','O','P','Q','R','S','T','U','V','W','X','Y','Z','[','\\',']','^','_','`','a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v','w','x','y','z','{','|','}','~'] 30 | 31 | s = '' 32 | for c in rec: 33 | c = int(c) 34 | if c < 95: 35 | s += CTLABELS[c] 36 | elif c == 95: 37 | s += u'口' 38 | return s 39 | 40 | def _ctc_decode_recognition(self, rec): 41 | CTLABELS = [' ','!','"','#','$','%','&','\'','(',')','*','+',',','-','.','/','0','1','2','3','4','5','6','7','8','9',':',';','<','=','>','?','@','A','B','C','D','E','F','G','H','I','J','K','L','M','N','O','P','Q','R','S','T','U','V','W','X','Y','Z','[','\\',']','^','_','`','a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v','w','x','y','z','{','|','}','~'] 42 | 43 | # ctc decoding 44 | last_char = False 45 | s = '' 46 | for c in rec: 47 | c = int(c) 48 | if c < 95: 49 | if last_char != c: 50 | s += CTLABELS[c] 51 | last_char = c 52 | elif c == 95: 53 | s += u'口' 54 | else: 55 | last_char = False 56 | return s 57 | 58 | def overlay_instances(self, beziers, recs, scores, alpha=0.5): 59 | color = (0.1, 0.2, 0.5) 60 | 61 | for bezier, rec, score in zip(beziers, recs, scores): 62 | polygon = self._bezier_to_poly(bezier) 63 | self.draw_polygon(polygon, color, alpha=alpha) 64 | 65 | # draw text in the top left corner 66 | text = self._decode_recognition(rec) 67 | text = "{:.3f}: {}".format(score, text) 68 | lighter_color = self._change_color_brightness(color, brightness_factor=0.7) 69 | text_pos = polygon[0] 70 | horiz_align = "left" 71 | font_size = self._default_font_size 72 | 73 | self.draw_text( 74 | text, 75 | text_pos, 76 | color=lighter_color, 77 | horizontal_alignment=horiz_align, 78 | font_size=font_size, 79 | ) -------------------------------------------------------------------------------- /docs/nms/README.md: -------------------------------------------------------------------------------- 1 | #NMS解析 2 | 3 | https://zhuanlan.zhihu.com/p/80902998 -------------------------------------------------------------------------------- /docs/nms/__init__.py: -------------------------------------------------------------------------------- 1 | from .nms_wrapper import nms, 
soft_nms 2 | 3 | __all__ = ['nms', 'soft_nms'] 4 | -------------------------------------------------------------------------------- /docs/nms/demo_nms.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | static void sort( int n, const float* x, int* indices){ 7 | int i, j; 8 | for(i=0; i x[indices[i]]){ 11 | std::swap(indices[i], indices[j]); 12 | } 13 | 14 | } 15 | } 16 | 17 | } 18 | 19 | int nonMaximumSuppression( 20 | int numBoxes, const CvPoint *points, 21 | const CvPoint *oppositePoints, const float *score, 22 | float overlapThreshold, int *numBoxOut, CvPoint **pointsOut, 23 | CvPoint **oppositePointsOut, float *scoreOut){ 24 | 25 | 26 | int i, j, index; 27 | float *box_area = (float *)malloc(numBoxes*sizeof(float)); 28 | int *indices = (int *)malloc(numBoxes*sizeof(int)); 29 | int *is_suppressed = (int *)malloc(numBoxes*sizeof(int)); 30 | 31 | for(i=0; i 0 && overlapHeight >0){ 58 | float overlapPart = (overlapWidth * overlapHeight)/ box_area[indices[i]] ; 59 | if(overlapPart > overlapThreshold){ 60 | is_suppressed[indices[i]] =1; 61 | } 62 | } 63 | } 64 | } 65 | } 66 | } 67 | 68 | *numBoxOut = 0; 69 | for(int i = 0; i < numBoxes; i++){ 70 | if(!is_suppressed[i]) (*numBoxOut)++; 71 | } 72 | 73 | *pointsOut = (CvPoint*)malloc((*numBoxOut) * sizeof(CvPoint)); 74 | *oppositePointsOut = (CvPoint*)malloc((*numBoxOut) *sizeof(CvPoint)); 75 | *scoreOut = (float*)malloc((*numBoxOut) * sizeof(float)); 76 | 77 | index =0; 78 | 79 | for(int i = 0; i < numBoxes; i++){ 80 | if(!is_suppressed[indices[i]]){ 81 | (*pointsOut)[index].x = points[indices[i]].x; 82 | (*pointsOut)[index].y = points[indices[i]].y; 83 | (*oppositePointsOut)[index].x = oppositePoints[indices[i]].x; 84 | (*oppositePointsOut)[index].y = oppositePoints[indices[i]].y; 85 | (*scoreOut)[index] = score[indices[i]]; 86 | index++; 87 | } 88 | } 89 | 90 | free(indices); 91 | free(box_area); 92 | free(is_suppressed); 93 | return 1; 94 | } -------------------------------------------------------------------------------- /docs/nms/demo_nms.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | def compute_iou(box1, box2, wh= False): 4 | if wh == False: 5 | xmin1, ymin1, xmax1, ymax1 = box1 6 | xmin2, ymin2, xmax2, ymax2 = box2 7 | else: 8 | xmin1, ymin1 = int((box1[0]- box1[2])/2.0), int((box1[1]- box1[3])/2.0) 9 | xmax1, ymax1 = int((box1[0]+ box1[2])/2.0), int((box1[1]+box1[3])/2.0) 10 | xmin2, ymin2 = int((box2[0]- box2[2])/2.0), int((box2[1]+box2[3])/2.0) 11 | xmax2, ymax2 = int((box2[0]+box2[2])/2.0), int((box2[1]+box2[3])/2.0) 12 | 13 | xx1 = np.max([xmin1, xmin2])0 14 | yy1 = np.max([ymin1, ymin2]) 15 | xx2 = np.max([xmax1, xmax2]) 16 | yy2 = np.max([ymax1, ymax2]) 17 | 18 | area1 = (xmax1 - xmin1)*(ymax1 - ymin1) 19 | area2 = (xmax2 - xmin2)*(ymax2 - ymin2) 20 | 21 | inter_area = (np.max([0, xx2-xx1]))* np.max([0, yy2-yy1]) 22 | iou = inter_area / (area1 + area2 - inter_area +1e-6) 23 | return iou 24 | 25 | def py_cpu_nms(dets, thresh): 26 | #dets某个类的框,x1、y1、x2、y2、以及置信度score 27 | #eg:dets为[[x1,y1,x2,y2,score],[x1,y1,y2,score]……]] 28 | x1 = dets[:, 0] 29 | y1 = dets[:, 1] 30 | x2 = dets[:, 2] 31 | y2 = dets[:, 3] 32 | scores = dets[:, 4] 33 | #面積 34 | areas = (x2- x1 +1)*(y2- y1 +1) 35 | order = scores.argsort()[::-1]#按照置信度降序排序 36 | keep = [] 37 | 38 | while order.size() >0: 39 | i = order[0]#保留得分最高的 40 | keep.append(i) 41 | #得到相交区域,左上及右下 42 | xx1 = np.maximum(x1[i], 
-------------------------------------------------------------------------------- /docs/nms/nms.cpp: --------------------------------------------------------------------------------
1 | // NOTE: this file duplicates demo_nms.cpp; as there, the original #include
2 | // targets were lost when the file was exported and the headers are a best guess.
3 | #include <cstdio>
4 | #include <cstdlib>
5 | #include <algorithm>
6 | #include <opencv2/core/types_c.h>
7 | 
8 | // sort indices so that x[indices[0]] >= x[indices[1]] >= ...
9 | static void sort(int n, const float* x, int* indices){
10 |     int i, j;
11 |     for(i = 0; i < n; i++){
12 |         for(j = i + 1; j < n; j++){
13 |             if(x[indices[j]] > x[indices[i]]){
14 |                 std::swap(indices[i], indices[j]);
15 |             }
16 |         }
17 |     }
18 | }
19 | 
20 | int nonMaximumSuppression(
21 |     int numBoxes, const CvPoint *points,
22 |     const CvPoint *oppositePoints, const float *score,
23 |     float overlapThreshold, int *numBoxOut, CvPoint **pointsOut,
24 |     CvPoint **oppositePointsOut, float *scoreOut){
25 | 
26 |     int i, j, index;
27 |     float *box_area = (float *)malloc(numBoxes*sizeof(float));
28 |     int *indices = (int *)malloc(numBoxes*sizeof(int));
29 |     int *is_suppressed = (int *)malloc(numBoxes*sizeof(int));
30 | 
31 |     // NOTE: the initialization loop and the overlap test below were also lost in
32 |     // the export; they are reconstructed here from the standard implementation.
33 |     for(i = 0; i < numBoxes; i++){
34 |         indices[i] = i;
35 |         is_suppressed[i] = 0;
36 |         box_area[i] = (float)((oppositePoints[i].x - points[i].x + 1) *
37 |                               (oppositePoints[i].y - points[i].y + 1));
38 |     }
39 | 
40 |     sort(numBoxes, score, indices);
41 | 
42 |     for(i = 0; i < numBoxes; i++){
43 |         if(!is_suppressed[indices[i]]){
44 |             for(j = i + 1; j < numBoxes; j++){
45 |                 if(!is_suppressed[indices[j]]){
46 |                     int x1max = std::max(points[indices[i]].x, points[indices[j]].x);
47 |                     int x2min = std::min(oppositePoints[indices[i]].x, oppositePoints[indices[j]].x);
48 |                     int y1max = std::max(points[indices[i]].y, points[indices[j]].y);
49 |                     int y2min = std::min(oppositePoints[indices[i]].y, oppositePoints[indices[j]].y);
50 |                     int overlapWidth = x2min - x1max + 1;
51 |                     int overlapHeight = y2min - y1max + 1;
52 |                     if(overlapWidth > 0 && overlapHeight > 0){
53 |                         // suppress the lower-scored box j
54 |                         float overlapPart = (overlapWidth * overlapHeight) / box_area[indices[j]];
55 |                         if(overlapPart > overlapThreshold){
56 |                             is_suppressed[indices[j]] = 1;
57 |                         }
58 |                     }
59 |                 }
60 |             }
61 |         }
62 |     }
63 | 
64 |     *numBoxOut = 0;
65 |     for(int i = 0; i < numBoxes; i++){
66 |         if(!is_suppressed[i]) (*numBoxOut)++;
67 |     }
68 | 
69 |     *pointsOut = (CvPoint*)malloc((*numBoxOut) * sizeof(CvPoint));
70 |     *oppositePointsOut = (CvPoint*)malloc((*numBoxOut) * sizeof(CvPoint));
71 |     *scoreOut = (float*)malloc((*numBoxOut) * sizeof(float));
72 | 
73 |     index = 0;
74 |     for(int i = 0; i < numBoxes; i++){
75 |         if(!is_suppressed[indices[i]]){
76 |             (*pointsOut)[index].x = points[indices[i]].x;
77 |             (*pointsOut)[index].y = points[indices[i]].y;
78 |             (*oppositePointsOut)[index].x = oppositePoints[indices[i]].x;
79 |             (*oppositePointsOut)[index].y = oppositePoints[indices[i]].y;
80 |             (*scoreOut)[index] = score[indices[i]];
81 |             index++;
82 |         }
83 |     }
84 | 
85 |     free(indices);
86 |     free(box_area);
87 |     free(is_suppressed);
88 |     return 1;
89 | }
-------------------------------------------------------------------------------- /docs/nms/nms.py: --------------------------------------------------------------------------------
1 | import numpy as np
2 | import torch
3 | 
4 | 
5 | def compute_iou(box1, box2, wh=False):
6 |     # boxes are [x1, y1, x2, y2] when wh=False, [cx, cy, w, h] when wh=True
7 |     if wh == False:
8 |         xmin1, ymin1, xmax1, ymax1 = box1
9 |         xmin2, ymin2, xmax2, ymax2 = box2
10 |     else:
11 |         xmin1, ymin1 = int((box1[0] - box1[2]) / 2.0), int((box1[1] - box1[3]) / 2.0)
12 |         xmax1, ymax1 = int((box1[0] + box1[2]) / 2.0), int((box1[1] + box1[3]) / 2.0)
13 |         xmin2, ymin2 = int((box2[0] - box2[2]) / 2.0), int((box2[1] - box2[3]) / 2.0)
14 |         xmax2, ymax2 = int((box2[0] + box2[2]) / 2.0), int((box2[1] + box2[3]) / 2.0)
15 | 
16 |     # intersection rectangle: max of the top-left corners, min of the bottom-right corners
17 |     xx1 = np.max([xmin1, xmin2])
18 |     yy1 = np.max([ymin1, ymin2])
19 |     xx2 = np.min([xmax1, xmax2])
20 |     yy2 = np.min([ymax1, ymax2])
21 | 
22 |     area1 = (xmax1 - xmin1) * (ymax1 - ymin1)
23 |     area2 = (xmax2 - xmin2) * (ymax2 - ymin2)
24 | 
25 |     inter_area = np.max([0, xx2 - xx1]) * np.max([0, yy2 - yy1])
26 |     iou = inter_area / (area1 + area2 - inter_area + 1e-6)
27 |     return iou
28 | 
29 | 
30 | def py_cpu_nms(dets, thresh):
31 |     # dets holds the boxes of one class: [[x1, y1, x2, y2, score], ...]
32 |     x1 = dets[:, 0]
33 |     y1 = dets[:, 1]
34 |     x2 = dets[:, 2]
35 |     y2 = dets[:, 3]
36 |     scores = dets[:, 4]
37 |     # box areas
38 |     areas = (x2 - x1 + 1) * (y2 - y1 + 1)
39 |     order = scores.argsort()[::-1]  # sort by confidence, descending
40 |     keep = []
41 | 
42 |     while order.size > 0:
43 |         i = order[0]  # keep the highest-scoring remaining box
44 |         keep.append(i)
45 |         # intersection rectangle with every remaining box (top-left and bottom-right)
46 |         xx1 = np.maximum(x1[i], x1[order[1:]])
47 |         yy1 = np.maximum(y1[i], y1[order[1:]])
48 |         xx2 = np.minimum(x2[i], x2[order[1:]])
49 |         yy2 = np.minimum(y2[i], y2[order[1:]])
50 |         # clip to non-negative width and height
51 |         w = np.maximum(0.0, xx2 - xx1 + 1)
52 |         h = np.maximum(0.0, yy2 - yy1 + 1)
53 | 
54 |         inter = w * h
55 |         # IoU = overlap / (area1 + area2 - overlap)
56 |         ovr = inter / (areas[i] + areas[order[1:]] - inter)
57 |         # keep only the boxes whose IoU with box i is below the threshold
58 |         inds = np.where(ovr <= thresh)[0]
59 |         order = order[inds + 1]  # ovr is one element shorter than order, so shift the indices by one
60 | 
61 |     return keep
62 | 
63 | 
64 | def iou(box1, box2):
65 |     N = box1.size(0)
66 |     M = box2.size(0)
67 | 
68 |     lt = torch.max(  # top-left corners of the pairwise intersections
69 |         box1[:, :2].unsqueeze(1).expand(N, M, 2),  # [N,2] -> [N,1,2] -> [N,M,2]
70 |         box2[:, :2].unsqueeze(0).expand(N, M, 2),  # [M,2] -> [1,M,2] -> [N,M,2]
71 |     )
72 | 
73 |     rb = torch.min(  # bottom-right corners of the pairwise intersections
74 |         box1[:, 2:].unsqueeze(1).expand(N, M, 2),
75 |         box2[:, 2:].unsqueeze(0).expand(N, M, 2),
76 |     )
77 |     wh = rb - lt
78 |     wh[wh < 0] = 0
79 | 
80 |     inter = wh[:, :, 0] * wh[:, :, 1]
81 |     area1 = (box1[:, 2] - box1[:, 0]) * (box1[:, 3] - box1[:, 1])
82 |     area2 = (box2[:, 2] - box2[:, 0]) * (box2[:, 3] - box2[:, 1])
83 |     area1 = area1.unsqueeze(1).expand(N, M)
84 |     area2 = area2.unsqueeze(0).expand(N, M)
85 | 
86 |     iou = inter / (area1 + area2 - inter)
87 | 
88 |     return iou
89 | 
90 | 
91 | def nms(bboxes, scores, threshold=0.5):
92 |     x1 = bboxes[:, 0]
93 |     y1 = bboxes[:, 1]
94 |     x2 = bboxes[:, 2]
95 |     y2 = bboxes[:, 3]
96 |     areas = (x2 - x1) * (y2 - y1)
97 |     _, order = scores.sort(0, descending=True)
98 |     keep = []
99 |     while order.numel() > 0:
100 |         if order.numel() == 1:
101 |             i = order.item()
102 |             keep.append(i)
103 |             break
104 |         else:
105 |             i = order[0].item()
106 |             keep.append(i)
107 |             xx1 = x1[order[1:]].clamp(min=x1[i])
108 |             yy1 = y1[order[1:]].clamp(min=y1[i])
109 |             xx2 = x2[order[1:]].clamp(max=x2[i])
110 |             yy2 = y2[order[1:]].clamp(max=y2[i])
111 | 
112 |             inter = (xx2 - xx1).clamp(min=0) * (yy2 - yy1).clamp(min=0)
113 | 
114 |             iou = inter / (areas[i] + areas[order[1:]] - inter)
115 |             idx = (iou < threshold).nonzero().squeeze()
116 | 
117 |             order = order[idx + 1]
118 |     return torch.LongTensor(keep)
119 | 
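If torchvision is available, the tensor-based nms in nms.py can be sanity-checked against the built-in operator; a sketch with made-up inputs (index order may differ on score ties), assuming it is run from docs/nms/:

```python
import torch
import torchvision
from nms import nms  # hypothetical import of the function defined above

boxes = torch.tensor([[10., 10., 50., 50.],
                      [12., 12., 52., 48.],
                      [100., 100., 150., 160.]])
scores = torch.tensor([0.95, 0.80, 0.70])

keep_ours = nms(boxes, scores, threshold=0.5)
keep_ref = torchvision.ops.nms(boxes, scores, iou_threshold=0.5)
print(keep_ours.tolist(), keep_ref.tolist())  # both keep boxes 0 and 2 for this input
```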
-------------------------------------------------------------------------------- /docs/nms/nms_wrapper.py: --------------------------------------------------------------------------------
1 | import numpy as np
2 | import torch
3 | 
4 | from . import nms_cpu, nms_cuda
5 | from .soft_nms_cpu import soft_nms_cpu
6 | 
7 | 
8 | def nms(dets, iou_thr, device_id=None):
9 |     """Dispatch to either CPU or GPU NMS implementations.
10 | 
11 |     The input can be either a torch tensor or numpy array. GPU NMS will be used
12 |     if the input is a gpu tensor or device_id is specified, otherwise CPU NMS
13 |     will be used. The returned type will always be the same as inputs.
14 | 
15 |     Arguments:
16 |         dets (torch.Tensor or np.ndarray): bboxes with scores.
17 |         iou_thr (float): IoU threshold for NMS.
18 |         device_id (int, optional): when `dets` is a numpy array, if `device_id`
19 |             is None, then cpu nms is used, otherwise gpu_nms will be used.
20 | 
21 |     Returns:
22 |         tuple: kept bboxes and indices, which is always the same data type as
23 |             the input.
24 | 
25 |     Example:
26 |         >>> dets = np.array([[49.1, 32.4, 51.0, 35.9, 0.9],
27 |         >>>                  [49.3, 32.9, 51.0, 35.3, 0.9],
28 |         >>>                  [49.2, 31.8, 51.0, 35.4, 0.5],
29 |         >>>                  [35.1, 11.5, 39.1, 15.7, 0.5],
30 |         >>>                  [35.6, 11.8, 39.3, 14.2, 0.5],
31 |         >>>                  [35.3, 11.5, 39.9, 14.5, 0.4],
32 |         >>>                  [35.2, 11.7, 39.7, 15.7, 0.3]], dtype=np.float32)
33 |         >>> iou_thr = 0.7
34 |         >>> suppressed, inds = nms(dets, iou_thr)
35 |         >>> assert len(inds) == len(suppressed) == 3
36 |     """
37 |     # convert dets (tensor or numpy array) to tensor
38 |     if isinstance(dets, torch.Tensor):
39 |         is_numpy = False
40 |         dets_th = dets
41 |     elif isinstance(dets, np.ndarray):
42 |         is_numpy = True
43 |         device = 'cpu' if device_id is None else 'cuda:{}'.format(device_id)
44 |         dets_th = torch.from_numpy(dets).to(device)
45 |     else:
46 |         raise TypeError(
47 |             'dets must be either a Tensor or numpy array, but got {}'.format(
48 |                 type(dets)))
49 | 
50 |     # execute cpu or cuda nms
51 |     if dets_th.shape[0] == 0:
52 |         inds = dets_th.new_zeros(0, dtype=torch.long)
53 |     else:
54 |         if dets_th.is_cuda:
55 |             inds = nms_cuda.nms(dets_th, iou_thr)
56 |         else:
57 |             inds = nms_cpu.nms(dets_th, iou_thr)
58 | 
59 |     if is_numpy:
60 |         inds = inds.cpu().numpy()
61 |     return dets[inds, :], inds
62 | 
63 | 
64 | def soft_nms(dets, iou_thr, method='linear', sigma=0.5, min_score=1e-3):
65 |     """
66 |     Example:
67 |         >>> dets = np.array([[4., 3., 5., 3., 0.9],
68 |         >>>                  [4., 3., 5., 4., 0.9],
69 |         >>>                  [3., 1., 3., 1., 0.5],
70 |         >>>                  [3., 1., 3., 1., 0.5],
71 |         >>>                  [3., 1., 3., 1., 0.4],
72 |         >>>                  [3., 1., 3., 1., 0.0]], dtype=np.float32)
73 |         >>> iou_thr = 0.7
74 |         >>> suppressed, inds = soft_nms(dets, iou_thr, sigma=0.5)
75 |         >>> assert len(inds) == len(suppressed) == 3
76 |     """
77 |     if isinstance(dets, torch.Tensor):
78 |         is_tensor = True
79 |         dets_np = dets.detach().cpu().numpy()
80 |     elif isinstance(dets, np.ndarray):
81 |         is_tensor = False
82 |         dets_np = dets
83 |     else:
84 |         raise TypeError(
85 |             'dets must be either a Tensor or numpy array, but got {}'.format(
86 |                 type(dets)))
87 | 
88 |     method_codes = {'linear': 1, 'gaussian': 2}
89 |     if method not in method_codes:
90 |         raise ValueError('Invalid method for SoftNMS: {}'.format(method))
91 |     new_dets, inds = soft_nms_cpu(
92 |         dets_np,
93 |         iou_thr,
94 |         method=method_codes[method],
95 |         sigma=sigma,
96 |         min_score=min_score)
97 | 
98 |     if is_tensor:
99 |         return dets.new_tensor(new_dets), dets.new_tensor(
100 |             inds, dtype=torch.long)
101 |     else:
102 |         return new_dets.astype(np.float32), inds.astype(np.int64)
103 | 
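The method_codes mapping above selects how soft_nms_cpu (defined further below) decays the scores of boxes that overlap an already-kept box; the decay rules can be written out directly as a small reference sketch with made-up IoU values:

```python
import numpy as np

def soft_nms_weight(iou, method="linear", iou_thr=0.3, sigma=0.5):
    if method == "linear":      # method code 1: rescale the score by (1 - IoU) past the threshold
        return 1.0 - iou if iou > iou_thr else 1.0
    if method == "gaussian":    # method code 2: smooth exp(-IoU^2 / sigma) decay, applied everywhere
        return float(np.exp(-(iou * iou) / sigma))
    return 0.0 if iou > iou_thr else 1.0  # anything else: hard NMS (score zeroed, box dropped)

for iou in (0.2, 0.5, 0.8):
    print(iou, soft_nms_weight(iou, "linear"), round(soft_nms_weight(iou, "gaussian"), 3))
```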
-------------------------------------------------------------------------------- /docs/nms/src/nms_cpu.cpp: --------------------------------------------------------------------------------
1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
2 | // NOTE: the original #include target was lost when this file was exported;
3 | // <torch/extension.h> is assumed here.
4 | #include <torch/extension.h>
5 | 
6 | template <typename scalar_t>
7 | at::Tensor nms_cpu_kernel(const at::Tensor& dets, const float threshold) {
8 |   // make sure this is a CPU tensor
9 |   AT_ASSERTM(!dets.type().is_cuda(), "dets must be a CPU tensor");
10 | 
11 |   if (dets.numel() == 0) {
12 |     return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU));
13 |   }
14 |   // tensor.select(1, index) is equivalent to tensor[:, index]
15 |   auto x1_t = dets.select(1, 0).contiguous();
16 |   auto y1_t = dets.select(1, 1).contiguous();
17 |   auto x2_t = dets.select(1, 2).contiguous();
18 |   auto y2_t = dets.select(1, 3).contiguous();
19 |   auto scores = dets.select(1, 4).contiguous();
20 |   // box areas
21 |   at::Tensor areas_t = (x2_t - x1_t + 1) * (y2_t - y1_t + 1);
22 |   // sort scores in descending order
23 |   auto order_t = std::get<1>(scores.sort(0, /* descending=*/true));
24 | 
25 |   auto ndets = dets.size(0);
26 |   at::Tensor suppressed_t =
27 |       at::zeros({ndets}, dets.options().dtype(at::kByte).device(at::kCPU));
28 | 
29 |   // per-box flag recording whether the box was already suppressed by an
30 |   // earlier (higher-scored) box
31 |   auto suppressed = suppressed_t.data<uint8_t>();
32 |   auto order = order_t.data<int64_t>();
33 |   auto x1 = x1_t.data<scalar_t>();
34 |   auto y1 = y1_t.data<scalar_t>();
35 |   auto x2 = x2_t.data<scalar_t>();
36 |   auto y2 = y2_t.data<scalar_t>();
37 |   auto areas = areas_t.data<scalar_t>();
38 | 
39 |   for (int64_t _i = 0; _i < ndets; _i++) {
40 |     auto i = order[_i];
41 |     if (suppressed[i] == 1) continue;
42 |     auto ix1 = x1[i];
43 |     auto iy1 = y1[i];
44 |     auto ix2 = x2[i];
45 |     auto iy2 = y2[i];
46 |     auto iarea = areas[i];
47 | 
48 |     for (int64_t _j = _i + 1; _j < ndets; _j++) {
49 |       auto j = order[_j];
50 |       if (suppressed[j] == 1) continue;
51 |       auto xx1 = std::max(ix1, x1[j]);
52 |       auto yy1 = std::max(iy1, y1[j]);
53 |       auto xx2 = std::min(ix2, x2[j]);
54 |       auto yy2 = std::min(iy2, y2[j]);
55 | 
56 |       auto w = std::max(static_cast<scalar_t>(0), xx2 - xx1 + 1);
57 |       auto h = std::max(static_cast<scalar_t>(0), yy2 - yy1 + 1);
58 |       auto inter = w * h;
59 |       auto ovr = inter / (iarea + areas[j] - inter);  // IoU
60 |       if (ovr >= threshold) suppressed[j] = 1;
61 |     }
62 |   }
63 |   return at::nonzero(suppressed_t == 0).squeeze(1);
64 | }
65 | 
66 | at::Tensor nms(const at::Tensor& dets, const float threshold) {
67 |   at::Tensor result;
68 |   AT_DISPATCH_FLOATING_TYPES(dets.scalar_type(), "nms", [&] {
69 |     result = nms_cpu_kernel<scalar_t>(dets, threshold);
70 |   });
71 |   return result;
72 | }
73 | 
74 | // expose to Python
75 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
76 |   m.def("nms", &nms, "non-maximum suppression");
77 | }
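In this repo the compiled operators ship as det._C via setup.py (further below), but the standalone CPU kernel above can also be JIT-compiled for quick experiments; a sketch assuming a working C++ toolchain and the PyTorch version targeted by setup.py:

```python
import torch
from torch.utils.cpp_extension import load

# JIT-compile docs/nms/src/nms_cpu.cpp into an importable module (the name is arbitrary)
nms_cpu = load(name="nms_cpu_ext", sources=["docs/nms/src/nms_cpu.cpp"])

dets = torch.tensor([[10., 10., 50., 50., 0.95],
                     [12., 12., 52., 48., 0.80],
                     [100., 100., 150., 160., 0.70]])
keep = nms_cpu.nms(dets, 0.5)  # indices of the boxes that survive NMS
print(keep)
```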
-------------------------------------------------------------------------------- /docs/nms/src/nms_cuda.cpp: --------------------------------------------------------------------------------
1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
2 | // NOTE: the original #include target was lost when this file was exported;
3 | // <torch/extension.h> is assumed here.
4 | #include <torch/extension.h>
5 | 
6 | #define CHECK_CUDA(x) AT_CHECK(x.type().is_cuda(), #x, " must be a CUDAtensor ")
7 | 
8 | at::Tensor nms_cuda(const at::Tensor boxes, float nms_overlap_thresh);
9 | 
10 | at::Tensor nms(const at::Tensor& dets, const float threshold) {
11 |   CHECK_CUDA(dets);
12 |   if (dets.numel() == 0)
13 |     return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU));
14 |   return nms_cuda(dets, threshold);
15 | }
16 | 
17 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
18 |   m.def("nms", &nms, "non-maximum suppression");
19 | }
-------------------------------------------------------------------------------- /docs/nms/src/nms_kernel.cu: --------------------------------------------------------------------------------
1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
2 | // NOTE: the original #include targets were lost when this file was exported;
3 | // the headers below are the usual ones for this kernel and are a best guess.
4 | #include <ATen/ATen.h>
5 | #include <ATen/cuda/CUDAContext.h>
6 | #include <ATen/DeviceGuard.h>
7 | 
8 | #include <THC/THC.h>
9 | #include <THC/THCDeviceUtils.cuh>
10 | 
11 | #include <vector>
12 | #include <iostream>
13 | 
14 | //! see https://zhuanlan.zhihu.com/p/80902998
15 | 
16 | // 64 boxes per block: one bit of an unsigned long long per box
17 | int const threadsPerBlock = sizeof(unsigned long long) * 8;
18 | 
19 | // IoU of two boxes, computed on the GPU
20 | __device__ inline float devIoU(float const * const a, float const * const b) {
21 |   float left = max(a[0], b[0]), right = min(a[2], b[2]);
22 |   float top = max(a[1], b[1]), bottom = min(a[3], b[3]);
23 |   float width = max(right - left + 1, 0.f), height = max(bottom - top + 1, 0.f);
24 |   float interS = width * height;
25 |   float Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1);
26 |   float Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1);
27 |   return interS / (Sa + Sb - interS);
28 | }
29 | 
30 | __global__ void nms_kernel(const int n_boxes, const float nms_overlap_thresh,
31 |                            const float *dev_boxes, unsigned long long *dev_mask) {
32 |   // row/column coordinates of the current block in the box-pair grid
33 |   const int row_start = blockIdx.y;
34 |   const int col_start = blockIdx.x;
35 | 
36 |   // if (row_start > col_start) return;
37 | 
38 |   // Number of valid boxes in this block row/column; the last block may be only
39 |   // partially filled, e.g. with 105 boxes and blocks of 25 the last block holds
40 |   // 5 boxes (row_size = 5) while all other blocks hold 25.
41 |   const int row_size =
42 |         min(n_boxes - row_start * threadsPerBlock, threadsPerBlock);
43 |   const int col_size =
44 |         min(n_boxes - col_start * threadsPerBlock, threadsPerBlock);
45 | 
46 |   // Shared memory speeds up the reads: the threads of a block first copy the
47 |   // column boxes they need (coordinates and score) into block_boxes, and all
48 |   // later reads hit shared memory instead of dev_boxes.
49 |   __shared__ float block_boxes[threadsPerBlock * 5];
50 | 
51 |   if (threadIdx.x < col_size) {
52 |     block_boxes[threadIdx.x * 5 + 0] =
53 |         dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0];
54 |     block_boxes[threadIdx.x * 5 + 1] =
55 |         dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1];
56 |     block_boxes[threadIdx.x * 5 + 2] =
57 |         dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2];
58 |     block_boxes[threadIdx.x * 5 + 3] =
59 |         dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 3];
60 |     block_boxes[threadIdx.x * 5 + 4] =
61 |         dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4];
62 |   }
63 | 
64 |   // wait until every thread has finished filling shared memory before using it
65 |   __syncthreads();
66 | 
67 |   // Only row_size threads do useful work here (this guard handles the partially
68 |   // filled last block). Thread i compares its row box against every column box.
69 |   if (threadIdx.x < row_size) {
70 |     const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x;
71 |     const float *cur_box = dev_boxes + cur_box_idx * 5;
72 |     int i = 0;
73 |     unsigned long long t = 0;
74 |     int start = 0;
75 |     if (row_start == col_start) {
76 |       start = threadIdx.x + 1;  // diagonal block: do not compare a box with itself
77 |     }
78 |     // main loop: IoU against every column box; set the mask bit when it exceeds
79 |     // the threshold
80 |     for (i = start; i < col_size; i++) {
81 |       if (devIoU(cur_box, block_boxes + i * 5) > nms_overlap_thresh) {
82 |         t |= 1ULL << i;  // bit i marks "column box i is suppressed by this box"
83 |       }
84 |     }
85 |     const int col_blocks = THCCeilDiv(n_boxes, threadsPerBlock);
86 |     dev_mask[cur_box_idx * col_blocks + col_start] = t;
87 |   }
88 | }
89 | 
90 | // boxes is a N x 5 tensor
91 | at::Tensor nms_cuda(const at::Tensor boxes, float nms_overlap_thresh) {
92 | 
93 |   // Ensure CUDA uses the input tensor device.
94 |   at::DeviceGuard guard(boxes.device());
95 | 
96 |   using scalar_t = float;
97 |   AT_ASSERTM(boxes.type().is_cuda(), "boxes must be a CUDA tensor");
98 |   auto scores = boxes.select(1, 4);  // tensor.select(1, index) is equivalent to tensor[:, index]
99 |   auto order_t = std::get<1>(scores.sort(0, /* descending=*/true));
100 |   auto boxes_sorted = boxes.index_select(0, order_t);
101 | 
102 |   int boxes_num = boxes.size(0);
103 | 
104 |   const int col_blocks = THCCeilDiv(boxes_num, threadsPerBlock);
105 | 
106 |   scalar_t* boxes_dev = boxes_sorted.data<scalar_t>();
107 | 
108 |   THCState *state = at::globalContext().lazyInitCUDA(); // TODO replace with getTHCState
109 | 
110 |   unsigned long long* mask_dev = NULL;
111 |   //THCudaCheck(THCudaMalloc(state, (void**) &mask_dev,
112 |   //                         boxes_num * col_blocks * sizeof(unsigned long long)));
113 | 
114 |   mask_dev = (unsigned long long*) THCudaMalloc(state, boxes_num * col_blocks * sizeof(unsigned long long));
115 | 
116 |   // grid and block sizes for the kernel launch
117 |   dim3 blocks(THCCeilDiv(boxes_num, threadsPerBlock),
118 |               THCCeilDiv(boxes_num, threadsPerBlock));
119 |   dim3 threads(threadsPerBlock);
120 |   // launch the kernel; mask_dev records, for every pair of boxes, whether their
121 |   // IoU exceeds the threshold
122 |   nms_kernel<<<blocks, threads>>>(boxes_num,
123 |                                   nms_overlap_thresh,
124 |                                   boxes_dev,
125 |                                   mask_dev);
126 | 
127 |   std::vector<unsigned long long> mask_host(boxes_num * col_blocks);
128 |   THCudaCheck(cudaMemcpyAsync(
129 |                           &mask_host[0],
130 |                           mask_dev,
131 |                           sizeof(unsigned long long) * boxes_num * col_blocks,
132 |                           cudaMemcpyDeviceToHost,
133 |                           at::cuda::getCurrentCUDAStream()
134 |                           ));
135 | 
136 |   // initially every box is still a candidate: no removal bit is set
137 |   std::vector<unsigned long long> remv(col_blocks);
138 |   memset(&remv[0], 0, sizeof(unsigned long long) * col_blocks);
139 | 
140 |   at::Tensor keep = at::empty({boxes_num}, boxes.options().dtype(at::kLong).device(at::kCPU));
141 |   int64_t* keep_out = keep.data<int64_t>();
142 | 
143 |   int num_to_keep = 0;
144 |   for (int i = 0; i < boxes_num; i++) {
145 |     int nblock = i / threadsPerBlock;   // which 64-box block this box falls into
146 |     int inblock = i % threadsPerBlock;  // bit position of this box inside that block
147 | 
148 |     // If the box has not been suppressed by an earlier (higher-scored) box, keep
149 |     // it and mark every box whose IoU with it exceeds the threshold as removed.
150 |     if (!(remv[nblock] & (1ULL << inblock))) {
151 |       keep_out[num_to_keep++] = i;  // add to the kept set
152 |       unsigned long long *p = &mask_host[0] + i * col_blocks;
153 |       for (int j = nblock; j < col_blocks; j++) {
154 |         remv[j] |= p[j];  // remove all boxes suppressed by box i
155 |       }
156 |     }
157 |   }
158 | 
159 |   THCudaFree(state, mask_dev);
160 |   // TODO improve this part
161 |   return std::get<0>(order_t.index({
162 |                        keep.narrow(/*dim=*/0, /*start=*/0, /*length=*/num_to_keep).to(
163 |                          order_t.device(), keep.scalar_type())
164 |                      }).sort(0, false));
165 | }
166 | 
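The host-side loop at the end of nms_cuda is the part that turns the pairwise bitmask into kept indices; the same reduction can be written in a few lines of plain Python (an illustrative sketch, with mask standing in for the dev_mask rows copied back from the GPU):

```python
def reduce_mask(num_boxes, col_blocks, mask):
    # mask[i * col_blocks + j] has bit k set when box i suppresses box (64 * j + k)
    remv = [0] * col_blocks          # accumulated "already suppressed" bits
    keep = []
    for i in range(num_boxes):       # boxes are visited in descending score order
        block, bit = divmod(i, 64)
        if not (remv[block] >> bit) & 1:   # box i has not been suppressed yet
            keep.append(i)
            for j in range(col_blocks):    # suppress everything box i overlaps
                remv[j] |= mask[i * col_blocks + j]
    return keep
```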
-------------------------------------------------------------------------------- /docs/nms/src/soft_nms_cpu.pyx: --------------------------------------------------------------------------------
1 | # ----------------------------------------------------------
2 | # Soft-NMS: Improving Object Detection With One Line of Code
3 | # Copyright (c) University of Maryland, College Park
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Navaneeth
Bodla and Bharat Singh 6 | # Modified by Kai Chen 7 | # ---------------------------------------------------------- 8 | 9 | # cython: language_level=3, boundscheck=False 10 | 11 | import numpy as np 12 | cimport numpy as np 13 | 14 | 15 | cdef inline np.float32_t max(np.float32_t a, np.float32_t b): 16 | return a if a >= b else b 17 | 18 | cdef inline np.float32_t min(np.float32_t a, np.float32_t b): 19 | return a if a <= b else b 20 | 21 | 22 | def soft_nms_cpu( 23 | np.ndarray[float, ndim=2] boxes_in, 24 | float iou_thr, 25 | unsigned int method=1, 26 | float sigma=0.5, 27 | float min_score=0.001, 28 | ): 29 | boxes = boxes_in.copy() 30 | cdef int N = boxes.shape[0] 31 | cdef float iw, ih, box_area 32 | cdef float ua 33 | cdef int pos = 0 34 | cdef float maxscore = 0 35 | cdef int maxpos = 0 36 | cdef float x1, x2, y1, y2, tx1, tx2, ty1, ty2, ts, area, weight, ov 37 | inds = np.arange(N) 38 | 39 | for i in range(N): 40 | maxscore = boxes[i, 4] 41 | maxpos = i 42 | 43 | tx1 = boxes[i, 0] 44 | ty1 = boxes[i, 1] 45 | tx2 = boxes[i, 2] 46 | ty2 = boxes[i, 3] 47 | ts = boxes[i, 4] 48 | ti = inds[i] 49 | 50 | pos = i + 1 51 | # get max box 52 | while pos < N: 53 | if maxscore < boxes[pos, 4]: 54 | maxscore = boxes[pos, 4] 55 | maxpos = pos 56 | pos = pos + 1 57 | 58 | # add max box as a detection 59 | boxes[i, 0] = boxes[maxpos, 0] 60 | boxes[i, 1] = boxes[maxpos, 1] 61 | boxes[i, 2] = boxes[maxpos, 2] 62 | boxes[i, 3] = boxes[maxpos, 3] 63 | boxes[i, 4] = boxes[maxpos, 4] 64 | inds[i] = inds[maxpos] 65 | 66 | # swap ith box with position of max box 67 | boxes[maxpos, 0] = tx1 68 | boxes[maxpos, 1] = ty1 69 | boxes[maxpos, 2] = tx2 70 | boxes[maxpos, 3] = ty2 71 | boxes[maxpos, 4] = ts 72 | inds[maxpos] = ti 73 | 74 | tx1 = boxes[i, 0] 75 | ty1 = boxes[i, 1] 76 | tx2 = boxes[i, 2] 77 | ty2 = boxes[i, 3] 78 | ts = boxes[i, 4] 79 | 80 | pos = i + 1 81 | # NMS iterations, note that N changes if detection boxes fall below 82 | # threshold 83 | while pos < N: 84 | x1 = boxes[pos, 0] 85 | y1 = boxes[pos, 1] 86 | x2 = boxes[pos, 2] 87 | y2 = boxes[pos, 3] 88 | s = boxes[pos, 4] 89 | 90 | area = (x2 - x1 + 1) * (y2 - y1 + 1) 91 | iw = (min(tx2, x2) - max(tx1, x1) + 1) 92 | if iw > 0: 93 | ih = (min(ty2, y2) - max(ty1, y1) + 1) 94 | if ih > 0: 95 | ua = float((tx2 - tx1 + 1) * (ty2 - ty1 + 1) + area - iw * ih) 96 | ov = iw * ih / ua # iou between max box and detection box 97 | 98 | if method == 1: # linear 99 | if ov > iou_thr: 100 | weight = 1 - ov 101 | else: 102 | weight = 1 103 | elif method == 2: # gaussian 104 | weight = np.exp(-(ov * ov) / sigma) 105 | else: # original NMS 106 | if ov > iou_thr: 107 | weight = 0 108 | else: 109 | weight = 1 110 | 111 | boxes[pos, 4] = weight * boxes[pos, 4] 112 | 113 | # if box score falls below threshold, discard the box by 114 | # swapping with last box update N 115 | if boxes[pos, 4] < min_score: 116 | boxes[pos, 0] = boxes[N-1, 0] 117 | boxes[pos, 1] = boxes[N-1, 1] 118 | boxes[pos, 2] = boxes[N-1, 2] 119 | boxes[pos, 3] = boxes[N-1, 3] 120 | boxes[pos, 4] = boxes[N-1, 4] 121 | inds[pos] = inds[N - 1] 122 | N = N - 1 123 | pos = pos - 1 124 | 125 | pos = pos + 1 126 | 127 | return boxes[:N], inds[:N] 128 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved 3 | 4 | import glob 5 | import os 6 | from setuptools import find_packages, setup 7 | import torch 8 | from torch.utils.cpp_extension import CUDA_HOME, CppExtension, CUDAExtension 9 | 10 | torch_ver = [int(x) for x in torch.__version__.split(".")[:2]] 11 | assert torch_ver >= [1, 3], "Requires PyTorch >= 1.3" 12 | 13 | 14 | def get_extensions(): 15 | this_dir = os.path.dirname(os.path.abspath(__file__)) 16 | extensions_dir = os.path.join(this_dir, "det", "layers", "csrc") 17 | 18 | main_source = os.path.join(extensions_dir, "vision.cpp") 19 | sources = glob.glob(os.path.join(extensions_dir, "**", "*.cpp")) 20 | source_cuda = glob.glob(os.path.join(extensions_dir, "**", "*.cu")) + glob.glob( 21 | os.path.join(extensions_dir, "*.cu") 22 | ) 23 | 24 | sources = [main_source] + sources 25 | 26 | extension = CppExtension 27 | 28 | extra_compile_args = {"cxx": []} 29 | define_macros = [] 30 | 31 | if (torch.cuda.is_available() and CUDA_HOME is not None) or os.getenv("FORCE_CUDA", "0") == "1": 32 | extension = CUDAExtension 33 | sources += source_cuda 34 | define_macros += [("WITH_CUDA", None)] 35 | extra_compile_args["nvcc"] = [ 36 | "-DCUDA_HAS_FP16=1", 37 | "-D__CUDA_NO_HALF_OPERATORS__", 38 | "-D__CUDA_NO_HALF_CONVERSIONS__", 39 | "-D__CUDA_NO_HALF2_OPERATORS__", 40 | ] 41 | 42 | # It's better if pytorch can do this by default .. 43 | CC = os.environ.get("CC", None) 44 | if CC is not None: 45 | extra_compile_args["nvcc"].append("-ccbin={}".format(CC)) 46 | 47 | sources = [os.path.join(extensions_dir, s) for s in sources] 48 | 49 | include_dirs = [extensions_dir] 50 | 51 | ext_modules = [ 52 | extension( 53 | "det._C", 54 | sources, 55 | include_dirs=include_dirs, 56 | define_macros=define_macros, 57 | extra_compile_args=extra_compile_args, 58 | ) 59 | ] 60 | 61 | return ext_modules 62 | 63 | 64 | setup( 65 | name="Det", 66 | version="0.1.1", 67 | author="HTF", 68 | url="", 69 | description="" 70 | "platform for instance-level detection tasks based on Detectron2.", 71 | packages=find_packages(exclude=("configs", "tests")), 72 | python_requires=">=3.6", 73 | install_requires=[ 74 | "termcolor>=1.1", 75 | "Pillow>=6.0", 76 | "yacs>=0.1.6", 77 | "tabulate", 78 | "cloudpickle", 79 | "matplotlib", 80 | "tqdm>4.29.0", 81 | "tensorboard", 82 | "python-Levenshtein", 83 | "Polygon3", 84 | "shapely", 85 | ], 86 | extras_require={"all": ["psutil"]}, 87 | ext_modules=get_extensions(), 88 | cmdclass={"build_ext": torch.utils.cpp_extension.BuildExtension}, 89 | ) -------------------------------------------------------------------------------- /tools/compute_flops.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from detectron2.engine import default_argument_parser, default_setup 3 | 4 | from det.config import get_cfg 5 | from det.utils.measures import measure_model 6 | 7 | from train_net import Trainer 8 | 9 | 10 | def setup(args): 11 | """ 12 | Create configs and perform basic setups. 
13 | """ 14 | cfg = get_cfg() 15 | cfg.merge_from_file(args.config_file) 16 | cfg.merge_from_list(args.opts) 17 | cfg.freeze() 18 | default_setup(cfg, args) 19 | return cfg 20 | 21 | 22 | def main(args): 23 | cfg = setup(args) 24 | 25 | model = Trainer.build_model(cfg) 26 | model.eval().cuda() 27 | input_size = (3, 512, 512) 28 | image = torch.zeros(*input_size) 29 | batched_input = {"image": image} 30 | ops, params = measure_model(model, [batched_input]) 31 | print('ops: {:.2f}G\tparams: {:.2f}M'.format(ops / 2**30, params / 2**20)) 32 | 33 | 34 | if __name__ == "__main__": 35 | args = default_argument_parser().parse_args() 36 | print("Command Line Args:", args) 37 | main(args) 38 | -------------------------------------------------------------------------------- /tools/visualize_data.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 3 | import argparse 4 | import numpy as np 5 | import os 6 | from itertools import chain 7 | import cv2 8 | import tqdm 9 | from PIL import Image 10 | 11 | from detectron2.data import DatasetCatalog, MetadataCatalog, build_detection_train_loader 12 | from detectron2.data import detection_utils as utils 13 | from detectron2.data.build import filter_images_with_few_keypoints 14 | from detectron2.utils.logger import setup_logger 15 | from detectron2.utils.visualizer import Visualizer 16 | 17 | from det.config import get_cfg 18 | from det.data.dataset_mapper import DatasetMapperWithBasis 19 | 20 | 21 | def setup(args): 22 | cfg = get_cfg() 23 | if args.config_file: 24 | cfg.merge_from_file(args.config_file) 25 | cfg.merge_from_list(args.opts) 26 | cfg.freeze() 27 | return cfg 28 | 29 | 30 | def parse_args(in_args=None): 31 | parser = argparse.ArgumentParser(description="Visualize ground-truth data") 32 | parser.add_argument( 33 | "--source", 34 | choices=["annotation", "dataloader"], 35 | required=True, 36 | help="visualize the annotations or the data loader (with pre-processing)", 37 | ) 38 | parser.add_argument("--config-file", metavar="FILE", help="path to config file") 39 | parser.add_argument("--output-dir", default="./", help="path to output directory") 40 | parser.add_argument("--show", action="store_true", help="show output in a window") 41 | parser.add_argument( 42 | "--opts", 43 | help="Modify config options using the command-line", 44 | default=[], 45 | nargs=argparse.REMAINDER, 46 | ) 47 | return parser.parse_args(in_args) 48 | 49 | 50 | if __name__ == "__main__": 51 | args = parse_args() 52 | logger = setup_logger() 53 | logger.info("Arguments: " + str(args)) 54 | cfg = setup(args) 55 | 56 | dirname = args.output_dir 57 | os.makedirs(dirname, exist_ok=True) 58 | metadata = MetadataCatalog.get(cfg.DATASETS.TRAIN[0]) 59 | 60 | def output(vis, fname): 61 | if args.show: 62 | print(fname) 63 | cv2.imshow("window", vis.get_image()[:, :, ::-1]) 64 | cv2.waitKey() 65 | else: 66 | filepath = os.path.join(dirname, fname) 67 | print("Saving to {} ...".format(filepath)) 68 | vis.save(filepath) 69 | 70 | scale = 2.0 if args.show else 1.0 71 | if args.source == "dataloader": 72 | mapper = DatasetMapperWithBasis(cfg, True) 73 | train_data_loader = build_detection_train_loader(cfg, mapper) 74 | for batch in train_data_loader: 75 | for per_image in batch: 76 | # Pytorch tensor is in (C, H, W) format 77 | img = per_image["image"].permute(1, 2, 0) 78 | if cfg.INPUT.FORMAT == "BGR": 79 | img = img[:, :, [2, 1, 0]] 80 | else: 81 | img = 
np.asarray(Image.fromarray(img, mode=cfg.INPUT.FORMAT).convert("RGB")) 82 | 83 | visualizer = Visualizer(img, metadata=metadata, scale=scale) 84 | target_fields = per_image["instances"].get_fields() 85 | labels = [metadata.thing_classes[i] for i in target_fields["gt_classes"]] 86 | vis = visualizer.overlay_instances( 87 | labels=labels, 88 | boxes=target_fields.get("gt_boxes", None), 89 | masks=target_fields.get("gt_masks", None), 90 | keypoints=target_fields.get("gt_keypoints", None), 91 | ) 92 | output(vis, str(per_image["image_id"]) + ".jpg") 93 | else: 94 | dicts = list(chain.from_iterable([DatasetCatalog.get(k) for k in cfg.DATASETS.TRAIN])) 95 | if cfg.MODEL.KEYPOINT_ON: 96 | dicts = filter_images_with_few_keypoints(dicts, 1) 97 | for dic in tqdm.tqdm(dicts): 98 | img = utils.read_image(dic["file_name"], "RGB") 99 | visualizer = Visualizer(img, metadata=metadata, scale=scale) 100 | vis = visualizer.draw_dataset_dict(dic) 101 | output(vis, os.path.basename(dic["file_name"])) --------------------------------------------------------------------------------
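Both tools follow the usual detectron2 launch pattern; for example, `python tools/visualize_data.py --source dataloader --config-file configs/FCOS-Detection/R_50_1x.yaml --output-dir vis/` dumps the mapped training samples, and `python tools/compute_flops.py --config-file configs/FCOS-Detection/R_50_1x.yaml` prints the measured ops and parameter counts for a 3x512x512 input (the config path and output directory here are just example values).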