├── .vscode
└── settings.json
├── README.md
├── configs
├── FCOS-Detection
│ ├── Base-ATSS.yaml
│ ├── Base-FCOS.yaml
│ ├── FCOS_RT
│ │ ├── MS_DLA_34_4x_syncbn.yaml
│ │ ├── MS_DLA_34_4x_syncbn_bn_head.yaml
│ │ ├── MS_DLA_34_4x_syncbn_shared_towers.yaml
│ │ ├── MS_DLA_34_4x_syncbn_shared_towers_bn_head.yaml
│ │ ├── MS_R_50_4x_syncbn.yaml
│ │ └── MS_R_50_4x_syncbn_bn_head.yaml
│ ├── MS_R_101_2x.yaml
│ ├── MS_R_50_2x.yaml
│ ├── MS_X_101_32x8d_2x.yaml
│ ├── MS_X_101_32x8d_2x_dcnv2.yaml
│ ├── MS_X_101_64x4d_2x.yaml
│ ├── MS_X_101_64x4d_2x_dcnv2.yaml
│ ├── README.md
│ ├── RS_50_1x.yaml
│ ├── R_50_1x.yaml
│ ├── atss_r_50.yaml
│ └── vovnet
│ │ ├── MS_V_39_3x.yaml
│ │ ├── MS_V_57_3x.yaml
│ │ ├── MS_V_99_3x.yaml
│ │ └── README.md
└── RCNN
│ ├── 550_R_50_FPN_3x.yaml
│ ├── Base-RCNN-FPN.yaml
│ ├── Base-RCNN.yaml
│ ├── LVIS
│ │ └── R_50_1x.yaml
│ ├── R_101_3x.yaml
│ ├── faster_rcnn_RS_50_FPN_1x.yaml
│ ├── faster_rcnn_R_50_FPN_1x.yaml
│ ├── faster_rcnn_R_50_FPN_1x_tta.yaml
│ └── libra_rcnn
│ │ └── r_50_1x.yaml
├── det
├── _C.cpython-36m-x86_64-linux-gnu.so
├── __init__.py
├── __pycache__
│ └── __init__.cpython-36.pyc
├── checkpoint
│ ├── __init__.py
│ ├── __pycache__
│ │ ├── __init__.cpython-36.pyc
│ │ └── det_checkpoint.cpython-36.pyc
│ └── det_checkpoint.py
├── config
│ ├── __init__.py
│ ├── __pycache__
│ │ ├── __init__.cpython-36.pyc
│ │ ├── config.cpython-36.pyc
│ │ └── defaults.cpython-36.pyc
│ ├── config.py
│ └── defaults.py
├── data
│ ├── __init__.py
│ ├── __pycache__
│ │ ├── __init__.cpython-36.pyc
│ │ ├── builtin.cpython-36.pyc
│ │ ├── dataset_mapper.cpython-36.pyc
│ │ └── detection_utils.cpython-36.pyc
│ ├── builtin.py
│ ├── dataset_mapper.py
│ ├── datasets
│ │ ├── __pycache__
│ │ │ └── text.cpython-36.pyc
│ │ ├── augment_lists.py
│ │ ├── fast_augment.py
│ │ └── text.py
│ └── detection_utils.py
├── layers
│ ├── __init__.py
│ ├── __pycache__
│ │ ├── __init__.cpython-36.pyc
│ │ ├── balanced_l1_loss.cpython-36.pyc
│ │ ├── bezier_align.cpython-36.pyc
│ │ ├── conv_with_kaiming_uniform.cpython-36.pyc
│ │ ├── def_roi_align.cpython-36.pyc
│ │ ├── deform_conv.cpython-36.pyc
│ │ ├── gcn.cpython-36.pyc
│ │ ├── iou_loss.cpython-36.pyc
│ │ ├── ml_nms.cpython-36.pyc
│ │ ├── naive_group_norm.cpython-36.pyc
│ │ └── soft_nms.cpython-36.pyc
│ ├── balanced_l1_loss.py
│ ├── bezier_align.py
│ ├── conv_with_kaiming_uniform.py
│ ├── csrc
│ │ ├── BezierAlign
│ │ │ ├── BezierAlign.h
│ │ │ ├── BezierAlign_cpu.cpp
│ │ │ └── BezierAlign_cuda.cu
│ │ ├── DefROIAlign
│ │ │ ├── DefROIAlign.h
│ │ │ └── DefROIAlign_cuda.cu
│ │ ├── cuda_version.cu
│ │ ├── ml_nms
│ │ │ ├── ml_nms.cu
│ │ │ └── ml_nms.h
│ │ └── vision.cpp
│ ├── def_roi_align.py
│ ├── deform_conv.py
│ ├── gcn.py
│ ├── iou_loss.py
│ ├── ml_nms.py
│ ├── naive_group_norm.py
│ └── soft_nms.py
├── modeling
│ ├── __init__.py
│ ├── __pycache__
│ │ ├── __init__.cpython-36.pyc
│ │ ├── one_stage_detector.cpython-36.pyc
│ │ └── rpn_utils.cpython-36.pyc
│ ├── atss
│ │ ├── __init__.py
│ │ ├── __pycache__
│ │ │ ├── __init__.cpython-36.pyc
│ │ │ ├── atss.cpython-36.pyc
│ │ │ └── atss_outputs.cpython-36.pyc
│ │ ├── atss.py
│ │ └── atss_outputs.py
│ ├── backbone
│ │ ├── __init__.py
│ │ ├── __pycache__
│ │ │ ├── __init__.cpython-36.pyc
│ │ │ ├── dla.cpython-36.pyc
│ │ │ ├── fpn.cpython-36.pyc
│ │ │ ├── lpf.cpython-36.pyc
│ │ │ ├── mobilenet.cpython-36.pyc
│ │ │ ├── resnet.cpython-36.pyc
│ │ │ ├── resnet_interval.cpython-36.pyc
│ │ │ ├── resnet_lpf.cpython-36.pyc
│ │ │ ├── splat.cpython-36.pyc
│ │ │ └── vovnet.cpython-36.pyc
│ │ ├── dla.py
│ │ ├── fpn.py
│ │ ├── lpf.py
│ │ ├── mobilenet.py
│ │ ├── resnet.py
│ │ ├── resnet_interval.py
│ │ ├── resnet_lpf.py
│ │ ├── splat.py
│ │ └── vovnet.py
│ ├── fcos
│ │ ├── __init__.py
│ │ ├── __pycache__
│ │ │ ├── __init__.cpython-36.pyc
│ │ │ ├── fcos.cpython-36.pyc
│ │ │ └── fcos_outputs.cpython-36.pyc
│ │ ├── fcos.py
│ │ └── fcos_outputs.py
│ ├── guided_anchoring
│ │ ├── ga_outputs.py
│ │ └── guided_anchor_head.py
│ ├── one_stage_detector.py
│ ├── poolers.py
│ ├── roi_heads
│ │ ├── __init__.py
│ │ ├── __pycache__
│ │ │ ├── __init__.cpython-36.pyc
│ │ │ └── libra_rcnn.cpython-36.pyc
│ │ └── libra_rcnn.py
│ ├── rpn_utils.py
│ └── tsd
│ │ └── tsd.py
└── utils
│ ├── __init__.py
│ ├── __pycache__
│ │ ├── __init__.cpython-36.pyc
│ │ └── comm.cpython-36.pyc
│ ├── comm.py
│ ├── measures.py
│ └── visualizer.py
├── docs
└── nms
│ ├── README.md
│ ├── __init__.py
│ ├── demo_nms.cpp
│ ├── demo_nms.py
│ ├── nms.cpp
│ ├── nms.py
│ ├── nms_wrapper.py
│ └── src
│ ├── nms_cpu.cpp
│ ├── nms_cuda.cpp
│ ├── nms_kernel.cu
│ ├── soft_nms_cpu.cpp
│ └── soft_nms_cpu.pyx
├── setup.py
└── tools
├── compute_flops.py
├── train_net.py
└── visualize_data.py
/.vscode/settings.json:
--------------------------------------------------------------------------------
1 | {
2 | "python.pythonPath": "/home/fei/anaconda3/envs/detectron2/bin/python"
3 | }
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Object-Detection.pytorch
2 |
3 | ## bdd100k Dataset Baseline
4 | - We use `mmdetection` to train all models (a training sketch is given at the end of this README).
5 | - All models are trained on `bdd100k_train` and tested on `bdd100k_val`.
6 | - We use distributed training across 8 NVIDIA 1080 Ti GPUs.
7 |
8 | ### Anchor-based:
9 | | Name | backbone | tricks | AP | AP50 | AP75 | APs | APm | APl |
10 | | :------: |:------: |:------:|:------:|:------:|:------:|:------:|:------:|:------:|
11 | | FasterRCNN | R_50_FPN | | 0.318 | 0.551 | 0.311 | 0.145 | 0.356 | 0.497|
12 | | FasterRCNN | R_101_FPN| | 0.322 | 0.553 | 0.314 | 0.142 | 0.360 | 0.512 |
13 | | CascadeRCNN | R_50_FPN | | 0.332 | 0.558 | 0.331 | 0.150 | 0.371 | 0.520 |
14 | | PISA | R_50_FPN | | | | | | | |
15 | | LibraRCNN | R_50_FPN | | | | | | | |
16 | | GA | R_50_FPN | | | | | | | |
17 |
18 |
19 | ### Anchor-free
20 | | Name | backbone | tricks | AP | AP50 | AP75 | APs | APm | APl |
21 | | :------: |:------: |:------:|:------:|:------:|:------:|:------:|:------:|:------:|
22 | | FCOS | R_50_FPN | |0.304 | 0.539 | 0.290 | 0.129 | 0.338 | 0.498 |
23 | | ATSS | R_50_FPN | | 0.329 | 0.562 | 0.323 | 0.141 | 0.367 | 0.517|
24 | | CenterNet | R_50_DCN | | | | | | | |
25 | | RepPoints| R_50_FPN | | 0.312 | 0.555 | 0.297 | 0.129 | 0.348 | 0.505 |
26 |
27 | ## CenterNet series
28 | | Name | backbone | Iters | AP | AP50 | AP75 | APs | APm | APl |
29 | | :------: |:------: |:------:|:------:|:------:|:------:|:------:|:------:|:------:|
30 | | CenterNet | R_50_DCN | 125997 | 27.5269 | 44.7613 | 28.8301 | 9.6805 | 31.4682 | 43.1641 |
31 |
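32 | ## Training sketch (bdd100k baselines)
33 | 
34 | A rough sketch of how one of the bdd100k baselines above is trained through `mmdetection`'s Python API (names follow mmdetection 2.x). The config filename is illustrative and assumes the bdd100k labels have been converted to COCO format; in practice the script is launched with a distributed launcher across the 8 GPUs mentioned above.
35 | 
36 | ```python
37 | from mmcv import Config
38 | from mmdet.datasets import build_dataset
39 | from mmdet.models import build_detector
40 | from mmdet.apis import train_detector
41 | 
42 | # Hypothetical config: Faster R-CNN R-50-FPN pointed at COCO-style bdd100k splits.
43 | cfg = Config.fromfile("configs/faster_rcnn_r50_fpn_1x_bdd100k.py")
44 | cfg.work_dir = "work_dirs/faster_rcnn_r50_fpn_1x_bdd100k"
45 | 
46 | model = build_detector(cfg.model, train_cfg=cfg.train_cfg, test_cfg=cfg.test_cfg)
47 | datasets = [build_dataset(cfg.data.train)]
48 | 
49 | # distributed=True assumes the process group was initialised by the launcher
50 | # (e.g. mmdetection's tools/dist_train.sh); use False for a single-GPU run.
51 | train_detector(model, datasets, cfg, distributed=True, validate=True)
52 | ```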
--------------------------------------------------------------------------------
/configs/FCOS-Detection/Base-ATSS.yaml:
--------------------------------------------------------------------------------
1 | MODEL:
2 | META_ARCHITECTURE: "OneStageDetector"
3 | BACKBONE:
4 | NAME: "build_fcos_resnet_fpn_backbone"
5 | RESNETS:
6 | OUT_FEATURES: ["res3", "res4", "res5"]
7 | FPN:
8 | IN_FEATURES: ["res3", "res4", "res5"]
9 | PROPOSAL_GENERATOR:
10 | NAME: "ATSS"
11 | ANCHOR_GENERATOR:
12 | NAME: "ATSSAnchorGenerator"
13 |
14 |
15 | # PIXEL_MEAN: [102.9801, 115.9465, 122.7717]
16 | DATASETS:
17 | TRAIN: ("coco_2017_train",)
18 | TEST: ("coco_2017_val",)
19 | SOLVER:
20 | IMS_PER_BATCH: 16
21 | BASE_LR: 0.01 # Note that RetinaNet uses a different default learning rate
22 | STEPS: (60000, 80000)
23 | MAX_ITER: 90000
24 | INPUT:
25 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
--------------------------------------------------------------------------------
/configs/FCOS-Detection/Base-FCOS.yaml:
--------------------------------------------------------------------------------
1 | MODEL:
2 | META_ARCHITECTURE: "OneStageDetector"
3 | BACKBONE:
4 | NAME: "build_fcos_resnet_fpn_backbone"
5 | RESNETS:
6 | OUT_FEATURES: ["res3", "res4", "res5"]
7 | FPN:
8 | IN_FEATURES: ["res3", "res4", "res5"]
9 | PROPOSAL_GENERATOR:
10 | NAME: "FCOS"
11 | # PIXEL_MEAN: [102.9801, 115.9465, 122.7717]
12 | DATASETS:
13 | TRAIN: ("coco_2017_train",)
14 | TEST: ("coco_2017_val",)
15 | SOLVER:
16 | IMS_PER_BATCH: 16
17 | BASE_LR: 0.01 # Note that RetinaNet uses a different default learning rate
18 | STEPS: (60000, 80000)
19 | MAX_ITER: 90000
20 | INPUT:
21 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
22 |
--------------------------------------------------------------------------------
/configs/FCOS-Detection/FCOS_RT/MS_DLA_34_4x_syncbn.yaml:
--------------------------------------------------------------------------------
1 | _BASE_: "../Base-FCOS.yaml"
2 | INPUT:
3 | MIN_SIZE_TRAIN: (256, 288, 320, 352, 384, 416, 448, 480, 512, 544, 576, 608)
4 | MAX_SIZE_TRAIN: 900
5 | MAX_SIZE_TEST: 736
6 | MIN_SIZE_TEST: 512
7 | MODEL:
8 | BACKBONE:
9 | NAME: "build_fcos_dla_fpn_backbone"
10 | FREEZE_AT: -1
11 | WEIGHTS: "http://dl.yf.io/dla/models/imagenet/dla34-ba72cf86.pth"
12 | DLA:
13 | CONV_BODY: "DLA34"
14 | NORM: "SyncBN"
15 | FPN:
16 | IN_FEATURES: ["level3", "level4", "level5"]
17 | FCOS:
18 | TOP_LEVELS: 0
19 | SIZES_OF_INTEREST: [64, 128]
20 | FPN_STRIDES: [8, 16, 32]
21 | IN_FEATURES: ['p3', 'p4', 'p5']
22 | SOLVER:
23 | STEPS: (300000, 340000)
24 | MAX_ITER: 360000
25 | OUTPUT_DIR: "output/fcos/FCOS_RT_MS_DLA_34_4x_syncbn"
26 |
--------------------------------------------------------------------------------
/configs/FCOS-Detection/FCOS_RT/MS_DLA_34_4x_syncbn_bn_head.yaml:
--------------------------------------------------------------------------------
1 | _BASE_: "../Base-FCOS.yaml"
2 | INPUT:
3 | MIN_SIZE_TRAIN: (256, 288, 320, 352, 384, 416, 448, 480, 512, 544, 576, 608)
4 | MAX_SIZE_TRAIN: 900
5 | MAX_SIZE_TEST: 736
6 | MIN_SIZE_TEST: 512
7 | MODEL:
8 | BACKBONE:
9 | NAME: "build_fcos_dla_fpn_backbone"
10 | FREEZE_AT: -1
11 | WEIGHTS: "http://dl.yf.io/dla/models/imagenet/dla34-ba72cf86.pth"
12 | DLA:
13 | CONV_BODY: "DLA34"
14 | NORM: "SyncBN"
15 | FPN:
16 | IN_FEATURES: ["level3", "level4", "level5"]
17 | FCOS:
18 | TOP_LEVELS: 0
19 | SIZES_OF_INTEREST: [64, 128]
20 | FPN_STRIDES: [8, 16, 32]
21 | IN_FEATURES: ['p3', 'p4', 'p5']
22 | NORM: "SyncBN"
23 | SOLVER:
24 | STEPS: (300000, 340000)
25 | MAX_ITER: 360000
26 | OUTPUT_DIR: "output/fcos/FCOS_RT_MS_DLA_34_4x_syncbn_bn_head"
27 |
--------------------------------------------------------------------------------
/configs/FCOS-Detection/FCOS_RT/MS_DLA_34_4x_syncbn_shared_towers.yaml:
--------------------------------------------------------------------------------
1 | _BASE_: "../Base-FCOS.yaml"
2 | INPUT:
3 | MIN_SIZE_TRAIN: (256, 288, 320, 352, 384, 416, 448, 480, 512, 544, 576, 608)
4 | MAX_SIZE_TRAIN: 900
5 | MAX_SIZE_TEST: 736
6 | MIN_SIZE_TEST: 512
7 | MODEL:
8 | BACKBONE:
9 | NAME: "build_fcos_dla_fpn_backbone"
10 | FREEZE_AT: -1
11 | WEIGHTS: "http://dl.yf.io/dla/models/imagenet/dla34-ba72cf86.pth"
12 | DLA:
13 | CONV_BODY: "DLA34"
14 | NORM: "SyncBN"
15 | FPN:
16 | IN_FEATURES: ["level3", "level4", "level5"]
17 | FCOS:
18 | TOP_LEVELS: 0
19 | SIZES_OF_INTEREST: [64, 128]
20 | FPN_STRIDES: [8, 16, 32]
21 | IN_FEATURES: ['p3', 'p4', 'p5']
22 | NUM_SHARE_CONVS: 4
23 | NUM_BOX_CONVS: 0
24 | NUM_CLS_CONVS: 0
25 | SOLVER:
26 | STEPS: (300000, 340000)
27 | MAX_ITER: 360000
28 | OUTPUT_DIR: "output/fcos/FCOS_RT_MS_DLA_34_4x_syncbn_shared_towers"
29 |
--------------------------------------------------------------------------------
/configs/FCOS-Detection/FCOS_RT/MS_DLA_34_4x_syncbn_shared_towers_bn_head.yaml:
--------------------------------------------------------------------------------
1 | _BASE_: "../Base-FCOS.yaml"
2 | INPUT:
3 | MIN_SIZE_TRAIN: (256, 288, 320, 352, 384, 416, 448, 480, 512, 544, 576, 608)
4 | MAX_SIZE_TRAIN: 900
5 | MAX_SIZE_TEST: 736
6 | MIN_SIZE_TEST: 512
7 | MODEL:
8 | BACKBONE:
9 | NAME: "build_fcos_dla_fpn_backbone"
10 | FREEZE_AT: -1
11 | WEIGHTS: "http://dl.yf.io/dla/models/imagenet/dla34-ba72cf86.pth"
12 | DLA:
13 | CONV_BODY: "DLA34"
14 | NORM: "SyncBN"
15 | FPN:
16 | IN_FEATURES: ["level3", "level4", "level5"]
17 | FCOS:
18 | TOP_LEVELS: 0
19 | SIZES_OF_INTEREST: [64, 128]
20 | FPN_STRIDES: [8, 16, 32]
21 | IN_FEATURES: ['p3', 'p4', 'p5']
22 | NUM_SHARE_CONVS: 4
23 | NUM_BOX_CONVS: 0
24 | NUM_CLS_CONVS: 0
25 | NORM: "SyncBN"
26 | SOLVER:
27 | STEPS: (300000, 340000)
28 | MAX_ITER: 360000
29 | OUTPUT_DIR: "output/fcos/FCOS_RT_MS_DLA_34_4x_syncbn_shared_towers_bn_head"
30 |
--------------------------------------------------------------------------------
/configs/FCOS-Detection/FCOS_RT/MS_R_50_4x_syncbn.yaml:
--------------------------------------------------------------------------------
1 | _BASE_: "../Base-FCOS.yaml"
2 | INPUT:
3 | MIN_SIZE_TRAIN: (256, 288, 320, 352, 384, 416, 448, 480, 512, 544, 576, 608)
4 | MAX_SIZE_TRAIN: 900
5 | MAX_SIZE_TEST: 736
6 | MIN_SIZE_TEST: 512
7 | MODEL:
8 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
9 | RESNETS:
10 | DEPTH: 50
11 | NORM: "SyncBN"
12 | FCOS:
13 | TOP_LEVELS: 0
14 | SIZES_OF_INTEREST: [64, 128]
15 | FPN_STRIDES: [8, 16, 32]
16 | IN_FEATURES: ['p3', 'p4', 'p5']
17 | SOLVER:
18 | STEPS: (300000, 340000)
19 | MAX_ITER: 360000
20 | OUTPUT_DIR: "output/fcos/FCOS_RT_MS_R_50_4x_syncbn"
21 |
--------------------------------------------------------------------------------
/configs/FCOS-Detection/FCOS_RT/MS_R_50_4x_syncbn_bn_head.yaml:
--------------------------------------------------------------------------------
1 | _BASE_: "../Base-FCOS.yaml"
2 | INPUT:
3 | MIN_SIZE_TRAIN: (256, 288, 320, 352, 384, 416, 448, 480, 512, 544, 576, 608)
4 | MAX_SIZE_TRAIN: 900
5 | MAX_SIZE_TEST: 736
6 | MIN_SIZE_TEST: 512
7 | MODEL:
8 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
9 | RESNETS:
10 | DEPTH: 50
11 | NORM: "SyncBN"
12 | FCOS:
13 | TOP_LEVELS: 0
14 | SIZES_OF_INTEREST: [64, 128]
15 | FPN_STRIDES: [8, 16, 32]
16 | IN_FEATURES: ['p3', 'p4', 'p5']
17 | NORM: "SyncBN"
18 | SOLVER:
19 | STEPS: (300000, 340000)
20 | MAX_ITER: 360000
21 | OUTPUT_DIR: "output/fcos/FCOS_RT_MS_R_50_4x_syncbn_bn_head"
22 |
--------------------------------------------------------------------------------
/configs/FCOS-Detection/MS_R_101_2x.yaml:
--------------------------------------------------------------------------------
1 | _BASE_: "Base-FCOS.yaml"
2 | MODEL:
3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl"
4 | RESNETS:
5 | DEPTH: 101
6 | SOLVER:
7 | STEPS: (120000, 160000)
8 | MAX_ITER: 180000
9 | OUTPUT_DIR: "output/fcos/R_101_2x"
10 |
--------------------------------------------------------------------------------
/configs/FCOS-Detection/MS_R_50_2x.yaml:
--------------------------------------------------------------------------------
1 | _BASE_: "Base-FCOS.yaml"
2 | MODEL:
3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
4 | RESNETS:
5 | DEPTH: 50
6 | SOLVER:
7 | STEPS: (120000, 160000)
8 | MAX_ITER: 180000
9 | OUTPUT_DIR: "output/fcos/R_50_2x"
10 |
--------------------------------------------------------------------------------
/configs/FCOS-Detection/MS_X_101_32x8d_2x.yaml:
--------------------------------------------------------------------------------
1 | _BASE_: "Base-FCOS.yaml"
2 | MODEL:
3 | WEIGHTS: "detectron2://ImageNetPretrained/FAIR/X-101-32x8d.pkl"
4 | PIXEL_STD: [57.375, 57.120, 58.395]
5 | RESNETS:
6 | STRIDE_IN_1X1: False # this is a C2 model
7 | NUM_GROUPS: 32
8 | WIDTH_PER_GROUP: 8
9 | DEPTH: 101
10 | SOLVER:
11 | STEPS: (120000, 160000)
12 | MAX_ITER: 180000
13 | OUTPUT_DIR: "output/fcos/X_101_2x"
14 |
--------------------------------------------------------------------------------
/configs/FCOS-Detection/MS_X_101_32x8d_2x_dcnv2.yaml:
--------------------------------------------------------------------------------
1 | _BASE_: "Base-FCOS.yaml"
2 | MODEL:
3 | WEIGHTS: "detectron2://ImageNetPretrained/FAIR/X-101-32x8d.pkl"
4 | PIXEL_STD: [57.375, 57.120, 58.395]
5 | RESNETS:
6 | STRIDE_IN_1X1: False # this is a C2 model
7 | NUM_GROUPS: 32
8 | WIDTH_PER_GROUP: 8
9 | DEPTH: 101
10 | DEFORM_ON_PER_STAGE: [False, False, True, True] # on Res4, Res5
11 | DEFORM_MODULATED: True
12 | FCOS:
13 | USE_DEFORMABLE: True
14 | SOLVER:
15 | STEPS: (120000, 160000)
16 | MAX_ITER: 180000
17 | OUTPUT_DIR: "output/fcos/MS_X_101_2x_dcnv2"
18 |
--------------------------------------------------------------------------------
/configs/FCOS-Detection/MS_X_101_64x4d_2x.yaml:
--------------------------------------------------------------------------------
1 | _BASE_: "Base-FCOS.yaml"
2 | MODEL:
3 | WEIGHTS: "catalog://ImageNetPretrained/FAIR/X-101-64x4d"
4 | PIXEL_STD: [1.0, 1.0, 1.0]
5 | RESNETS:
6 | STRIDE_IN_1X1: False # this is a C2 model
7 | NUM_GROUPS: 64
8 | WIDTH_PER_GROUP: 4
9 | DEPTH: 101
10 | SOLVER:
11 | STEPS: (120000, 160000)
12 | MAX_ITER: 180000
13 | OUTPUT_DIR: "output/fcos/MS_X_101_64x4d_2x"
14 |
--------------------------------------------------------------------------------
/configs/FCOS-Detection/MS_X_101_64x4d_2x_dcnv2.yaml:
--------------------------------------------------------------------------------
1 | _BASE_: "Base-FCOS.yaml"
2 | MODEL:
3 | WEIGHTS: "catalog://ImageNetPretrained/FAIR/X-101-64x4d"
4 | PIXEL_STD: [1.0, 1.0, 1.0]
5 | RESNETS:
6 | STRIDE_IN_1X1: False # this is a C2 model
7 | NUM_GROUPS: 64
8 | WIDTH_PER_GROUP: 4
9 | DEPTH: 101
10 | DEFORM_ON_PER_STAGE: [False, False, True, True] # on Res4, Res5
11 | DEFORM_MODULATED: True
12 | FCOS:
13 | USE_DEFORMABLE: True
14 | SOLVER:
15 | STEPS: (120000, 160000)
16 | MAX_ITER: 180000
17 | OUTPUT_DIR: "output/fcos/MS_X_101_64x4d_2x_dcnv2"
18 |
--------------------------------------------------------------------------------
/configs/FCOS-Detection/README.md:
--------------------------------------------------------------------------------
1 | # FCOS: Fully Convolutional One-Stage Object Detection
2 |
3 | FCOS: Fully Convolutional One-Stage Object Detection;
4 | Zhi Tian, Chunhua Shen, Hao Chen, and Tong He;
5 | In: Proc. Int. Conf. Computer Vision (ICCV), 2019.
6 | arXiv preprint arXiv:1904.01355
7 |
8 | [[`Paper`](https://arxiv.org/abs/1904.01355)] [[`BibTeX`](#citing-fcos)]
9 |
10 | # Installation & Quick Start
11 | No special setup needed. The [default instruction](../../README.md#Installation) is fine. A minimal inference sketch is also provided at the end of this README.
12 |
13 | ## Models
14 | ### COCO Object Detection Baselines with [FCOS](https://arxiv.org/abs/1904.01355)
15 |
16 | Name | inf. time | box AP | download
17 | --- |:---:|:---:|:---:
18 | [FCOS_R_50_1x](R_50_1x.yaml) | 16 FPS | 38.7 | [model](https://cloudstor.aarnet.edu.au/plus/s/glqFc13cCoEyHYy/download)
19 | [FCOS_MS_R_50_2x](MS_R_50_2x.yaml) | 16 FPS | 41.0 | [model](https://cloudstor.aarnet.edu.au/plus/s/reA6HVaGX47yKGV/download)
20 | [FCOS_MS_R_101_2x](MS_R_101_2x.yaml) | 12 FPS | 43.1 | [model](https://cloudstor.aarnet.edu.au/plus/s/M3UOT6JcyHy2QW1/download)
21 | [FCOS_MS_X_101_32x8d_2x](MS_X_101_32x8d_2x.yaml) | 6.6 FPS | 43.9 | [model](https://cloudstor.aarnet.edu.au/plus/s/R7H00WeWKZG45pP/download)
22 | [FCOS_MS_X_101_64x4d_2x](MS_X_101_64x4d_2x.yaml) | 6.1 FPS | 44.7 | [model](https://cloudstor.aarnet.edu.au/plus/s/XOLUCzqKYckNII7/download)
23 | [FCOS_MS_X_101_32x8d_dcnv2_2x](MS_X_101_32x8d_2x_dcnv2.yaml) | 4.6 FPS | 46.6 | [model](https://cloudstor.aarnet.edu.au/plus/s/TDsnYK8OXDTrafF/download)
24 |
25 | *Except for FCOS_R_50_1x, all other models are trained with multi-scale data augmentation.*
26 |
27 | ### FCOS Real-time Models
28 |
29 | Name | inf. time | box AP | download
30 | --- |:---:|:---:|:---:
31 | [FCOS_RT_MS_DLA_34_4x_shtw](FCOS_RT/MS_DLA_34_4x_syncbn_shared_towers.yaml) | 52 FPS | 39.1 | [model](https://cloudstor.aarnet.edu.au/plus/s/4vc3XwQezyhNvnB/download)
32 | [FCOS_RT_MS_DLA_34_4x](FCOS_RT/MS_DLA_34_4x_syncbn.yaml) | 46 FPS | 40.3 | [model](https://cloudstor.aarnet.edu.au/plus/s/zNPNyTkizaOOsUQ/download)
33 | [FCOS_RT_MS_R_50_4x](FCOS_RT/MS_R_50_4x_syncbn.yaml) | 38 FPS | 40.2 | [model](https://cloudstor.aarnet.edu.au/plus/s/TlnlXUr6lNNSyoZ/download)
34 |
35 | If you prefer BN in FCOS heads, please try the following models.
36 |
37 | Name | inf. time | box AP | download
38 | --- |:---:|:---:|:---:
39 | [FCOS_RT_MS_DLA_34_4x_shtw_bn](FCOS_RT/MS_DLA_34_4x_syncbn_shared_towers_bn_head.yaml) | 52 FPS | 38.9 | [model](https://cloudstor.aarnet.edu.au/plus/s/rdmHHSs4oCg7l7U/download)
40 | [FCOS_RT_MS_DLA_34_4x_bn](FCOS_RT/MS_DLA_34_4x_syncbn_bn_head.yaml) | 48 FPS | 39.4 | [model](https://cloudstor.aarnet.edu.au/plus/s/T5httPVo1VndbD4/download)
41 | [FCOS_RT_MS_R_50_4x_bn](FCOS_RT/MS_R_50_4x_syncbn_bn_head.yaml) | 40 FPS | 39.3 | [model](https://cloudstor.aarnet.edu.au/plus/s/dHNUNs0YxVhZAmg/download)
42 |
43 | *Inference time is measured on an NVIDIA 1080Ti with batch size 1. Real-time models use shorter side 512 for inference.*
44 |
45 | # Citing FCOS
46 | If you use FCOS in your research or wish to refer to the baseline results, please use the following BibTeX entry.
47 | ```BibTeX
48 | @inproceedings{tian2019fcos,
49 | title = {{FCOS}: Fully Convolutional One-Stage Object Detection},
50 | author = {Tian, Zhi and Shen, Chunhua and Chen, Hao and He, Tong},
51 | booktitle = {Proc. Int. Conf. Computer Vision (ICCV)},
52 | year = {2019}
53 | }
54 | ```
55 |
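56 | # Inference example
57 | 
58 | A minimal inference sketch (not an official demo script). It assumes this repo's `det` package is importable, so that the `OneStageDetector` architecture and the FCOS config defaults are registered, and that one of the checkpoints above has been downloaded. Paths are illustrative.
59 | 
60 | ```python
61 | import cv2
62 | from detectron2.engine import DefaultPredictor
63 | from det.config import get_cfg  # this repo's config helper
64 | 
65 | cfg = get_cfg()
66 | cfg.merge_from_file("configs/FCOS-Detection/MS_R_50_2x.yaml")
67 | cfg.MODEL.WEIGHTS = "FCOS_MS_R_50_2x.pth"  # path to a downloaded checkpoint
68 | 
69 | predictor = DefaultPredictor(cfg)
70 | outputs = predictor(cv2.imread("input.jpg"))  # BGR image, as DefaultPredictor expects by default
71 | instances = outputs["instances"].to("cpu")
72 | print(instances.pred_boxes, instances.scores, instances.pred_classes)
73 | ```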
--------------------------------------------------------------------------------
/configs/FCOS-Detection/RS_50_1x.yaml:
--------------------------------------------------------------------------------
1 | _BASE_: "Base-FCOS.yaml"
2 | MODEL:
3 | #WEIGHTS: "https://hangzh.s3-us-west-1.amazonaws.com/encoding/models/resnest50_detectron-255b5649.pth"
4 | #"detectron2://ImageNetPretrained/MSRA/R-50.pkl"
5 | MASK_ON: False
6 | RESNETS:
7 | DEPTH: 50
8 | STRIDE_IN_1X1: False
9 | RADIX: 2
10 | DEEP_STEM: True
11 | AVD: True
12 | AVG_DOWN: True
13 | NORM: "SyncBN"
14 | FPN:
15 | NORM: "SyncBN"
16 | ROI_BOX_HEAD:
17 | NAME: "FastRCNNConvFCHead"
18 | NUM_CONV: 4
19 | NUM_FC: 1
20 | NORM: "SyncBN"
21 | PIXEL_MEAN: [123.68, 116.779, 103.939]
22 | PIXEL_STD: [58.393, 57.12, 57.375]
23 | SOLVER:
24 | IMS_PER_BATCH: 16 #16
25 | BASE_LR: 0.02 #original:0.02
26 | INPUT:
27 | MIN_SIZE_TRAIN: (640, 800)
28 | MIN_SIZE_TRAIN_SAMPLING: "range"
29 | MAX_SIZE_TRAIN: 1333
30 | FORMAT: "RGB"
--------------------------------------------------------------------------------
/configs/FCOS-Detection/R_50_1x.yaml:
--------------------------------------------------------------------------------
1 | _BASE_: "Base-FCOS.yaml"
2 | MODEL:
3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
4 | RESNETS:
5 | DEPTH: 50
6 | INPUT:
7 | MIN_SIZE_TRAIN: (800,)
8 | SOLVER:
9 | WARMUP_METHOD: "constant"
10 | WARMUP_FACTOR: 0.3333
11 | WARMUP_ITERS: 500
12 | OUTPUT_DIR: "output/fcos/R_50_1x"
13 |
--------------------------------------------------------------------------------
/configs/FCOS-Detection/atss_r_50.yaml:
--------------------------------------------------------------------------------
1 | _BASE_: "Base-ATSS.yaml"
2 | MODEL:
3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
4 | RESNETS:
5 | DEPTH: 50
6 | INPUT:
7 | MIN_SIZE_TRAIN: (800,)
8 | SOLVER:
9 | WARMUP_METHOD: "constant"
10 | WARMUP_FACTOR: 0.3333
11 | WARMUP_ITERS: 500
12 | OUTPUT_DIR: "output/atss/R_50_1x"
--------------------------------------------------------------------------------
/configs/FCOS-Detection/vovnet/MS_V_39_3x.yaml:
--------------------------------------------------------------------------------
1 | _BASE_: "../Base-FCOS.yaml"
2 | MODEL:
3 | WEIGHTS: "https://www.dropbox.com/s/q98pypf96rhtd8y/vovnet39_ese_detectron2.pth?dl=1"
4 | BACKBONE:
5 | NAME: "build_fcos_vovnet_fpn_backbone"
6 | FREEZE_AT: 0
7 | VOVNET:
8 | CONV_BODY : "V-39-eSE"
9 | OUT_FEATURES: ["stage3", "stage4", "stage5"]
10 | FPN:
11 | IN_FEATURES: ["stage3", "stage4", "stage5"]
12 | SOLVER:
13 | STEPS: (210000, 250000)
14 | MAX_ITER: 270000
15 | OUTPUT_DIR: "output/fcos/V_39_ms_3x"
16 |
--------------------------------------------------------------------------------
/configs/FCOS-Detection/vovnet/MS_V_57_3x.yaml:
--------------------------------------------------------------------------------
1 | _BASE_: "../Base-FCOS.yaml"
2 | MODEL:
3 | WEIGHTS: "https://www.dropbox.com/s/8xl0cb3jj51f45a/vovnet57_ese_detectron2.pth?dl=1"
4 | BACKBONE:
5 | NAME: "build_fcos_vovnet_fpn_backbone"
6 | FREEZE_AT: 0
7 | VOVNET:
8 | CONV_BODY : "V-57-eSE"
9 | OUT_FEATURES: ["stage3", "stage4", "stage5"]
10 | FPN:
11 | IN_FEATURES: ["stage3", "stage4", "stage5"]
12 | SOLVER:
13 | STEPS: (210000, 250000)
14 | MAX_ITER: 270000
15 | OUTPUT_DIR: "output/fcos/V_57_ms_3x"
16 |
--------------------------------------------------------------------------------
/configs/FCOS-Detection/vovnet/MS_V_99_3x.yaml:
--------------------------------------------------------------------------------
1 | _BASE_: "../Base-FCOS.yaml"
2 | MODEL:
3 | WEIGHTS: "https://www.dropbox.com/s/1mlv31coewx8trd/vovnet99_ese_detectron2.pth?dl=1"
4 | BACKBONE:
5 | NAME: "build_fcos_vovnet_fpn_backbone"
6 | FREEZE_AT: 0
7 | VOVNET:
8 | CONV_BODY : "V-99-eSE"
9 | OUT_FEATURES: ["stage3", "stage4", "stage5"]
10 | FPN:
11 | IN_FEATURES: ["stage3", "stage4", "stage5"]
12 | SOLVER:
13 | STEPS: (210000, 250000)
14 | MAX_ITER: 270000
15 | OUTPUT_DIR: "output/fcos/V_99_ms_3x"
16 |
--------------------------------------------------------------------------------
/configs/FCOS-Detection/vovnet/README.md:
--------------------------------------------------------------------------------
1 | # [VoVNet-v2](https://github.com/youngwanLEE/CenterMask) backbone networks in [FCOS](https://github.com/aim-uofa/det)
2 | **Efficient Backbone Network for Object Detection and Segmentation**\
3 | Youngwan Lee
4 |
5 |
6 | [[`vovnet-detectron2`](https://github.com/youngwanLEE/vovnet-detectron2)][[`CenterMask(code)`](https://github.com/youngwanLEE/CenterMask)] [[`VoVNet-v1(arxiv)`](https://arxiv.org/abs/1904.09730)] [[`VoVNet-v2(arxiv)`](https://arxiv.org/abs/1911.06667)] [[`BibTeX`](#CitingVoVNet)]
7 |
8 |
9 |
10 |

11 |
12 |
13 |
14 | ## Comparison with Faster R-CNN and ResNet
15 |
16 | ### Note
17 |
18 | We measure the inference time of all models with batch size 1 on the same V100 GPU machine (a minimal timing sketch is given at the end of this README).
19 |
20 | - PyTorch 1.3.1
21 | - CUDA 10.1
22 | - cuDNN 7.3
23 |
24 |
25 | |Method|Backbone|lr sched|inference time (s/img)|AP|APs|APm|APl|download|
26 | |---|:--------:|:---:|:--:|--|----|----|---|--------|
27 | |Faster|R-50-FPN|3x|0.047|40.2|24.2|43.5|52.0|model \| metrics
28 | |Faster|**V2-39-FPN**|3x|0.047|42.7|27.1|45.6|54.0|model \| metrics
29 | |**FCOS**|**V2-39-FPN**|3x|0.045|43.5|28.1|47.2|54.5|model \| metrics
30 | ||
31 | |Faster|R-101-FPN|3x|0.063|42.0|25.2|45.6|54.6|model \| metrics
32 | |Faster|**V2-57-FPN**|3x|0.054|43.3|27.5|46.7|55.3|model \| metrics
33 | |**FCOS**|**V2-57-FPN**|3x|0.051|44.4|28.8|47.2|56.3|model \| metrics
34 | ||
35 | |Faster|X-101-FPN|3x|0.120|43.0|27.2|46.1|54.9|model \| metrics|
36 | |Faster|**V2-99-FPN**|3x|0.073|44.1|28.1|47.0|56.4|model \| metrics|
37 | |**FCOS**|**V2-99-FPN**|3x|0.070|45.2|29.2|48.4|57.3|model \| metrics|
38 |
39 |
40 |
41 | ## Citing VoVNet
42 |
43 | If you use VoVNet, please use the following BibTeX entry.
44 |
45 | ```BibTeX
46 | @inproceedings{lee2019energy,
47 | title = {An Energy and GPU-Computation Efficient Backbone Network for Real-Time Object Detection},
48 | author = {Lee, Youngwan and Hwang, Joong-won and Lee, Sangrok and Bae, Yuseok and Park, Jongyoul},
49 | booktitle = {Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition Workshops},
50 | year = {2019}
51 | }
52 |
53 | @article{lee2019centermask,
54 | title={CenterMask: Real-Time Anchor-Free Instance Segmentation},
55 | author={Lee, Youngwan and Park, Jongyoul},
56 | journal={arXiv preprint arXiv:1911.06667},
57 | year={2019}
58 | }
59 | ```
60 |
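61 | ## Timing sketch
62 | 
63 | The inference times in the table above are seconds per image at batch size 1. A minimal sketch of how such a number is typically measured (not necessarily the exact script used here); `predictor` is assumed to be a detectron2 `DefaultPredictor` built from one of the configs in this folder, and `img` a BGR test image:
64 | 
65 | ```python
66 | import time
67 | import torch
68 | 
69 | for _ in range(10):       # warm-up
70 |     predictor(img)
71 | torch.cuda.synchronize()
72 | 
73 | n = 100
74 | start = time.perf_counter()
75 | for _ in range(n):
76 |     predictor(img)
77 | torch.cuda.synchronize()  # wait for queued CUDA work before stopping the clock
78 | print((time.perf_counter() - start) / n, "s / image")
79 | ```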
--------------------------------------------------------------------------------
/configs/RCNN/550_R_50_FPN_3x.yaml:
--------------------------------------------------------------------------------
1 | _BASE_: "Base-RCNN.yaml"
2 | MODEL:
3 | WEIGHTS: "output/mask_rcnn/550_R_50_3x/model_final.pth"
4 | MASK_ON: True
5 | RESNETS:
6 | DEPTH: 50
7 | SOLVER:
8 | STEPS: (210000, 250000)
9 | MAX_ITER: 270000
10 | INPUT:
11 | MIN_SIZE_TRAIN: (440, 462, 484, 506, 528, 550)
12 | MAX_SIZE_TRAIN: 916
13 | MIN_SIZE_TEST: 550
14 | MAX_SIZE_TEST: 916
15 | OUTPUT_DIR: "output/mask_rcnn/550_R_50_3x"
16 |
--------------------------------------------------------------------------------
/configs/RCNN/Base-RCNN-FPN.yaml:
--------------------------------------------------------------------------------
1 | MODEL:
2 | META_ARCHITECTURE: "GeneralizedRCNN"
3 | BACKBONE:
4 | NAME: "build_resnet_fpn_backbone"
5 | RESNETS:
6 | OUT_FEATURES: ["res2", "res3", "res4", "res5"]
7 | FPN:
8 | IN_FEATURES: ["res2", "res3", "res4", "res5"]
9 | ANCHOR_GENERATOR:
10 | SIZES: [[32], [64], [128], [256], [512]] # One size for each in feature map
11 | ASPECT_RATIOS: [[0.5, 1.0, 2.0]] # Three aspect ratios (same for all in feature maps)
12 | RPN:
13 | IN_FEATURES: ["p2", "p3", "p4", "p5", "p6"]
14 | PRE_NMS_TOPK_TRAIN: 2000 # Per FPN level
15 | PRE_NMS_TOPK_TEST: 1000 # Per FPN level
16 | # Detectron1 uses 2000 proposals per-batch,
17 | # (See "modeling/rpn/rpn_outputs.py" for details of this legacy issue)
18 | # which is approximately 1000 proposals per-image since the default batch size for FPN is 2.
19 | POST_NMS_TOPK_TRAIN: 1000
20 | POST_NMS_TOPK_TEST: 1000
21 | ROI_HEADS:
22 | NAME: "StandardROIHeads"
23 | IN_FEATURES: ["p2", "p3", "p4", "p5"]
24 | ROI_BOX_HEAD:
25 | NAME: "FastRCNNConvFCHead"
26 | NUM_FC: 2
27 | POOLER_RESOLUTION: 7
28 | ROI_MASK_HEAD:
29 | NAME: "MaskRCNNConvUpsampleHead"
30 | NUM_CONV: 4
31 | POOLER_RESOLUTION: 14
32 | DATASETS:
33 | TRAIN: ("coco_2017_train",)
34 | TEST: ("coco_2017_val",)
35 | SOLVER:
36 | IMS_PER_BATCH: 16
37 | BASE_LR: 0.02
38 | STEPS: (60000, 80000)
39 | MAX_ITER: 90000
40 | INPUT:
41 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
42 | VERSION: 2
43 |
--------------------------------------------------------------------------------
/configs/RCNN/Base-RCNN.yaml:
--------------------------------------------------------------------------------
1 | MODEL:
2 | META_ARCHITECTURE: "GeneralizedRCNN"
3 | BACKBONE:
4 | NAME: "build_resnet_fpn_backbone"
5 | RESNETS:
6 | OUT_FEATURES: ["res2", "res3", "res4", "res5"]
7 | FPN:
8 | IN_FEATURES: ["res2", "res3", "res4", "res5"]
9 | ANCHOR_GENERATOR:
10 | SIZES: [[32], [64], [128], [256], [512]] # One size for each in feature map
11 | ASPECT_RATIOS: [[0.5, 1.0, 2.0]] # Three aspect ratios (same for all in feature maps)
12 | RPN:
13 | IN_FEATURES: ["p2", "p3", "p4", "p5", "p6"]
14 | PRE_NMS_TOPK_TRAIN: 2000 # Per FPN level
15 | PRE_NMS_TOPK_TEST: 1000 # Per FPN level
16 | # Detectron1 uses 2000 proposals per-batch,
17 | # (See "modeling/rpn/rpn_outputs.py" for details of this legacy issue)
18 | # which is approximately 1000 proposals per-image since the default batch size for FPN is 2.
19 | POST_NMS_TOPK_TRAIN: 1000
20 | POST_NMS_TOPK_TEST: 1000
21 | ROI_HEADS:
22 | NAME: "StandardROIHeads"
23 | IN_FEATURES: ["p2", "p3", "p4", "p5"]
24 | ROI_BOX_HEAD:
25 | NAME: "FastRCNNConvFCHead"
26 | NUM_FC: 2
27 | POOLER_RESOLUTION: 7
28 | ROI_MASK_HEAD:
29 | NAME: "MaskRCNNConvUpsampleHead"
30 | NUM_CONV: 4
31 | POOLER_RESOLUTION: 14
32 | DATASETS:
33 | TRAIN: ("coco_2017_train",)
34 | TEST: ("coco_2017_val",)
35 | SOLVER:
36 | IMS_PER_BATCH: 16
37 | BASE_LR: 0.02
38 | STEPS: (60000, 80000)
39 | MAX_ITER: 90000
40 | INPUT:
41 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
42 | VERSION: 2
43 |
--------------------------------------------------------------------------------
/configs/RCNN/LVIS/R_50_1x.yaml:
--------------------------------------------------------------------------------
1 | _BASE_: "Base-LVIS.yaml"
2 | MODEL:
3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
4 | RESNETS:
5 | DEPTH: 50
6 | OUTPUT_DIR: "output/lvis/mask_rcnn/R_50_1x"
7 |
--------------------------------------------------------------------------------
/configs/RCNN/R_101_3x.yaml:
--------------------------------------------------------------------------------
1 | _BASE_: "Base-RCNN.yaml"
2 | MODEL:
3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl"
4 | MASK_ON: True
5 | RESNETS:
6 | DEPTH: 101
7 | SOLVER:
8 | STEPS: (210000, 250000)
9 | MAX_ITER: 270000
10 |
--------------------------------------------------------------------------------
/configs/RCNN/faster_rcnn_RS_50_FPN_1x.yaml:
--------------------------------------------------------------------------------
1 | _BASE_: "Base-RCNN-FPN.yaml"
2 | MODEL:
3 | #WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
4 | RESNEST: True
5 | BACKBONE:
6 | NAME: "build_resnest_fpn_backbone"
7 | MASK_ON: False
8 | RESNETS:
9 | DEPTH: 50
10 | STRIDE_IN_1X1: False
11 | RADIX: 2
12 | DEEP_STEM: True
13 | AVD: True
14 | AVG_DOWN: True
15 | NORM: "GN"
16 |
--------------------------------------------------------------------------------
/configs/RCNN/faster_rcnn_R_50_FPN_1x.yaml:
--------------------------------------------------------------------------------
1 | _BASE_: "Base-RCNN-FPN.yaml"
2 | MODEL:
3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
4 | MASK_ON: False
5 | RESNETS:
6 | DEPTH: 50
7 |
--------------------------------------------------------------------------------
/configs/RCNN/faster_rcnn_R_50_FPN_1x_tta.yaml:
--------------------------------------------------------------------------------
1 | _BASE_: "Base-RCNN-FPN.yaml"
2 | MODEL:
3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
4 | MASK_ON: False
5 | RESNETS:
6 | DEPTH: 50
7 | TEST:
8 | AUG:
9 | ENABLED: True
--------------------------------------------------------------------------------
/configs/RCNN/libra_rcnn/r_50_1x.yaml:
--------------------------------------------------------------------------------
1 | _BASE_: "../Base-RCNN-FPN.yaml"
2 | MODEL:
3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
4 | MASK_ON: False
5 | RESNETS:
6 | DEPTH: 50
7 | ROI_HEADS:
8 | NAME: "LibraRCNNROIHeads"
9 | IN_FEATURES: ["p2", "p3", "p4", "p5"]
--------------------------------------------------------------------------------
/det/_C.cpython-36m-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TengFeiHan0/Object-Detection.pytorch/5c2000ba08e903255a146f9e003c87a2fb1cb5aa/det/_C.cpython-36m-x86_64-linux-gnu.so
--------------------------------------------------------------------------------
/det/__init__.py:
--------------------------------------------------------------------------------
1 |
2 | from det import modeling
3 |
4 | __version__ = "0.1.1"
--------------------------------------------------------------------------------
/det/__pycache__/__init__.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TengFeiHan0/Object-Detection.pytorch/5c2000ba08e903255a146f9e003c87a2fb1cb5aa/det/__pycache__/__init__.cpython-36.pyc
--------------------------------------------------------------------------------
/det/checkpoint/__init__.py:
--------------------------------------------------------------------------------
1 | from .det_checkpoint import detCheckpointer
2 |
3 | __all__ = ["detCheckpointer"]
4 |
--------------------------------------------------------------------------------
/det/checkpoint/__pycache__/__init__.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TengFeiHan0/Object-Detection.pytorch/5c2000ba08e903255a146f9e003c87a2fb1cb5aa/det/checkpoint/__pycache__/__init__.cpython-36.pyc
--------------------------------------------------------------------------------
/det/checkpoint/__pycache__/det_checkpoint.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TengFeiHan0/Object-Detection.pytorch/5c2000ba08e903255a146f9e003c87a2fb1cb5aa/det/checkpoint/__pycache__/det_checkpoint.cpython-36.pyc
--------------------------------------------------------------------------------
/det/checkpoint/det_checkpoint.py:
--------------------------------------------------------------------------------
1 | import pickle, os
2 | from fvcore.common.file_io import PathManager
3 | from detectron2.checkpoint import DetectionCheckpointer
4 |
5 |
6 | class detCheckpointer(DetectionCheckpointer):
7 | """
8 | Same as :class:`DetectionCheckpointer`, but is able to convert models
9 | in AdelaiDet, such as LPF backbone.
10 | """
11 | def _load_file(self, filename):
12 | if filename.endswith(".pkl"):
13 | with PathManager.open(filename, "rb") as f:
14 | data = pickle.load(f, encoding="latin1")
15 | if "model" in data and "__author__" in data:
16 | # file is in Detectron2 model zoo format
17 | self.logger.info("Reading a file from '{}'".format(data["__author__"]))
18 | return data
19 | else:
20 | # assume file is from Caffe2 / Detectron1 model zoo
21 | if "blobs" in data:
22 | # Detection models have "blobs", but ImageNet models don't
23 | data = data["blobs"]
24 | data = {k: v for k, v in data.items() if not k.endswith("_momentum")}
25 | if "weight_order" in data:
26 | del data["weight_order"]
27 | return {"model": data, "__author__": "Caffe2", "matching_heuristics": True}
28 |
29 | loaded = super()._load_file(filename) # load native pth checkpoint
30 | if "model" not in loaded:
31 | loaded = {"model": loaded}
32 |
33 | basename = os.path.basename(filename).lower()
34 | if "lpf" in basename or "dla" in basename:
35 | loaded["matching_heuristics"] = True
36 | return loaded
37 |
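38 | # Usage sketch (illustrative; mirrors detectron2's DetectionCheckpointer interface):
39 | #   checkpointer = detCheckpointer(model, save_dir=cfg.OUTPUT_DIR)
40 | #   checkpointer.resume_or_load(cfg.MODEL.WEIGHTS, resume=False)
41 | # .pkl files take the Detectron2/Caffe2 branch above; .pth files (e.g. the DLA/LPF
42 | # ImageNet weights) fall through to the torch loader, with matching heuristics enabled.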
--------------------------------------------------------------------------------
/det/config/__init__.py:
--------------------------------------------------------------------------------
1 | from .config import get_cfg
2 |
3 | __all__ = [
4 | "get_cfg",
5 | ]
6 |
--------------------------------------------------------------------------------
/det/config/__pycache__/__init__.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TengFeiHan0/Object-Detection.pytorch/5c2000ba08e903255a146f9e003c87a2fb1cb5aa/det/config/__pycache__/__init__.cpython-36.pyc
--------------------------------------------------------------------------------
/det/config/__pycache__/config.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TengFeiHan0/Object-Detection.pytorch/5c2000ba08e903255a146f9e003c87a2fb1cb5aa/det/config/__pycache__/config.cpython-36.pyc
--------------------------------------------------------------------------------
/det/config/__pycache__/defaults.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TengFeiHan0/Object-Detection.pytorch/5c2000ba08e903255a146f9e003c87a2fb1cb5aa/det/config/__pycache__/defaults.cpython-36.pyc
--------------------------------------------------------------------------------
/det/config/config.py:
--------------------------------------------------------------------------------
1 | from detectron2.config import CfgNode
2 |
3 |
4 | def get_cfg() -> CfgNode:
5 | """
6 | Get a copy of the default config.
7 |
8 | Returns:
9 | a detectron2 CfgNode instance.
10 | """
11 | from .defaults import _C
12 |
13 | return _C.clone()
14 |
--------------------------------------------------------------------------------
/det/data/__init__.py:
--------------------------------------------------------------------------------
1 | from . import builtin # ensure the builtin datasets are registered
2 | from .dataset_mapper import DatasetMapperWithBasis
3 |
4 |
5 | __all__ = ["DatasetMapperWithBasis"]
6 | #grid mask trick
7 | #https://github.com/Jia-Research-Lab/GridMask/blob/master/detection_grid/maskrcnn_benchmark/data/transforms/grid.py
--------------------------------------------------------------------------------
/det/data/__pycache__/__init__.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TengFeiHan0/Object-Detection.pytorch/5c2000ba08e903255a146f9e003c87a2fb1cb5aa/det/data/__pycache__/__init__.cpython-36.pyc
--------------------------------------------------------------------------------
/det/data/__pycache__/builtin.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TengFeiHan0/Object-Detection.pytorch/5c2000ba08e903255a146f9e003c87a2fb1cb5aa/det/data/__pycache__/builtin.cpython-36.pyc
--------------------------------------------------------------------------------
/det/data/__pycache__/dataset_mapper.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TengFeiHan0/Object-Detection.pytorch/5c2000ba08e903255a146f9e003c87a2fb1cb5aa/det/data/__pycache__/dataset_mapper.cpython-36.pyc
--------------------------------------------------------------------------------
/det/data/__pycache__/detection_utils.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TengFeiHan0/Object-Detection.pytorch/5c2000ba08e903255a146f9e003c87a2fb1cb5aa/det/data/__pycache__/detection_utils.cpython-36.pyc
--------------------------------------------------------------------------------
/det/data/builtin.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | from detectron2.data.datasets.register_coco import register_coco_instances
4 | from detectron2.data.datasets.builtin_meta import _get_builtin_metadata
5 |
6 | from .datasets.text import register_text_instances
7 |
8 | # Register the PIC person dataset and the scene-text datasets used in this repo.
9 |
10 | _PREDEFINED_SPLITS_PIC = {
11 | "pic_person_train": ("pic/image/train", "pic/annotations/train_person.json"),
12 | "pic_person_val": ("pic/image/val", "pic/annotations/val_person.json"),
13 | }
14 |
15 | metadata_pic = {
16 | "thing_classes": ["person"]
17 | }
18 |
19 | _PREDEFINED_SPLITS_TEXT = {
20 | "totaltext_train": ("totaltext/train_images", "totaltext/train.json"),
21 | "totaltext_val": ("totaltext/test_images", "totaltext/test.json"),
22 | "ctw1500_word_train": ("CTW1500/ctwtrain_text_image", "CTW1500/annotations/train_ctw1500_maxlen100_v2.json"),
23 | "ctw1500_word_test": ("CTW1500/ctwtest_text_image","CTW1500/annotations/test_ctw1500_maxlen100.json"),
24 | "syntext1_train": ("syntext1/images", "syntext1/annotations/train.json"),
25 | "syntext2_train": ("syntext2/images", "syntext2/annotations/train.json"),
26 | "mltbezier_word_train": ("mlt2017/images","mlt2017/annotations/train.json"),
27 | }
28 |
29 | metadata_text = {
30 | "thing_classes": ["text"]
31 | }
32 |
33 |
34 | def register_all_coco(root="datasets"):
35 | for key, (image_root, json_file) in _PREDEFINED_SPLITS_PIC.items():
36 | # Assume pre-defined datasets live in `./datasets`.
37 | register_coco_instances(
38 | key,
39 | metadata_pic,
40 | os.path.join(root, json_file) if "://" not in json_file else json_file,
41 | os.path.join(root, image_root),
42 | )
43 | for key, (image_root, json_file) in _PREDEFINED_SPLITS_TEXT.items():
44 | # Assume pre-defined datasets live in `./datasets`.
45 | register_text_instances(
46 | key,
47 | metadata_text,
48 | os.path.join(root, json_file) if "://" not in json_file else json_file,
49 | os.path.join(root, image_root),
50 | )
51 |
52 |
53 | register_all_coco()
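54 | 
55 | # Usage sketch (illustrative): once this module is imported, the splits registered above
56 | # can be looked up by name through detectron2's catalogs, e.g.
57 | #   from detectron2.data import DatasetCatalog, MetadataCatalog
58 | #   dicts = DatasetCatalog.get("pic_person_train")   # list of dataset dicts
59 | #   meta = MetadataCatalog.get("totaltext_train")    # meta.thing_classes == ["text"]
60 | # provided the corresponding images and json files exist under ./datasets.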
--------------------------------------------------------------------------------
/det/data/dataset_mapper.py:
--------------------------------------------------------------------------------
1 | import copy
2 | import numpy as np
3 | import os.path as osp
4 | import torch
5 | from fvcore.common.file_io import PathManager
6 | from PIL import Image
7 | import logging
8 |
9 | from detectron2.data.dataset_mapper import DatasetMapper
10 | from detectron2.data.detection_utils import SizeMismatchError
11 | from detectron2.data import detection_utils as utils
12 | from detectron2.data import transforms as T
13 |
14 | from .detection_utils import (
15 | build_transform_gen,
16 | transform_instance_annotations,
17 | annotations_to_instances,
18 | gen_crop_transform_with_instance,
19 | )
20 |
21 | """
22 | This file contains the default mapping that's applied to "dataset dicts".
23 | """
24 |
25 | __all__ = ["DatasetMapperWithBasis"]
26 |
27 | logger = logging.getLogger(__name__)
28 |
29 |
30 | class DatasetMapperWithBasis(DatasetMapper):
31 | """
32 | This caller enables the default Detectron2 mapper to read an additional basis semantic label
33 | """
34 |
35 | def __init__(self, cfg, is_train=True):
36 | super().__init__(cfg, is_train)
37 |
38 | # Rebuild transform gen
39 | logger.info("Rebuilding the transform generators. The previous generators will be overridden.")
40 | self.tfm_gens = build_transform_gen(cfg, is_train)
41 |
42 | # fmt: off
43 | self.basis_loss_on = cfg.MODEL.BASIS_MODULE.LOSS_ON
44 | self.ann_set = cfg.MODEL.BASIS_MODULE.ANN_SET
45 | self.crop_box = cfg.INPUT.CROP.CROP_INSTANCE
46 | # fmt: on
47 |
48 | def __call__(self, dataset_dict):
49 | """
50 | Args:
51 | dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format.
52 |
53 | Returns:
54 | dict: a format that builtin models in detectron2 accept
55 | """
56 | dataset_dict = copy.deepcopy(dataset_dict) # it will be modified by code below
57 | # USER: Write your own image loading if it's not from a file
58 | try:
59 | image = utils.read_image(dataset_dict["file_name"], format=self.img_format)
60 | except Exception as e:
61 | print(dataset_dict["file_name"])
62 | print(e)
63 | raise e
64 | try:
65 | utils.check_image_size(dataset_dict, image)
66 | except SizeMismatchError as e:
67 | expected_wh = (dataset_dict["width"], dataset_dict["height"])
68 | image_wh = (image.shape[1], image.shape[0])
69 | if (image_wh[1], image_wh[0]) == expected_wh:
70 | print("transposing image {}".format(dataset_dict["file_name"]))
71 | image = image.transpose(1, 0, 2)
72 | else:
73 | raise e
74 |
75 | if "annotations" not in dataset_dict or len(dataset_dict["annotations"]) == 0:
76 | image, transforms = T.apply_transform_gens(
77 | ([self.crop_gen] if self.crop_gen else []) + self.tfm_gens, image
78 | )
79 | else:
80 | # Crop around an instance if there are instances in the image.
81 | # USER: Remove if you don't use cropping
82 | if self.crop_gen:
83 | crop_tfm = gen_crop_transform_with_instance(
84 | self.crop_gen.get_crop_size(image.shape[:2]),
85 | image.shape[:2],
86 | dataset_dict["annotations"],
87 | crop_box=self.crop_box,
88 | )
89 | image = crop_tfm.apply_image(image)
90 | try:
91 | image, transforms = T.apply_transform_gens(self.tfm_gens, image)
92 | except ValueError as e:
93 | print(dataset_dict["file_name"])
94 | raise e
95 | if self.crop_gen:
96 | transforms = crop_tfm + transforms
97 |
98 | image_shape = image.shape[:2] # h, w
99 |
100 | # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory,
101 | # but not efficient on large generic data structures due to the use of pickle & mp.Queue.
102 | # Therefore it's important to use torch.Tensor.
103 | dataset_dict["image"] = torch.as_tensor(image.transpose(2, 0, 1).astype("float32"))
104 | # Can use uint8 if it turns out to be slow some day
105 |
106 | # USER: Remove if you don't use pre-computed proposals.
107 | if self.load_proposals:
108 | utils.transform_proposals(
109 | dataset_dict, image_shape, transforms, self.min_box_side_len, self.proposal_topk
110 | )
111 |
112 | if not self.is_train:
113 | dataset_dict.pop("annotations", None)
114 | dataset_dict.pop("sem_seg_file_name", None)
115 | dataset_dict.pop("pano_seg_file_name", None)
116 | return dataset_dict
117 |
118 | if "annotations" in dataset_dict:
119 | # USER: Modify this if you want to keep them for some reason.
120 | for anno in dataset_dict["annotations"]:
121 | if not self.mask_on:
122 | anno.pop("segmentation", None)
123 | if not self.keypoint_on:
124 | anno.pop("keypoints", None)
125 |
126 | # USER: Implement additional transformations if you have other types of data
127 | annos = [
128 | transform_instance_annotations(
129 | obj, transforms, image_shape, keypoint_hflip_indices=self.keypoint_hflip_indices
130 | )
131 | for obj in dataset_dict.pop("annotations")
132 | if obj.get("iscrowd", 0) == 0
133 | ]
134 | instances = annotations_to_instances(
135 | annos, image_shape, mask_format=self.mask_format
136 | )
137 | # Create a tight bounding box from masks, useful when image is cropped
138 | if self.crop_gen and instances.has("gt_masks"):
139 | instances.gt_boxes = instances.gt_masks.get_bounding_boxes()
140 | dataset_dict["instances"] = utils.filter_empty_instances(instances)
141 |
142 | # USER: Remove if you don't do semantic/panoptic segmentation.
143 | if "sem_seg_file_name" in dataset_dict:
144 | with PathManager.open(dataset_dict.pop("sem_seg_file_name"), "rb") as f:
145 | sem_seg_gt = Image.open(f)
146 | sem_seg_gt = np.asarray(sem_seg_gt, dtype="uint8")
147 | sem_seg_gt = transforms.apply_segmentation(sem_seg_gt)
148 | sem_seg_gt = torch.as_tensor(sem_seg_gt.astype("long"))
149 | dataset_dict["sem_seg"] = sem_seg_gt
150 |
151 | if self.basis_loss_on and self.is_train:
152 | # load basis supervisions
153 | if self.ann_set == "coco":
154 | basis_sem_path = dataset_dict["file_name"].replace('train2017', 'thing_train2017').replace('image/train', 'thing_train')
155 | else:
156 | basis_sem_path = dataset_dict["file_name"].replace('coco', 'lvis').replace('train2017', 'thing_train')
157 | # change extension to npz
158 | basis_sem_path = osp.splitext(basis_sem_path)[0] + ".npz"
159 | basis_sem_gt = np.load(basis_sem_path)["mask"]
160 | basis_sem_gt = transforms.apply_segmentation(basis_sem_gt)
161 | basis_sem_gt = torch.as_tensor(basis_sem_gt.astype("long"))
162 | dataset_dict["basis_sem"] = basis_sem_gt
163 | return dataset_dict
164 |
--------------------------------------------------------------------------------
/det/data/datasets/__pycache__/text.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TengFeiHan0/Object-Detection.pytorch/5c2000ba08e903255a146f9e003c87a2fb1cb5aa/det/data/datasets/__pycache__/text.cpython-36.pyc
--------------------------------------------------------------------------------
/det/data/datasets/fast_augment.py:
--------------------------------------------------------------------------------
1 | import random
2 | import numpy as np
3 | import torch
4 | from PIL import Image
5 | from .augment_lists import *
6 |
7 | class RandAugment(object):
8 | def __init__(self, n, m):
9 | self.n = n
10 | self.m = m
11 | self.augment_list = rand_augment_list()
12 | self.topil = ToPIL()
13 |
14 | def __call__(self, img):
15 | img = self.topil(img)
16 | ops = random.choices(self.augment_list, k=self.n)
17 | for op, minval, maxval in ops:
18 | if random.random() > random.uniform(0.2, 0.8):
19 | continue
20 | val = (float(self.m) / 30) * float(maxval - minval) + minval
21 | img = op(img, val)
22 | return img
23 |
24 |
25 | class ToPIL(object):
26 | """Convert image from ndarray format to PIL
27 | """
28 | def __call__(self, img):
29 | x = Image.fromarray(np.asarray(img))  # accept ndarray (or array-like) input
30 | return x
31 |
32 | # class ToNDArray(object):
33 | # def __call__(self, img):
34 | # x = mx.nd.array(np.array(img), .cpu(0))
35 | # return x
36 |
37 | class AugmentationBlock(object):
38 | r"""
39 | AutoAugment Block
40 | Example
41 | -------
42 | >>> aa_transform = AugmentationBlock(autoaug_imagenet_policies())
43 | """
44 | def __init__(self, policies):
45 | """
46 | policies : list of (name, pr, level)
47 | """
48 | super().__init__()
49 | self.policies = policies
50 | self.topil = ToPIL()
51 | #self.tond = ToNDArray()
52 |
53 | def __call__(self, img):
54 | img = self.topil(img)
55 | policy = random.choice(self.policies)
56 | for name, pr, level in policy:
57 | if random.random() > pr:
58 | continue
59 | img = apply_augment(img, name, level)
60 | #img = self.tond(img)
61 | return img
62 |
--------------------------------------------------------------------------------
/det/data/detection_utils.py:
--------------------------------------------------------------------------------
1 | import logging
2 | import numpy as np
3 |
4 | import torch
5 |
6 | from detectron2.data import transforms as T
7 | from detectron2.data.detection_utils import transform_instance_annotations as d2_transform_inst_anno
8 | from detectron2.data.detection_utils import annotations_to_instances as d2_anno_to_inst
9 | from detectron2.structures import BoxMode
10 |
11 |
12 | def gen_crop_transform_with_instance(crop_size, image_size, instances, crop_box=True):
13 | """
14 | Generate a CropTransform so that the cropping region contains
15 | the center of the given instance.
16 |
17 | Args:
18 | crop_size (tuple): h, w in pixels
19 | image_size (tuple): h, w
20 | instances (list[dict]): annotation dicts in Detectron2's dataset
21 | format; one instance is chosen at random as the crop center.
22 | """
23 | instance = np.random.choice(instances)
24 | # `instance` is now a single annotation dict picked uniformly at random
25 | crop_size = np.asarray(crop_size, dtype=np.int32)
26 | bbox = BoxMode.convert(instance["bbox"], instance["bbox_mode"], BoxMode.XYXY_ABS)
27 | center_yx = (bbox[1] + bbox[3]) * 0.5, (bbox[0] + bbox[2]) * 0.5
28 | assert (
29 | image_size[0] >= center_yx[0] and image_size[1] >= center_yx[1]
30 | ), "The annotation bounding box is outside of the image!"
31 | assert (
32 | image_size[0] >= crop_size[0] and image_size[1] >= crop_size[1]
33 | ), "Crop size is larger than image size!"
34 |
35 | min_yx = np.maximum(np.floor(center_yx).astype(np.int32) - crop_size, 0)
36 | max_yx = np.maximum(np.asarray(image_size, dtype=np.int32) - crop_size, 0)
37 | max_yx = np.minimum(max_yx, np.ceil(center_yx).astype(np.int32))
38 |
39 | y0 = np.random.randint(min_yx[0], max_yx[0] + 1)
40 | x0 = np.random.randint(min_yx[1], max_yx[1] + 1)
41 |
42 | # if some instance is cropped extend the box
43 | if not crop_box:
44 | modified = True
45 | while modified:
46 | modified, x0, y0, crop_size = adjust_crop(x0, y0, crop_size, instances)
47 |
48 | return T.CropTransform(*map(int, (x0, y0, crop_size[1], crop_size[0])))
49 |
50 |
51 | def adjust_crop(x0, y0, crop_size, instances):
52 | modified = False
53 |
54 | x1 = x0 + crop_size[1]
55 | y1 = y0 + crop_size[0]
56 |
57 | for instance in instances:
58 | bbox = BoxMode.convert(instance["bbox"], instance["bbox_mode"], BoxMode.XYXY_ABS)
59 |
60 | if bbox[0] < x0 and bbox[2] > x0:
61 | crop_size[1] += x0 - bbox[0]
62 | x0 = bbox[0]
63 | modified = True
64 |
65 | if bbox[0] < x1 and bbox[2] > x1:
66 | crop_size[1] += bbox[2] - x1
67 | x1 = bbox[2]
68 | modified = True
69 |
70 | if bbox[1] < y0 and bbox[3] > y0:
71 | crop_size[0] += y0 - bbox[1]
72 | y0 = bbox[1]
73 | modified = True
74 |
75 | if bbox[1] < y1 and bbox[3] > y1:
76 | crop_size[0] += bbox[3] - y1
77 | y1 = bbox[3]
78 | modified = True
79 |
80 | return modified, x0, y0, crop_size
81 |
82 |
83 | def transform_instance_annotations(
84 | annotation, transforms, image_size, *, keypoint_hflip_indices=None
85 | ):
86 |
87 | annotation = d2_transform_inst_anno(
88 | annotation, transforms, image_size,
89 | keypoint_hflip_indices=keypoint_hflip_indices)
90 |
91 | if "beziers" in annotation:
92 | beziers = transform_beziers_annotations(
93 | annotation["beziers"], transforms
94 | )
95 | annotation["beziers"] = beziers
96 | return annotation
97 |
98 |
99 | def transform_beziers_annotations(beziers, transforms):
100 | """
101 | Transform Bezier control-point annotations of an image.
102 |
103 | Args:
104 | beziers (list[float]): Nx16 float in Detectron2 Dataset format.
105 | transforms (TransformList):
106 | """
107 | # (N*2,) -> (N, 2)
108 | beziers = np.asarray(beziers, dtype="float64").reshape(-1, 2)
109 | beziers = transforms.apply_coords(beziers).reshape(-1)
110 |
111 | # This assumes that HorizFlipTransform is the only one that does flip
112 | do_hflip = sum(isinstance(t, T.HFlipTransform) for t in transforms.transforms) % 2 == 1
113 | if do_hflip:
114 | raise ValueError("Flipping text data is not supported (also discouraged).")
115 |
116 | return beziers
117 |
118 |
119 | def annotations_to_instances(annos, image_size, mask_format="polygon"):
120 | instance = d2_anno_to_inst(annos, image_size, mask_format)
121 |
122 | # add attributes
123 | if "beziers" in annos[0]:
124 | beziers = [obj.get("beziers", []) for obj in annos]
125 | instance.beziers = torch.as_tensor(
126 | beziers, dtype=torch.float32)
127 |
128 | if "rec" in annos[0]:
129 | text = [obj.get("rec", []) for obj in annos]
130 | instance.text = torch.as_tensor(
131 | text, dtype=torch.int32)
132 |
133 | return instance
134 |
135 |
136 | def build_transform_gen(cfg, is_train):
137 | """
138 | With the option to disable horizontal flip (controlled by cfg.INPUT.HFLIP_TRAIN).
139 |
140 | Returns:
141 | list[TransformGen]
142 | """
143 | if is_train:
144 | min_size = cfg.INPUT.MIN_SIZE_TRAIN
145 | max_size = cfg.INPUT.MAX_SIZE_TRAIN
146 | sample_style = cfg.INPUT.MIN_SIZE_TRAIN_SAMPLING
147 | else:
148 | min_size = cfg.INPUT.MIN_SIZE_TEST
149 | max_size = cfg.INPUT.MAX_SIZE_TEST
150 | sample_style = "choice"
151 | if sample_style == "range":
152 | assert len(min_size) == 2, "expected exactly 2 min_size values for 'range' sampling, got {}".format(
153 | len(min_size)
154 | )
155 |
156 | logger = logging.getLogger(__name__)
157 | tfm_gens = []
158 | tfm_gens.append(T.ResizeShortestEdge(min_size, max_size, sample_style))
159 | if is_train:
160 | if cfg.INPUT.HFLIP_TRAIN:
161 | tfm_gens.append(T.RandomFlip())
162 | logger.info("TransformGens used in training: " + str(tfm_gens))
163 | return tfm_gens
164 |
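As a quick illustration of the Bezier handling above, the sketch below (illustrative only; it assumes detectron2 is installed and that transform_beziers_annotations is importable from this module) scales a flattened list of 8 control points with a 2x ResizeTransform. apply_coords operates on the (N, 2) view produced by the reshape, so every coordinate is doubled.

    import numpy as np
    from detectron2.data import transforms as T

    # 8 (x, y) control points for one Bezier curve, flattened to 16 floats
    beziers = np.arange(16, dtype="float64").tolist()
    tfms = T.TransformList([T.ResizeTransform(100, 100, 200, 200, None)])  # 2x upscale
    scaled = transform_beziers_annotations(beziers, tfms)  # every coordinate doubled
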
--------------------------------------------------------------------------------
/det/layers/__init__.py:
--------------------------------------------------------------------------------
1 | from .deform_conv import DFConv2d
2 | from .ml_nms import ml_nms
3 | from .iou_loss import IOULoss
4 | from .balanced_l1_loss import BalancedL1Loss, BoundedIoULoss
5 | from .conv_with_kaiming_uniform import conv_with_kaiming_uniform
6 | from .bezier_align import BezierAlign
7 | from .def_roi_align import DefROIAlign
8 | from .naive_group_norm import NaiveGroupNorm
9 | from .gcn import GCN
10 | from .soft_nms import batched_soft_nms
11 | __all__ = [k for k in globals().keys() if not k.startswith("_")]
--------------------------------------------------------------------------------
/det/layers/__pycache__/__init__.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TengFeiHan0/Object-Detection.pytorch/5c2000ba08e903255a146f9e003c87a2fb1cb5aa/det/layers/__pycache__/__init__.cpython-36.pyc
--------------------------------------------------------------------------------
/det/layers/__pycache__/balanced_l1_loss.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TengFeiHan0/Object-Detection.pytorch/5c2000ba08e903255a146f9e003c87a2fb1cb5aa/det/layers/__pycache__/balanced_l1_loss.cpython-36.pyc
--------------------------------------------------------------------------------
/det/layers/__pycache__/bezier_align.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TengFeiHan0/Object-Detection.pytorch/5c2000ba08e903255a146f9e003c87a2fb1cb5aa/det/layers/__pycache__/bezier_align.cpython-36.pyc
--------------------------------------------------------------------------------
/det/layers/__pycache__/conv_with_kaiming_uniform.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TengFeiHan0/Object-Detection.pytorch/5c2000ba08e903255a146f9e003c87a2fb1cb5aa/det/layers/__pycache__/conv_with_kaiming_uniform.cpython-36.pyc
--------------------------------------------------------------------------------
/det/layers/__pycache__/def_roi_align.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TengFeiHan0/Object-Detection.pytorch/5c2000ba08e903255a146f9e003c87a2fb1cb5aa/det/layers/__pycache__/def_roi_align.cpython-36.pyc
--------------------------------------------------------------------------------
/det/layers/__pycache__/deform_conv.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TengFeiHan0/Object-Detection.pytorch/5c2000ba08e903255a146f9e003c87a2fb1cb5aa/det/layers/__pycache__/deform_conv.cpython-36.pyc
--------------------------------------------------------------------------------
/det/layers/__pycache__/gcn.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TengFeiHan0/Object-Detection.pytorch/5c2000ba08e903255a146f9e003c87a2fb1cb5aa/det/layers/__pycache__/gcn.cpython-36.pyc
--------------------------------------------------------------------------------
/det/layers/__pycache__/iou_loss.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TengFeiHan0/Object-Detection.pytorch/5c2000ba08e903255a146f9e003c87a2fb1cb5aa/det/layers/__pycache__/iou_loss.cpython-36.pyc
--------------------------------------------------------------------------------
/det/layers/__pycache__/ml_nms.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TengFeiHan0/Object-Detection.pytorch/5c2000ba08e903255a146f9e003c87a2fb1cb5aa/det/layers/__pycache__/ml_nms.cpython-36.pyc
--------------------------------------------------------------------------------
/det/layers/__pycache__/naive_group_norm.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TengFeiHan0/Object-Detection.pytorch/5c2000ba08e903255a146f9e003c87a2fb1cb5aa/det/layers/__pycache__/naive_group_norm.cpython-36.pyc
--------------------------------------------------------------------------------
/det/layers/__pycache__/soft_nms.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TengFeiHan0/Object-Detection.pytorch/5c2000ba08e903255a146f9e003c87a2fb1cb5aa/det/layers/__pycache__/soft_nms.cpython-36.pyc
--------------------------------------------------------------------------------
/det/layers/balanced_l1_loss.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import torch
3 | import torch.nn as nn
4 |
5 | import functools
6 |
7 | import torch.nn.functional as F
8 |
9 |
10 | def reduce_loss(loss, reduction):
11 | """Reduce loss as specified.
12 | Args:
13 | loss (Tensor): Elementwise loss tensor.
14 | reduction (str): Options are "none", "mean" and "sum".
15 | Return:
16 | Tensor: Reduced loss tensor.
17 | """
18 | reduction_enum = F._Reduction.get_enum(reduction)
19 | # none: 0, elementwise_mean:1, sum: 2
20 | if reduction_enum == 0:
21 | return loss
22 | elif reduction_enum == 1:
23 | return loss.mean()
24 | elif reduction_enum == 2:
25 | return loss.sum()
26 |
27 |
28 | def weight_reduce_loss(loss, weight=None, reduction='mean', avg_factor=None):
29 | """Apply element-wise weight and reduce loss.
30 | Args:
31 | loss (Tensor): Element-wise loss.
32 | weight (Tensor): Element-wise weights.
33 | reduction (str): Same as built-in losses of PyTorch.
34 | avg_factor (float): Average factor when computing the mean of losses.
35 | Returns:
36 | Tensor: Processed loss values.
37 | """
38 | # if weight is specified, apply element-wise weight
39 | if weight is not None:
40 | loss = loss * weight
41 |
42 | # if avg_factor is not specified, just reduce the loss
43 | if avg_factor is None:
44 | loss = reduce_loss(loss, reduction)
45 | else:
46 | # if reduction is mean, then average the loss by avg_factor
47 | if reduction == 'mean':
48 | loss = loss.sum() / avg_factor
49 | # if reduction is 'none', then do nothing, otherwise raise an error
50 | elif reduction != 'none':
51 | raise ValueError('avg_factor can not be used with reduction="sum"')
52 | return loss
53 |
54 |
55 | def weighted_loss(loss_func):
56 | """Create a weighted version of a given loss function.
57 | To use this decorator, the loss function must have the signature like
58 | `loss_func(pred, target, **kwargs)`. The function only needs to compute
59 | element-wise loss without any reduction. This decorator will add weight
60 | and reduction arguments to the function. The decorated function will have
61 | the signature like `loss_func(pred, target, weight=None, reduction='mean',
62 | avg_factor=None, **kwargs)`.
63 | :Example:
64 | >>> @weighted_loss
65 | >>> def l1_loss(pred, target):
66 | >>> return (pred - target).abs()
67 | >>> pred = torch.Tensor([0, 2, 3])
68 | >>> target = torch.Tensor([1, 1, 1])
69 | >>> weight = torch.Tensor([1, 0, 1])
70 | >>> l1_loss(pred, target)
71 | tensor(1.3333)
72 | >>> l1_loss(pred, target, weight)
73 | tensor(1.)
74 | >>> l1_loss(pred, target, reduction='none')
75 | tensor([1., 1., 2.])
76 | >>> l1_loss(pred, target, weight, avg_factor=2)
77 | tensor(1.5000)
78 | """
79 |
80 | @functools.wraps(loss_func)
81 | def wrapper(pred,
82 | target,
83 | weight=None,
84 | reduction='mean',
85 | avg_factor=None,
86 | **kwargs):
87 | # get element-wise loss
88 | loss = loss_func(pred, target, **kwargs)
89 | loss = weight_reduce_loss(loss, weight, reduction, avg_factor)
90 | return loss
91 |
92 | return wrapper
93 |
94 |
95 |
96 | @weighted_loss
97 | def balanced_l1_loss(pred,
98 | target,
99 | beta=1.0,
100 | alpha=0.5,
101 | gamma=1.5,
102 | reduction='mean'):
103 | assert beta > 0
104 | assert pred.size() == target.size() and target.numel() > 0
105 |
106 | diff = torch.abs(pred - target)
107 | b = np.e**(gamma / alpha) - 1
108 | loss = torch.where(
109 | diff < beta, alpha / b *
110 | (b * diff + 1) * torch.log(b * diff / beta + 1) - alpha * diff,
111 | gamma * diff + gamma / b - alpha * beta)
112 |
113 | return loss
114 |
115 | @weighted_loss
116 | def bounded_iou_loss(pred, target, beta=0.2, eps=1e-3):
117 | """Improving Object Localization with Fitness NMS and Bounded IoU Loss,
118 | https://arxiv.org/abs/1711.00164.
119 |
120 | Args:
121 | pred (tensor): Predicted bboxes.
122 | target (tensor): Target bboxes.
123 | beta (float): beta parameter in smoothl1.
124 | eps (float): eps to avoid NaN.
125 | """
126 | pred_ctrx = (pred[:, 0] + pred[:, 2]) * 0.5
127 | pred_ctry = (pred[:, 1] + pred[:, 3]) * 0.5
128 | pred_w = pred[:, 2] - pred[:, 0]
129 | pred_h = pred[:, 3] - pred[:, 1]
130 | with torch.no_grad():
131 | target_ctrx = (target[:, 0] + target[:, 2]) * 0.5
132 | target_ctry = (target[:, 1] + target[:, 3]) * 0.5
133 | target_w = target[:, 2] - target[:, 0]
134 | target_h = target[:, 3] - target[:, 1]
135 |
136 | dx = target_ctrx - pred_ctrx
137 | dy = target_ctry - pred_ctry
138 |
139 | loss_dx = 1 - torch.max(
140 | (target_w - 2 * dx.abs()) /
141 | (target_w + 2 * dx.abs() + eps), torch.zeros_like(dx))
142 | loss_dy = 1 - torch.max(
143 | (target_h - 2 * dy.abs()) /
144 | (target_h + 2 * dy.abs() + eps), torch.zeros_like(dy))
145 | loss_dw = 1 - torch.min(target_w / (pred_w + eps), pred_w /
146 | (target_w + eps))
147 | loss_dh = 1 - torch.min(target_h / (pred_h + eps), pred_h /
148 | (target_h + eps))
149 | loss_comb = torch.stack([loss_dx, loss_dy, loss_dw, loss_dh],
150 | dim=-1).view(loss_dx.size(0), -1)
151 |
152 | loss = torch.where(loss_comb < beta, 0.5 * loss_comb * loss_comb / beta,
153 | loss_comb - 0.5 * beta)
154 | return loss
155 |
156 | class BalancedL1Loss(nn.Module):
157 | """Balanced L1 Loss
158 | arXiv: https://arxiv.org/pdf/1904.02701.pdf (CVPR 2019)
159 | """
160 |
161 | def __init__(self,
162 | alpha=0.5,
163 | gamma=1.5,
164 | beta=1.0,
165 | reduction='mean',
166 | loss_weight=1.0):
167 | super(BalancedL1Loss, self).__init__()
168 | self.alpha = alpha
169 | self.gamma = gamma
170 | self.beta = beta
171 | self.reduction = reduction
172 | self.loss_weight = loss_weight
173 |
174 | def forward(self,
175 | pred,
176 | target,
177 | weight=None,
178 | avg_factor=None,
179 | reduction_override=None,
180 | **kwargs):
181 | assert reduction_override in (None, 'none', 'mean', 'sum')
182 | reduction = (
183 | reduction_override if reduction_override else self.reduction)
184 | loss_bbox = self.loss_weight * balanced_l1_loss(
185 | pred,
186 | target,
187 | weight,
188 | alpha=self.alpha,
189 | gamma=self.gamma,
190 | beta=self.beta,
191 | reduction=reduction,
192 | avg_factor=avg_factor,
193 | **kwargs)
194 | return loss_bbox
195 |
196 |
197 | class BoundedIoULoss(nn.Module):
198 |
199 | def __init__(self, beta=0.2, eps=1e-3, reduction='mean', loss_weight=1.0):
200 | super(BoundedIoULoss, self).__init__()
201 | self.beta = beta
202 | self.eps = eps
203 | self.reduction = reduction
204 | self.loss_weight = loss_weight
205 |
206 | def forward(self,
207 | pred,
208 | target,
209 | weight=None,
210 | avg_factor=None,
211 | reduction_override=None,
212 | **kwargs):
213 | if weight is not None and not torch.any(weight > 0):
214 | return (pred * weight).sum() # 0
215 | assert reduction_override in (None, 'none', 'mean', 'sum')
216 | reduction = (
217 | reduction_override if reduction_override else self.reduction)
218 | loss = self.loss_weight * bounded_iou_loss(
219 | pred,
220 | target,
221 | weight,
222 | beta=self.beta,
223 | eps=self.eps,
224 | reduction=reduction,
225 | avg_factor=avg_factor,
226 | **kwargs)
227 | return loss
228 |
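A minimal usage sketch (illustrative; not from the repository): both loss modules take Nx4 box tensors, and avg_factor divides the summed loss when the reduction is 'mean'.

    import torch

    pred = torch.rand(8, 4)
    target = torch.rand(8, 4)

    criterion = BalancedL1Loss(alpha=0.5, gamma=1.5, beta=1.0)
    loss = criterion(pred, target, avg_factor=pred.size(0))  # scalar tensor
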
--------------------------------------------------------------------------------
/det/layers/bezier_align.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
2 | from torch import nn
3 | from torch.autograd import Function
4 | from torch.autograd.function import once_differentiable
5 | from torch.nn.modules.utils import _pair
6 |
7 | from det import _C
8 |
9 |
10 | class _BezierAlign(Function):
11 | @staticmethod
12 | def forward(ctx, input, roi, output_size, spatial_scale, sampling_ratio, aligned):
13 | ctx.save_for_backward(roi)
14 | ctx.output_size = _pair(output_size)
15 | ctx.spatial_scale = spatial_scale
16 | ctx.sampling_ratio = sampling_ratio
17 | ctx.input_shape = input.size()
18 | ctx.aligned = aligned
19 | output = _C.bezier_align_forward(
20 | input, roi, spatial_scale, output_size[0], output_size[1], sampling_ratio, aligned
21 | )
22 | return output
23 |
24 | @staticmethod
25 | @once_differentiable
26 | def backward(ctx, grad_output):
27 | rois, = ctx.saved_tensors
28 | output_size = ctx.output_size
29 | spatial_scale = ctx.spatial_scale
30 | sampling_ratio = ctx.sampling_ratio
31 | bs, ch, h, w = ctx.input_shape
32 | grad_input = _C.bezier_align_backward(
33 | grad_output,
34 | rois,
35 | spatial_scale,
36 | output_size[0],
37 | output_size[1],
38 | bs,
39 | ch,
40 | h,
41 | w,
42 | sampling_ratio,
43 | ctx.aligned,
44 | )
45 | return grad_input, None, None, None, None, None
46 |
47 |
48 | bezier_align = _BezierAlign.apply
49 |
50 |
51 | class BezierAlign(nn.Module):
52 | def __init__(self, output_size, spatial_scale, sampling_ratio, aligned=True):
53 | """
54 | Args:
55 | output_size (tuple): h, w
56 | spatial_scale (float): scale the input boxes by this number
57 | sampling_ratio (int): number of inputs samples to take for each output
58 | sample. 0 to take samples densely.
59 | aligned (bool): if False, use the legacy implementation in
60 | Detectron. If True, align the results more perfectly.
61 |
62 | Note:
63 | The meaning of aligned=True:
64 |
65 | With `aligned=True`,
66 | we first appropriately scale the ROI and then shift it by -0.5
67 | prior to calling bezier_align. This produces the correct neighbors; see
68 | det/tests/test_bezier_align.py for verification.
69 |
70 | In practice, this difference does not affect the model's performance when
71 | BezierAlign is used together with conv layers.
72 | """
73 | super(BezierAlign, self).__init__()
74 | self.output_size = output_size
75 | self.spatial_scale = spatial_scale
76 | self.sampling_ratio = sampling_ratio
77 | self.aligned = aligned
78 |
79 | def forward(self, input, rois):
80 | """
81 | Args:
82 | input: NCHW images
83 | rois: Bx17 boxes. First column is the index into N. The other 16 columns are [xy]x8.
84 | """
85 | assert rois.dim() == 2 and rois.size(1) == 17
86 | return bezier_align(
87 | input, rois, self.output_size, self.spatial_scale, self.sampling_ratio, self.aligned
88 | )
89 |
90 | def __repr__(self):
91 | tmpstr = self.__class__.__name__ + "("
92 | tmpstr += "output_size=" + str(self.output_size)
93 | tmpstr += ", spatial_scale=" + str(self.spatial_scale)
94 | tmpstr += ", sampling_ratio=" + str(self.sampling_ratio)
95 | tmpstr += ", aligned=" + str(self.aligned)
96 | tmpstr += ")"
97 | return tmpstr
98 |
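A usage sketch (illustrative; it assumes the compiled det._C extension is available): each ROI row is a batch index followed by 8 (x, y) control points in input-image coordinates, and the output keeps the input channel count.

    import torch

    pooler = BezierAlign(output_size=(8, 32), spatial_scale=0.25, sampling_ratio=1)
    features = torch.rand(1, 256, 64, 64)
    # one ROI: batch index, then 8 (x, y) Bezier control points
    rois = torch.cat([torch.zeros(1, 1), torch.rand(1, 16) * 256], dim=1)  # (1, 17)
    output = pooler(features, rois)  # -> (1, 256, 8, 32)
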
--------------------------------------------------------------------------------
/det/layers/conv_with_kaiming_uniform.py:
--------------------------------------------------------------------------------
1 | from torch import nn
2 |
3 | from detectron2.layers import Conv2d
4 | from .deform_conv import DFConv2d
5 | from detectron2.layers.batch_norm import get_norm
6 |
7 |
8 | def conv_with_kaiming_uniform(
9 | norm=None, activation=None,
10 | use_deformable=False, use_sep=False):
11 | def make_conv(
12 | in_channels, out_channels, kernel_size, stride=1, dilation=1
13 | ):
14 | if use_deformable:
15 | conv_func = DFConv2d
16 | else:
17 | conv_func = Conv2d
18 | if use_sep:
19 | assert in_channels == out_channels
20 | groups = in_channels
21 | else:
22 | groups = 1
23 | conv = conv_func(
24 | in_channels,
25 | out_channels,
26 | kernel_size=kernel_size,
27 | stride=stride,
28 | padding=dilation * (kernel_size - 1) // 2,
29 | dilation=dilation,
30 | groups=groups,
31 | bias=(norm is None)
32 | )
33 | if not use_deformable:
34 | # Caffe2 implementation uses XavierFill, which in fact
35 | # corresponds to kaiming_uniform_ in PyTorch
36 | nn.init.kaiming_uniform_(conv.weight, a=1)
37 | if norm is None:
38 | nn.init.constant_(conv.bias, 0)
39 | module = [conv,]
40 | if norm is not None and len(norm) > 0:
41 | if norm == "GN":
42 | norm_module = nn.GroupNorm(32, out_channels)
43 | else:
44 | norm_module = get_norm(norm, out_channels)
45 | module.append(norm_module)
46 | if activation is not None:
47 | module.append(nn.ReLU(inplace=True))
48 | if len(module) > 1:
49 | return nn.Sequential(*module)
50 | return conv
51 |
52 | return make_conv
53 |
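A usage sketch (illustrative): the factory returns a make_conv function; with norm="GN" and an activation, each call yields a conv + GroupNorm + ReLU block.

    import torch

    make_conv = conv_with_kaiming_uniform(norm="GN", activation=True)
    block = make_conv(256, 256, kernel_size=3)   # Conv2d -> GroupNorm(32) -> ReLU
    y = block(torch.rand(2, 256, 32, 32))        # -> (2, 256, 32, 32)
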
--------------------------------------------------------------------------------
/det/layers/csrc/BezierAlign/BezierAlign.h:
--------------------------------------------------------------------------------
1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
2 | #pragma once
3 | #include <ATen/ATen.h>
4 |
5 | namespace det {
6 |
7 | at::Tensor BezierAlign_forward_cpu(
8 | const at::Tensor& input,
9 | const at::Tensor& rois,
10 | const float spatial_scale,
11 | const int pooled_height,
12 | const int pooled_width,
13 | const int sampling_ratio,
14 | bool aligned);
15 |
16 | at::Tensor BezierAlign_backward_cpu(
17 | const at::Tensor& grad,
18 | const at::Tensor& rois,
19 | const float spatial_scale,
20 | const int pooled_height,
21 | const int pooled_width,
22 | const int batch_size,
23 | const int channels,
24 | const int height,
25 | const int width,
26 | const int sampling_ratio,
27 | bool aligned);
28 |
29 | #ifdef WITH_CUDA
30 | at::Tensor BezierAlign_forward_cuda(
31 | const at::Tensor& input,
32 | const at::Tensor& rois,
33 | const float spatial_scale,
34 | const int pooled_height,
35 | const int pooled_width,
36 | const int sampling_ratio,
37 | bool aligned);
38 |
39 | at::Tensor BezierAlign_backward_cuda(
40 | const at::Tensor& grad,
41 | const at::Tensor& rois,
42 | const float spatial_scale,
43 | const int pooled_height,
44 | const int pooled_width,
45 | const int batch_size,
46 | const int channels,
47 | const int height,
48 | const int width,
49 | const int sampling_ratio,
50 | bool aligned);
51 | #endif
52 |
53 | // Interface for Python
54 | inline at::Tensor BezierAlign_forward(
55 | const at::Tensor& input,
56 | const at::Tensor& rois,
57 | const float spatial_scale,
58 | const int pooled_height,
59 | const int pooled_width,
60 | const int sampling_ratio,
61 | bool aligned) {
62 | if (input.type().is_cuda()) {
63 | #ifdef WITH_CUDA
64 | return BezierAlign_forward_cuda(
65 | input,
66 | rois,
67 | spatial_scale,
68 | pooled_height,
69 | pooled_width,
70 | sampling_ratio,
71 | aligned);
72 | #else
73 | AT_ERROR("Not compiled with GPU support");
74 | #endif
75 | }
76 | return BezierAlign_forward_cpu(
77 | input,
78 | rois,
79 | spatial_scale,
80 | pooled_height,
81 | pooled_width,
82 | sampling_ratio,
83 | aligned);
84 | }
85 |
86 | inline at::Tensor BezierAlign_backward(
87 | const at::Tensor& grad,
88 | const at::Tensor& rois,
89 | const float spatial_scale,
90 | const int pooled_height,
91 | const int pooled_width,
92 | const int batch_size,
93 | const int channels,
94 | const int height,
95 | const int width,
96 | const int sampling_ratio,
97 | bool aligned) {
98 | if (grad.type().is_cuda()) {
99 | #ifdef WITH_CUDA
100 | return BezierAlign_backward_cuda(
101 | grad,
102 | rois,
103 | spatial_scale,
104 | pooled_height,
105 | pooled_width,
106 | batch_size,
107 | channels,
108 | height,
109 | width,
110 | sampling_ratio,
111 | aligned);
112 | #else
113 | AT_ERROR("Not compiled with GPU support");
114 | #endif
115 | }
116 | return BezierAlign_backward_cpu(
117 | grad,
118 | rois,
119 | spatial_scale,
120 | pooled_height,
121 | pooled_width,
122 | batch_size,
123 | channels,
124 | height,
125 | width,
126 | sampling_ratio,
127 | aligned);
128 | }
129 |
130 | } // namespace det
131 |
--------------------------------------------------------------------------------
/det/layers/csrc/DefROIAlign/DefROIAlign.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 | #include <ATen/ATen.h>
3 |
4 | namespace det {
5 |
6 | #ifdef WITH_CUDA
7 | at::Tensor DefROIAlign_forward_cuda(
8 | const at::Tensor& input,
9 | const at::Tensor& rois,
10 | const at::Tensor& offsets, // def added
11 | const float spatial_scale,
12 | const int pooled_height,
13 | const int pooled_width,
14 | const int sampling_ratio,
15 | const float trans_std, // def added
16 | bool aligned);
17 |
18 | at::Tensor DefROIAlign_backward_cuda(
19 | const at::Tensor& input, // def added
20 | const at::Tensor& grad,
21 | const at::Tensor& rois,
22 | const at::Tensor& offsets, // def added
23 | const at::Tensor& grad_offsets, // def added
24 | const float spatial_scale,
25 | const int pooled_height,
26 | const int pooled_width,
27 | const int batch_size,
28 | const int channels,
29 | const int height,
30 | const int width,
31 | const int sampling_ratio,
32 | const float trans_std, // def added
33 | bool aligned);
34 | #endif
35 |
36 | // Interface for Python
37 | inline at::Tensor DefROIAlign_forward(
38 | const at::Tensor& input,
39 | const at::Tensor& rois,
40 | const at::Tensor& offsets, // def added
41 | const float spatial_scale,
42 | const int pooled_height,
43 | const int pooled_width,
44 | const int sampling_ratio,
45 | const float trans_std, // def added
46 | bool aligned) {
47 | if (input.type().is_cuda()) {
48 | #ifdef WITH_CUDA
49 | return DefROIAlign_forward_cuda(
50 | input,
51 | rois,
52 | offsets,
53 | spatial_scale,
54 | pooled_height,
55 | pooled_width,
56 | sampling_ratio,
57 | trans_std,
58 | aligned);
59 | #else
60 | AT_ERROR("Not compiled with GPU support");
61 | #endif
62 | }
63 | AT_ERROR("CPU version not supported");
64 | }
65 |
66 | inline at::Tensor DefROIAlign_backward(
67 | const at::Tensor& input, // def added
68 | const at::Tensor& grad,
69 | const at::Tensor& rois,
70 | const at::Tensor& offsets, // def added
71 | const at::Tensor& grad_offsets, // def added
72 | const float spatial_scale,
73 | const int pooled_height,
74 | const int pooled_width,
75 | const int batch_size,
76 | const int channels,
77 | const int height,
78 | const int width,
79 | const int sampling_ratio,
80 | const float trans_std, // def added
81 | bool aligned) {
82 | if (grad.type().is_cuda()) {
83 | #ifdef WITH_CUDA
84 | return DefROIAlign_backward_cuda(
85 | input, // def added
86 | grad,
87 | rois,
88 | offsets, // def added
89 | grad_offsets, // def added
90 | spatial_scale,
91 | pooled_height,
92 | pooled_width,
93 | batch_size,
94 | channels,
95 | height,
96 | width,
97 | sampling_ratio,
98 | trans_std, // def added
99 | aligned);
100 | #else
101 | AT_ERROR("Not compiled with GPU support");
102 | #endif
103 | }
104 | AT_ERROR("CPU version not supported");
105 | }
106 |
107 | } // namespace det
108 |
--------------------------------------------------------------------------------
/det/layers/csrc/cuda_version.cu:
--------------------------------------------------------------------------------
1 | #include <cuda_runtime_api.h>
2 |
3 | namespace det {
4 | int get_cudart_version() {
5 | return CUDART_VERSION;
6 | }
7 | } // namespace det
8 |
--------------------------------------------------------------------------------
/det/layers/csrc/ml_nms/ml_nms.cu:
--------------------------------------------------------------------------------
1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
2 | #include <ATen/ATen.h>
3 | #include <ATen/cuda/CUDAContext.h>
4 | #include <THC/THC.h>
5 | #include <THC/THCDeviceUtils.cuh>
6 | 
7 | #include <vector>
8 | #include <iostream>
9 |
10 | int const threadsPerBlock = sizeof(unsigned long long) * 8;
11 |
12 | __device__ inline float devIoU(float const * const a, float const * const b) {
13 | if (a[5] != b[5]) {
14 | return 0.0;
15 | }
16 | float left = max(a[0], b[0]), right = min(a[2], b[2]);
17 | float top = max(a[1], b[1]), bottom = min(a[3], b[3]);
18 | float width = max(right - left + 1, 0.f), height = max(bottom - top + 1, 0.f);
19 | float interS = width * height;
20 | float Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1);
21 | float Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1);
22 | return interS / (Sa + Sb - interS);
23 | }
24 |
25 | __global__ void ml_nms_kernel(const int n_boxes, const float nms_overlap_thresh,
26 | const float *dev_boxes, unsigned long long *dev_mask) {
27 | const int row_start = blockIdx.y;
28 | const int col_start = blockIdx.x;
29 |
30 | // if (row_start > col_start) return;
31 |
32 | const int row_size =
33 | min(n_boxes - row_start * threadsPerBlock, threadsPerBlock);
34 | const int col_size =
35 | min(n_boxes - col_start * threadsPerBlock, threadsPerBlock);
36 |
37 | __shared__ float block_boxes[threadsPerBlock * 6];
38 | if (threadIdx.x < col_size) {
39 | block_boxes[threadIdx.x * 6 + 0] =
40 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 6 + 0];
41 | block_boxes[threadIdx.x * 6 + 1] =
42 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 6 + 1];
43 | block_boxes[threadIdx.x * 6 + 2] =
44 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 6 + 2];
45 | block_boxes[threadIdx.x * 6 + 3] =
46 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 6 + 3];
47 | block_boxes[threadIdx.x * 6 + 4] =
48 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 6 + 4];
49 | block_boxes[threadIdx.x * 6 + 5] =
50 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 6 + 5];
51 | }
52 | __syncthreads();
53 |
54 | if (threadIdx.x < row_size) {
55 | const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x;
56 | const float *cur_box = dev_boxes + cur_box_idx * 6;
57 | int i = 0;
58 | unsigned long long t = 0;
59 | int start = 0;
60 | if (row_start == col_start) {
61 | start = threadIdx.x + 1;
62 | }
63 | for (i = start; i < col_size; i++) {
64 | if (devIoU(cur_box, block_boxes + i * 6) > nms_overlap_thresh) {
65 | t |= 1ULL << i;
66 | }
67 | }
68 | const int col_blocks = THCCeilDiv(n_boxes, threadsPerBlock);
69 | dev_mask[cur_box_idx * col_blocks + col_start] = t;
70 | }
71 | }
72 |
73 | namespace det {
74 |
75 | // boxes is a N x 6 tensor
76 | at::Tensor ml_nms_cuda(const at::Tensor boxes, const float nms_overlap_thresh) {
77 | using scalar_t = float;
78 | AT_ASSERTM(boxes.type().is_cuda(), "boxes must be a CUDA tensor");
79 | auto scores = boxes.select(1, 4);
80 | auto order_t = std::get<1>(scores.sort(0, /* descending=*/true));
81 | auto boxes_sorted = boxes.index_select(0, order_t);
82 |
83 | int boxes_num = boxes.size(0);
84 |
85 | const int col_blocks = THCCeilDiv(boxes_num, threadsPerBlock);
86 |
87 | scalar_t* boxes_dev = boxes_sorted.data<scalar_t>();
88 |
89 | THCState *state = at::globalContext().lazyInitCUDA(); // TODO replace with getTHCState
90 |
91 | unsigned long long* mask_dev = NULL;
92 | //THCudaCheck(THCudaMalloc(state, (void**) &mask_dev,
93 | // boxes_num * col_blocks * sizeof(unsigned long long)));
94 |
95 | mask_dev = (unsigned long long*) THCudaMalloc(state, boxes_num * col_blocks * sizeof(unsigned long long));
96 |
97 | dim3 blocks(THCCeilDiv(boxes_num, threadsPerBlock),
98 | THCCeilDiv(boxes_num, threadsPerBlock));
99 | dim3 threads(threadsPerBlock);
100 | ml_nms_kernel<<<blocks, threads>>>(boxes_num,
101 | nms_overlap_thresh,
102 | boxes_dev,
103 | mask_dev);
104 |
105 | std::vector<unsigned long long> mask_host(boxes_num * col_blocks);
106 | THCudaCheck(cudaMemcpy(&mask_host[0],
107 | mask_dev,
108 | sizeof(unsigned long long) * boxes_num * col_blocks,
109 | cudaMemcpyDeviceToHost));
110 |
111 | std::vector<unsigned long long> remv(col_blocks);
112 | memset(&remv[0], 0, sizeof(unsigned long long) * col_blocks);
113 |
114 | at::Tensor keep = at::empty({boxes_num}, boxes.options().dtype(at::kLong).device(at::kCPU));
115 | int64_t* keep_out = keep.data<int64_t>();
116 |
117 | int num_to_keep = 0;
118 | for (int i = 0; i < boxes_num; i++) {
119 | int nblock = i / threadsPerBlock;
120 | int inblock = i % threadsPerBlock;
121 |
122 | if (!(remv[nblock] & (1ULL << inblock))) {
123 | keep_out[num_to_keep++] = i;
124 | unsigned long long *p = &mask_host[0] + i * col_blocks;
125 | for (int j = nblock; j < col_blocks; j++) {
126 | remv[j] |= p[j];
127 | }
128 | }
129 | }
130 |
131 | THCudaFree(state, mask_dev);
132 | // TODO improve this part
133 | return std::get<0>(order_t.index({
134 | keep.narrow(/*dim=*/0, /*start=*/0, /*length=*/num_to_keep).to(
135 | order_t.device(), keep.scalar_type())
136 | }).sort(0, false));
137 | }
138 |
139 | } // namespace det
--------------------------------------------------------------------------------
/det/layers/csrc/ml_nms/ml_nms.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 | #include <ATen/ATen.h>
3 |
4 | namespace det {
5 |
6 |
7 | #ifdef WITH_CUDA
8 | at::Tensor ml_nms_cuda(
9 | const at::Tensor dets,
10 | const float threshold);
11 | #endif
12 |
13 | at::Tensor ml_nms(const at::Tensor& dets,
14 | const at::Tensor& scores,
15 | const at::Tensor& labels,
16 | const float threshold) {
17 |
18 | if (dets.type().is_cuda()) {
19 | #ifdef WITH_CUDA
20 | // TODO raise error if not compiled with CUDA
21 | if (dets.numel() == 0)
22 | return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU));
23 | auto b = at::cat({dets, scores.unsqueeze(1), labels.unsqueeze(1)}, 1);
24 | return ml_nms_cuda(b, threshold);
25 | #else
26 | AT_ERROR("Not compiled with GPU support");
27 | #endif
28 | }
29 | AT_ERROR("CPU version not implemented");
30 | }
31 |
32 | } // namespace det
33 |
--------------------------------------------------------------------------------
/det/layers/csrc/vision.cpp:
--------------------------------------------------------------------------------
1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
2 |
3 | #include "ml_nms/ml_nms.h"
4 | #include "DefROIAlign/DefROIAlign.h"
5 | #include "BezierAlign/BezierAlign.h"
6 |
7 | namespace det {
8 |
9 | #ifdef WITH_CUDA
10 | extern int get_cudart_version();
11 | #endif
12 |
13 | std::string get_cuda_version() {
14 | #ifdef WITH_CUDA
15 | std::ostringstream oss;
16 |
17 | // copied from
18 | // https://github.com/pytorch/pytorch/blob/master/aten/src/ATen/cuda/detail/CUDAHooks.cpp#L231
19 | auto printCudaStyleVersion = [&](int v) {
20 | oss << (v / 1000) << "." << (v / 10 % 100);
21 | if (v % 10 != 0) {
22 | oss << "." << (v % 10);
23 | }
24 | };
25 | printCudaStyleVersion(get_cudart_version());
26 | return oss.str();
27 | #else
28 | return std::string("not available");
29 | #endif
30 | }
31 |
32 | // similar to
33 | // https://github.com/pytorch/pytorch/blob/master/aten/src/ATen/Version.cpp
34 | std::string get_compiler_version() {
35 | std::ostringstream ss;
36 | #if defined(__GNUC__)
37 | #ifndef __clang__
38 | { ss << "GCC " << __GNUC__ << "." << __GNUC_MINOR__; }
39 | #endif
40 | #endif
41 |
42 | #if defined(__clang_major__)
43 | {
44 | ss << "clang " << __clang_major__ << "." << __clang_minor__ << "."
45 | << __clang_patchlevel__;
46 | }
47 | #endif
48 |
49 | #if defined(_MSC_VER)
50 | { ss << "MSVC " << _MSC_FULL_VER; }
51 | #endif
52 | return ss.str();
53 | }
54 |
55 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
56 | m.def("ml_nms", &ml_nms, "Multi-Label NMS");
57 | m.def("def_roi_align_forward", &DefROIAlign_forward, "def_roi_align_forward");
58 | m.def("def_roi_align_backward", &DefROIAlign_backward, "def_roi_align_backward");
59 | m.def("bezier_align_forward", &BezierAlign_forward, "bezier_align_forward");
60 | m.def("bezier_align_backward", &BezierAlign_backward, "bezier_align_backward");
61 | }
62 |
63 | } // namespace det
64 |
--------------------------------------------------------------------------------
/det/layers/def_roi_align.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch import nn
3 | from torch.autograd import Function
4 | from torch.autograd.function import once_differentiable
5 | from torch.nn.modules.utils import _pair
6 |
7 | from det import _C
8 |
9 |
10 | class _DefROIAlign(Function):
11 | @staticmethod
12 | def forward(ctx, input, roi, offsets, output_size, spatial_scale, sampling_ratio, trans_std, aligned):
13 | ctx.save_for_backward(input, roi, offsets)
14 | ctx.output_size = _pair(output_size)
15 | ctx.spatial_scale = spatial_scale
16 | ctx.sampling_ratio = sampling_ratio
17 | ctx.trans_std = trans_std
18 | ctx.input_shape = input.size()
19 | ctx.aligned = aligned
20 | output = _C.def_roi_align_forward(
21 | input, roi, offsets, spatial_scale, output_size[0], output_size[1],
22 | sampling_ratio, trans_std, aligned
23 | )
24 | return output
25 |
26 | @staticmethod
27 | @once_differentiable
28 | def backward(ctx, grad_output):
29 | data, rois, offsets = ctx.saved_tensors
30 | output_size = ctx.output_size
31 | spatial_scale = ctx.spatial_scale
32 | sampling_ratio = ctx.sampling_ratio
33 | trans_std = ctx.trans_std
34 | bs, ch, h, w = ctx.input_shape
35 | grad_offsets = torch.zeros_like(offsets)
36 |
37 | grad_input = _C.def_roi_align_backward(
38 | data,
39 | grad_output,
40 | rois,
41 | offsets,
42 | grad_offsets,
43 | spatial_scale,
44 | output_size[0],
45 | output_size[1],
46 | bs,
47 | ch,
48 | h,
49 | w,
50 | sampling_ratio,
51 | trans_std,
52 | ctx.aligned,
53 | )
54 | return grad_input, None, grad_offsets, None, None, None, None, None
55 |
56 |
57 | def_roi_align = _DefROIAlign.apply
58 |
59 |
60 | class DefROIAlign(nn.Module):
61 | def __init__(self, output_size, spatial_scale,
62 | sampling_ratio, trans_std, aligned=True):
63 | """
64 | Args:
65 | output_size (tuple): h, w
66 | spatial_scale (float): scale the input boxes by this number
67 | sampling_ratio (int): number of inputs samples to take for each output
68 | sample. 0 to take samples densely.
69 | trans_std (float): offset scale according to the normalized roi size
70 | aligned (bool): if False, use the legacy implementation in
71 | Detectron. If True, align the results more perfectly.
72 | """
73 | super(DefROIAlign, self).__init__()
74 | self.output_size = output_size
75 | self.spatial_scale = spatial_scale
76 | self.sampling_ratio = sampling_ratio
77 | self.trans_std = trans_std
78 | self.aligned = aligned
79 |
80 | def forward(self, input, rois, offsets):
81 | """
82 | Args:
83 | input: NCHW images
84 | rois: Bx5 boxes. First column is the index into N. The other 4 columns are xyxy.
85 | """
86 | assert rois.dim() == 2 and rois.size(1) == 5
87 | return def_roi_align(
88 | input, rois, offsets, self.output_size,
89 | self.spatial_scale, self.sampling_ratio,
90 | self.trans_std, self.aligned
91 | )
92 |
93 | def __repr__(self):
94 | tmpstr = self.__class__.__name__ + "("
95 | tmpstr += "output_size=" + str(self.output_size)
96 | tmpstr += ", spatial_scale=" + str(self.spatial_scale)
97 | tmpstr += ", sampling_ratio=" + str(self.sampling_ratio)
98 | tmpstr += ", trans_std=" + str(self.trans_std)
99 | tmpstr += ", aligned=" + str(self.aligned)
100 | tmpstr += ")"
101 | return tmpstr
102 |
--------------------------------------------------------------------------------
/det/layers/deform_conv.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch import nn
3 |
4 | from detectron2.layers import Conv2d
5 |
6 |
7 | class _NewEmptyTensorOp(torch.autograd.Function):
8 | @staticmethod
9 | def forward(ctx, x, new_shape):
10 | ctx.shape = x.shape
11 | return x.new_empty(new_shape)
12 |
13 | @staticmethod
14 | def backward(ctx, grad):
15 | shape = ctx.shape
16 | return _NewEmptyTensorOp.apply(grad, shape), None
17 |
18 |
19 | class DFConv2d(nn.Module):
20 | """
21 | Deformable convolutional layer with configurable
22 | deformable groups, dilations and groups.
23 |
24 | Code is from:
25 | https://github.com/facebookresearch/maskrcnn-benchmark/blob/master/maskrcnn_benchmark/layers/misc.py
26 |
27 |
28 | """
29 | def __init__(
30 | self,
31 | in_channels,
32 | out_channels,
33 | with_modulated_dcn=True,
34 | kernel_size=3,
35 | stride=1,
36 | groups=1,
37 | dilation=1,
38 | deformable_groups=1,
39 | bias=False,
40 | padding=None
41 | ):
42 | super(DFConv2d, self).__init__()
43 | if isinstance(kernel_size, (list, tuple)):
44 | assert isinstance(stride, (list, tuple))
45 | assert isinstance(dilation, (list, tuple))
46 | assert len(kernel_size) == 2
47 | assert len(stride) == 2
48 | assert len(dilation) == 2
49 | padding = (
50 | dilation[0] * (kernel_size[0] - 1) // 2,
51 | dilation[1] * (kernel_size[1] - 1) // 2
52 | )
53 | offset_base_channels = kernel_size[0] * kernel_size[1]
54 | else:
55 | padding = dilation * (kernel_size - 1) // 2
56 | offset_base_channels = kernel_size * kernel_size
57 | if with_modulated_dcn:
58 | from detectron2.layers.deform_conv import ModulatedDeformConv
59 | offset_channels = offset_base_channels * 3 # default: 27
60 | conv_block = ModulatedDeformConv
61 | else:
62 | from detectron2.layers.deform_conv import DeformConv
63 | offset_channels = offset_base_channels * 2 # default: 18
64 | conv_block = DeformConv
65 | self.offset = Conv2d(
66 | in_channels,
67 | deformable_groups * offset_channels,
68 | kernel_size=kernel_size,
69 | stride=stride,
70 | padding=padding,
71 | groups=1,
72 | dilation=dilation
73 | )
74 | for l in [self.offset, ]:
75 | nn.init.kaiming_uniform_(l.weight, a=1)
76 | torch.nn.init.constant_(l.bias, 0.)
77 | self.conv = conv_block(
78 | in_channels,
79 | out_channels,
80 | kernel_size=kernel_size,
81 | stride=stride,
82 | padding=padding,
83 | dilation=dilation,
84 | groups=groups,
85 | deformable_groups=deformable_groups,
86 | bias=bias
87 | )
88 | self.with_modulated_dcn = with_modulated_dcn
89 | self.kernel_size = kernel_size
90 | self.stride = stride
91 | self.padding = padding
92 | self.dilation = dilation
93 | self.offset_split = offset_base_channels * deformable_groups * 2
94 |
95 | def forward(self, x, return_offset=False):
96 | if x.numel() > 0:
97 | if not self.with_modulated_dcn:
98 | offset_mask = self.offset(x)
99 | x = self.conv(x, offset_mask)
100 | else:
101 | offset_mask = self.offset(x)
102 | offset = offset_mask[:, :self.offset_split, :, :]
103 | mask = offset_mask[:, self.offset_split:, :, :].sigmoid()
104 | x = self.conv(x, offset, mask)
105 | if return_offset:
106 | return x, offset_mask
107 | return x
108 | # get output shape
109 | output_shape = [
110 | (i + 2 * p - (di * (k - 1) + 1)) // d + 1
111 | for i, p, di, k, d in zip(
112 | x.shape[-2:],
113 | self.padding,
114 | self.dilation,
115 | self.kernel_size,
116 | self.stride
117 | )
118 | ]
119 | output_shape = [x.shape[0], self.conv.weight.shape[0]] + output_shape
120 | return _NewEmptyTensorOp.apply(x, output_shape)
121 |
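A usage sketch (illustrative): DFConv2d behaves as a drop-in replacement for a 3x3 Conv2d; the modulated variant relies on detectron2's ModulatedDeformConv CUDA op, so the example assumes a GPU is available.

    import torch

    dcn = DFConv2d(256, 256, with_modulated_dcn=True, kernel_size=3).cuda()
    y = dcn(torch.rand(2, 256, 32, 32, device="cuda"))   # -> (2, 256, 32, 32)
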
--------------------------------------------------------------------------------
/det/layers/gcn.py:
--------------------------------------------------------------------------------
1 | # coding:utf-8
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 |
5 |
6 | class Conv2D(nn.Module):
7 | def __init__(self, in_channels, out_channels, kernel_size, padding='same',
8 | stride=1, dilation=1, groups=1):
9 | super(Conv2D, self).__init__()
10 |
11 | assert type(kernel_size) in [int, tuple], "Allowed kernel type [int or tuple], not {}".format(type(kernel_size))
12 | assert padding == 'same', "Allowed padding type {}, not {}".format('same', padding)
13 |
14 | self.kernel_size = kernel_size
15 | if isinstance(kernel_size, tuple):
16 | self.h_kernel = kernel_size[0]
17 | self.w_kernel = kernel_size[1]
18 | else:
19 | self.h_kernel = kernel_size
20 | self.w_kernel = kernel_size
21 |
22 | self.padding = padding
23 | self.stride = stride
24 | self.dilation = dilation
25 | self.groups = groups
26 | self.conv = nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size,
27 | stride=self.stride, dilation=self.dilation, groups=self.groups)
28 |
29 | def forward(self, x):
30 |
31 | if self.padding == 'same':
32 |
33 | height, width = x.shape[2:]
34 |
35 | h_pad_need = max(0, (height - 1) * self.stride + self.h_kernel - height)
36 | w_pad_need = max(0, (width - 1) * self.stride + self.w_kernel - width)
37 |
38 | pad_left = w_pad_need // 2
39 | pad_right = w_pad_need - pad_left
40 | pad_top = h_pad_need // 2
41 | pad_bottom = h_pad_need - pad_top
42 |
43 | padding = (pad_left, pad_right, pad_top, pad_bottom)
44 |
45 | x = F.pad(x, padding, 'constant', 0)
46 |
47 | x = self.conv(x)
48 |
49 | return x
50 |
51 |
52 | class GCN(nn.Module):
53 | """
54 | Large Kernel Matters -- https://arxiv.org/abs/1703.02719
55 | """
56 | def __init__(self, in_channels, out_channels, k=3):
57 | super(GCN, self).__init__()
58 |
59 | self.conv_l1 = Conv2D(in_channels=in_channels, out_channels=out_channels, kernel_size=(k, 1), padding='same')
60 | self.conv_l2 = Conv2D(in_channels=out_channels, out_channels=out_channels, kernel_size=(1, k), padding='same')
61 |
62 | self.conv_r1 = Conv2D(in_channels=in_channels, out_channels=out_channels, kernel_size=(1, k), padding='same')
63 | self.conv_r2 = Conv2D(in_channels=out_channels, out_channels=out_channels, kernel_size=(k, 1), padding='same')
64 |
65 | def forward(self, x):
66 | x1 = self.conv_l1(x)
67 | x1 = self.conv_l2(x1)
68 |
69 | x2 = self.conv_r1(x)
70 | x2 = self.conv_r2(x2)
71 |
72 | out = x1 + x2
73 |
74 | return out
75 |
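A usage sketch (illustrative): with k=7 the block approximates a 7x7 kernel via (7x1)(1x7) and (1x7)(7x1) branches, and the 'same' padding keeps the spatial size.

    import torch

    gcn = GCN(in_channels=256, out_channels=21, k=7)
    y = gcn(torch.rand(1, 256, 64, 64))   # -> (1, 21, 64, 64)
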
--------------------------------------------------------------------------------
/det/layers/iou_loss.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch import nn
3 |
4 |
5 | class IOULoss(nn.Module):
6 | """
7 | Intersection Over Union (IoU) loss which supports three
8 | different IoU computations:
9 |
10 | * IoU
11 | * Linear IoU
12 | * gIoU
13 | """
14 | def __init__(self, loc_loss_type='iou'):
15 | super(IOULoss, self).__init__()
16 | self.loc_loss_type = loc_loss_type
17 |
18 | def forward(self, pred, target, weight=None):
19 | """
20 | Args:
21 | pred: Nx4 predicted bounding boxes
22 | target: Nx4 target bounding boxes
23 | weight: N loss weight for each instance
24 | """
25 | pred_left = pred[:, 0]
26 | pred_top = pred[:, 1]
27 | pred_right = pred[:, 2]
28 | pred_bottom = pred[:, 3]
29 |
30 | target_left = target[:, 0]
31 | target_top = target[:, 1]
32 | target_right = target[:, 2]
33 | target_bottom = target[:, 3]
34 |
35 | target_area = (target_left + target_right) * \
36 | (target_top + target_bottom)
37 | pred_area = (pred_left + pred_right) * \
38 | (pred_top + pred_bottom)
39 | 
40 | w_intersect = torch.min(pred_left, target_left) + \
41 | torch.min(pred_right, target_right)
42 | h_intersect = torch.min(pred_bottom, target_bottom) + \
43 | torch.min(pred_top, target_top)
44 | 
45 | g_w_intersect = torch.max(pred_left, target_left) + \
46 | torch.max(pred_right, target_right)
47 | g_h_intersect = torch.max(pred_bottom, target_bottom) + \
48 | torch.max(pred_top, target_top)
49 | ac_union = g_w_intersect * g_h_intersect
50 | 
51 | area_intersect = w_intersect * h_intersect
52 | area_union = target_area + pred_area - area_intersect
53 | 
54 | ious = (area_intersect + 1.0) / (area_union + 1.0)
55 | gious = ious - (ac_union - area_union) / ac_union
56 | if self.loc_loss_type == 'iou':
57 | losses = -torch.log(ious)
58 | elif self.loc_loss_type == 'linear_iou':
59 | losses = 1 - ious
60 | elif self.loc_loss_type == 'giou':
61 | losses = 1 - gious
62 | else:
63 | raise NotImplementedError
64 |
65 | if weight is not None:
66 | return (losses * weight).sum()
67 | else:
68 | return losses.sum()
69 |
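A usage sketch (illustrative): note that pred and target are FCOS-style (left, top, right, bottom) distances from a location to the four box sides, not xyxy corners, and the loss is summed (optionally weighted) over locations.

    import torch

    criterion = IOULoss(loc_loss_type="giou")
    pred = torch.rand(16, 4) * 10     # (l, t, r, b) distances
    target = torch.rand(16, 4) * 10
    loss = criterion(pred, target)                        # summed over 16 locations
    weighted = criterion(pred, target, torch.ones(16))    # per-location weights
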
--------------------------------------------------------------------------------
/det/layers/ml_nms.py:
--------------------------------------------------------------------------------
1 | from detectron2.layers import batched_nms
2 | from .soft_nms import batched_soft_nms
3 |
4 | def ml_nms(boxlist, nms_thresh, max_proposals=-1,
5 | score_field="scores", label_field="labels"):
6 | """
7 | Performs non-maximum suppression on a boxlist, with scores specified
8 | in a boxlist field via score_field.
9 |
10 | Args:
11 | boxlist (detectron2.structures.Boxes):
12 | nms_thresh (float):
13 | max_proposals (int): if > 0, then only the top max_proposals are kept
14 | after non-maximum suppression
15 | score_field (str):
16 | """
17 | if nms_thresh <= 0:
18 | return boxlist
19 | boxes = boxlist.pred_boxes.tensor
20 | scores = boxlist.scores
21 | labels = boxlist.pred_classes
22 | keep = batched_nms(boxes, scores, labels, nms_thresh)
23 | if max_proposals > 0:
24 | keep = keep[: max_proposals]
25 | boxlist = boxlist[keep]
26 | return boxlist
27 | 
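A usage sketch (illustrative; assumes detectron2 is installed): ml_nms suppresses boxes per class, so the two heavily overlapping boxes of the same class below collapse to one.

    import torch
    from detectron2.structures import Boxes, Instances

    dets = Instances((480, 640))
    dets.pred_boxes = Boxes(torch.tensor([[10., 10., 100., 100.], [12., 12., 98., 99.]]))
    dets.scores = torch.tensor([0.9, 0.8])
    dets.pred_classes = torch.tensor([1, 1])
    kept = ml_nms(dets, nms_thresh=0.6)   # only the higher-scoring box survives
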
--------------------------------------------------------------------------------
/det/layers/naive_group_norm.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch.nn import Module, Parameter
3 | from torch.nn import init
4 |
5 |
6 | class NaiveGroupNorm(Module):
7 | r"""NaiveGroupNorm implements Group Normalization with the high-level matrix operations in PyTorch.
8 | It is a temporary solution to export GN by ONNX before the official GN can be exported by ONNX.
9 | The usage of NaiveGroupNorm is exactly the same as the official :class:`torch.nn.GroupNorm`.
10 | Args:
11 | num_groups (int): number of groups to separate the channels into
12 | num_channels (int): number of channels expected in input
13 | eps: a value added to the denominator for numerical stability. Default: 1e-5
14 | affine: a boolean value that when set to ``True``, this module
15 | has learnable per-channel affine parameters initialized to ones (for weights)
16 | and zeros (for biases). Default: ``True``.
17 |
18 | Shape:
19 | - Input: :math:`(N, C, *)` where :math:`C=\text{num\_channels}`
20 | - Output: :math:`(N, C, *)` (same shape as input)
21 |
22 | Examples::
23 |
24 | >>> input = torch.randn(20, 6, 10, 10)
25 | >>> # Separate 6 channels into 3 groups
26 | >>> m = NaiveGroupNorm(3, 6)
27 | >>> # Separate 6 channels into 6 groups (equivalent with InstanceNorm)
28 | >>> m = NaiveGroupNorm(6, 6)
29 | >>> # Put all 6 channels into a single group (equivalent with LayerNorm)
30 | >>> m = NaiveGroupNorm(1, 6)
31 | >>> # Activating the module
32 | >>> output = m(input)
33 |
34 | .. _`Group Normalization`: https://arxiv.org/abs/1803.08494
35 | """
36 | __constants__ = ['num_groups', 'num_channels', 'eps', 'affine', 'weight',
37 | 'bias']
38 |
39 | def __init__(self, num_groups, num_channels, eps=1e-5, affine=True):
40 | super(NaiveGroupNorm, self).__init__()
41 | self.num_groups = num_groups
42 | self.num_channels = num_channels
43 | self.eps = eps
44 | self.affine = affine
45 | if self.affine:
46 | self.weight = Parameter(torch.Tensor(num_channels))
47 | self.bias = Parameter(torch.Tensor(num_channels))
48 | else:
49 | self.register_parameter('weight', None)
50 | self.register_parameter('bias', None)
51 | self.reset_parameters()
52 |
53 | def reset_parameters(self):
54 | if self.affine:
55 | init.ones_(self.weight)
56 | init.zeros_(self.bias)
57 |
58 | def forward(self, input):
59 | N, C, H, W = input.size()
60 | assert C % self.num_groups == 0
61 | input = input.reshape(N, self.num_groups, -1)
62 | mean = input.mean(dim=-1, keepdim=True)
63 | var = (input ** 2).mean(dim=-1, keepdim=True) - mean ** 2
64 | std = torch.sqrt(var + self.eps)
65 |
66 | input = (input - mean) / std
67 | input = input.reshape(N, C, H, W)
68 | if self.affine:
69 | input = input * self.weight.reshape(1, C, 1, 1) + self.bias.reshape(1, C, 1, 1)
70 | return input
71 |
72 | def extra_repr(self):
73 | return '{num_groups}, {num_channels}, eps={eps}, ' \
74 | 'affine={affine}'.format(**self.__dict__)
75 |
--------------------------------------------------------------------------------
/det/modeling/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
2 | from .fcos import FCOS
3 | from .atss import ATSS
4 | from .backbone import build_fcos_resnet_fpn_backbone
5 | from .one_stage_detector import OneStageDetector, OneStageRCNN
6 | from .roi_heads import LibraRCNNROIHeads
7 |
8 | from .rpn_utils import ModuleListDial, Scale, BoxCoder, permute_and_flatten, concat_box_prediction_layers, ATSSAnchorGenerator
9 |
10 | _EXCLUDE = {"torch", "ShapeSpec"}
11 | __all__ = [k for k in globals().keys() if k not in _EXCLUDE and not k.startswith("_")]
--------------------------------------------------------------------------------
/det/modeling/__pycache__/__init__.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TengFeiHan0/Object-Detection.pytorch/5c2000ba08e903255a146f9e003c87a2fb1cb5aa/det/modeling/__pycache__/__init__.cpython-36.pyc
--------------------------------------------------------------------------------
/det/modeling/__pycache__/one_stage_detector.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TengFeiHan0/Object-Detection.pytorch/5c2000ba08e903255a146f9e003c87a2fb1cb5aa/det/modeling/__pycache__/one_stage_detector.cpython-36.pyc
--------------------------------------------------------------------------------
/det/modeling/__pycache__/rpn_utils.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TengFeiHan0/Object-Detection.pytorch/5c2000ba08e903255a146f9e003c87a2fb1cb5aa/det/modeling/__pycache__/rpn_utils.cpython-36.pyc
--------------------------------------------------------------------------------
/det/modeling/atss/__init__.py:
--------------------------------------------------------------------------------
1 | from .atss import ATSS
--------------------------------------------------------------------------------
/det/modeling/atss/__pycache__/__init__.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TengFeiHan0/Object-Detection.pytorch/5c2000ba08e903255a146f9e003c87a2fb1cb5aa/det/modeling/atss/__pycache__/__init__.cpython-36.pyc
--------------------------------------------------------------------------------
/det/modeling/atss/__pycache__/atss.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TengFeiHan0/Object-Detection.pytorch/5c2000ba08e903255a146f9e003c87a2fb1cb5aa/det/modeling/atss/__pycache__/atss.cpython-36.pyc
--------------------------------------------------------------------------------
/det/modeling/atss/__pycache__/atss_outputs.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TengFeiHan0/Object-Detection.pytorch/5c2000ba08e903255a146f9e003c87a2fb1cb5aa/det/modeling/atss/__pycache__/atss_outputs.cpython-36.pyc
--------------------------------------------------------------------------------
/det/modeling/backbone/__init__.py:
--------------------------------------------------------------------------------
1 | from .fpn import build_fcos_resnet_fpn_backbone, build_resnest_fpn_backbone
2 | from .vovnet import build_vovnet_fpn_backbone, build_vovnet_backbone
3 | from .dla import build_fcos_dla_fpn_backbone
4 | from .resnet_lpf import build_resnet_lpf_backbone
5 | from .resnet import build_resnest_backbone
--------------------------------------------------------------------------------
/det/modeling/backbone/__pycache__/__init__.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TengFeiHan0/Object-Detection.pytorch/5c2000ba08e903255a146f9e003c87a2fb1cb5aa/det/modeling/backbone/__pycache__/__init__.cpython-36.pyc
--------------------------------------------------------------------------------
/det/modeling/backbone/__pycache__/dla.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TengFeiHan0/Object-Detection.pytorch/5c2000ba08e903255a146f9e003c87a2fb1cb5aa/det/modeling/backbone/__pycache__/dla.cpython-36.pyc
--------------------------------------------------------------------------------
/det/modeling/backbone/__pycache__/fpn.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TengFeiHan0/Object-Detection.pytorch/5c2000ba08e903255a146f9e003c87a2fb1cb5aa/det/modeling/backbone/__pycache__/fpn.cpython-36.pyc
--------------------------------------------------------------------------------
/det/modeling/backbone/__pycache__/lpf.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TengFeiHan0/Object-Detection.pytorch/5c2000ba08e903255a146f9e003c87a2fb1cb5aa/det/modeling/backbone/__pycache__/lpf.cpython-36.pyc
--------------------------------------------------------------------------------
/det/modeling/backbone/__pycache__/mobilenet.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TengFeiHan0/Object-Detection.pytorch/5c2000ba08e903255a146f9e003c87a2fb1cb5aa/det/modeling/backbone/__pycache__/mobilenet.cpython-36.pyc
--------------------------------------------------------------------------------
/det/modeling/backbone/__pycache__/resnet.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TengFeiHan0/Object-Detection.pytorch/5c2000ba08e903255a146f9e003c87a2fb1cb5aa/det/modeling/backbone/__pycache__/resnet.cpython-36.pyc
--------------------------------------------------------------------------------
/det/modeling/backbone/__pycache__/resnet_interval.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TengFeiHan0/Object-Detection.pytorch/5c2000ba08e903255a146f9e003c87a2fb1cb5aa/det/modeling/backbone/__pycache__/resnet_interval.cpython-36.pyc
--------------------------------------------------------------------------------
/det/modeling/backbone/__pycache__/resnet_lpf.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TengFeiHan0/Object-Detection.pytorch/5c2000ba08e903255a146f9e003c87a2fb1cb5aa/det/modeling/backbone/__pycache__/resnet_lpf.cpython-36.pyc
--------------------------------------------------------------------------------
/det/modeling/backbone/__pycache__/splat.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TengFeiHan0/Object-Detection.pytorch/5c2000ba08e903255a146f9e003c87a2fb1cb5aa/det/modeling/backbone/__pycache__/splat.cpython-36.pyc
--------------------------------------------------------------------------------
/det/modeling/backbone/__pycache__/vovnet.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TengFeiHan0/Object-Detection.pytorch/5c2000ba08e903255a146f9e003c87a2fb1cb5aa/det/modeling/backbone/__pycache__/vovnet.cpython-36.pyc
--------------------------------------------------------------------------------
/det/modeling/backbone/fpn.py:
--------------------------------------------------------------------------------
1 | from torch import nn
2 | import torch.nn.functional as F
3 | import fvcore.nn.weight_init as weight_init
4 |
5 | from detectron2.modeling.backbone import FPN, build_resnet_backbone
6 | from detectron2.layers import ShapeSpec
7 | from detectron2.modeling.backbone.build import BACKBONE_REGISTRY
8 |
9 | from .resnet_lpf import build_resnet_lpf_backbone
10 | from .resnet_interval import build_resnet_interval_backbone
11 | from .mobilenet import build_mnv2_backbone
12 | from .resnet import build_resnest_backbone
13 |
14 | class LastLevelP6P7(nn.Module):
15 | """
16 | This module is used in RetinaNet and FCOS to generate extra layers, P6 and P7 from
17 | C5 or P5 feature.
18 | """
19 |
20 | def __init__(self, in_channels, out_channels, in_features="res5"):
21 | super().__init__()
22 | self.num_levels = 2
23 | self.in_feature = in_features
24 | self.p6 = nn.Conv2d(in_channels, out_channels, 3, 2, 1)
25 | self.p7 = nn.Conv2d(out_channels, out_channels, 3, 2, 1)
26 | for module in [self.p6, self.p7]:
27 | weight_init.c2_xavier_fill(module)
28 |
29 | def forward(self, x):
30 | p6 = self.p6(x)
31 | p7 = self.p7(F.relu(p6))
32 | return [p6, p7]
33 |
34 |
35 | class LastLevelP6(nn.Module):
36 | """
37 | This module is used in FCOS to generate extra layers
38 | """
39 |
40 | def __init__(self, in_channels, out_channels, in_features="res5"):
41 | super().__init__()
42 | self.num_levels = 1
43 | self.in_feature = in_features
44 | self.p6 = nn.Conv2d(in_channels, out_channels, 3, 2, 1)
45 | for module in [self.p6]:
46 | weight_init.c2_xavier_fill(module)
47 |
48 | def forward(self, x):
49 | p6 = self.p6(x)
50 | return [p6]
51 |
52 |
53 | @BACKBONE_REGISTRY.register()
54 | def build_fcos_resnet_fpn_backbone(cfg, input_shape: ShapeSpec):
55 | """
56 | Args:
57 | cfg: a detectron2 CfgNode
58 |
59 | Returns:
60 | backbone (Backbone): backbone module, must be a subclass of :class:`Backbone`.
61 | """
62 | if cfg.MODEL.BACKBONE.ANTI_ALIAS:
63 | bottom_up = build_resnet_lpf_backbone(cfg, input_shape)
64 | elif cfg.MODEL.MOBILENET:
65 | bottom_up = build_mnv2_backbone(cfg, input_shape)
66 | elif cfg.MODEL.RESNEST:
67 | bottom_up = build_resnest_backbone(cfg, input_shape)
68 | else:
69 | bottom_up = build_resnet_backbone(cfg, input_shape)
70 | in_features = cfg.MODEL.FPN.IN_FEATURES
71 | out_channels = cfg.MODEL.FPN.OUT_CHANNELS
72 | top_levels = cfg.MODEL.FCOS.TOP_LEVELS
73 | in_channels_top = out_channels
74 |     if top_levels == 2:
75 |         top_block = LastLevelP6P7(in_channels_top, out_channels, "p5")
76 |     elif top_levels == 1:
77 |         top_block = LastLevelP6(in_channels_top, out_channels, "p5")
78 |     elif top_levels == 0:
79 |         top_block = None
80 | backbone = FPN(
81 | bottom_up=bottom_up,
82 | in_features=in_features,
83 | out_channels=out_channels,
84 | norm=cfg.MODEL.FPN.NORM,
85 | top_block=top_block,
86 | fuse_type=cfg.MODEL.FPN.FUSE_TYPE,
87 | )
88 | return backbone
89 |
90 | @BACKBONE_REGISTRY.register()
91 | def build_resnest_fpn_backbone(cfg, input_shape: ShapeSpec):
92 | if cfg.MODEL.RESNEST:
93 | bottom_up = build_resnest_backbone(cfg, input_shape)
94 | else:
95 | bottom_up = build_resnet_backbone(cfg, input_shape)
96 | in_features = cfg.MODEL.FPN.IN_FEATURES
97 | out_channels = cfg.MODEL.FPN.OUT_CHANNELS
98 | top_levels = cfg.MODEL.FCOS.TOP_LEVELS
99 | in_channels_top = out_channels
100 |     if top_levels == 2:
101 |         top_block = LastLevelP6P7(in_channels_top, out_channels, "p5")
102 |     elif top_levels == 1:
103 |         top_block = LastLevelP6(in_channels_top, out_channels, "p5")
104 |     elif top_levels == 0:
105 |         top_block = None
106 | backbone = FPN(
107 | bottom_up=bottom_up,
108 | in_features=in_features,
109 | out_channels=out_channels,
110 | norm=cfg.MODEL.FPN.NORM,
111 | top_block=top_block,
112 | fuse_type=cfg.MODEL.FPN.FUSE_TYPE,
113 | )
114 | return backbone
115 |
116 |
117 | #BiFPN
118 | #https://github.com/sxhxliang/detectron2_backbone/blob/master/detectron2_backbone/backbone/bifpn.py
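
The two builders above are registered with detectron2's BACKBONE_REGISTRY, so they are normally selected by name through the config rather than called directly. A minimal sketch, assuming the repo's config defaults (det/config/defaults.py) add the FCOS-specific keys (MODEL.FCOS.TOP_LEVELS, MODEL.BACKBONE.ANTI_ALIAS, MODEL.MOBILENET, MODEL.RESNEST) and that det.config exposes a get_cfg() that includes them, as in AdelaiDet:

import torch
from detectron2.modeling import build_backbone
from det.config import get_cfg  # assumed export; stock detectron2 get_cfg() lacks the FCOS keys

cfg = get_cfg()
cfg.MODEL.BACKBONE.NAME = "build_fcos_resnet_fpn_backbone"
cfg.MODEL.RESNETS.OUT_FEATURES = ["res3", "res4", "res5"]
cfg.MODEL.FPN.IN_FEATURES = ["res3", "res4", "res5"]
backbone = build_backbone(cfg)                      # dispatched through BACKBONE_REGISTRY
features = backbone(torch.randn(1, 3, 224, 224))
print({k: v.shape for k, v in features.items()})    # p3 ... p7 feature maps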
--------------------------------------------------------------------------------
/det/modeling/backbone/lpf.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn.parallel
3 | import numpy as np
4 | import torch.nn as nn
5 | import torch.nn.functional as F
6 | # from IPython import embed  # debugging helper, not needed at runtime
7 |
8 |
9 | class Downsample(nn.Module):
10 | def __init__(self, pad_type='reflect', filt_size=3, stride=2, channels=None, pad_off=0):
11 | super(Downsample, self).__init__()
12 | self.filt_size = filt_size
13 | self.pad_off = pad_off
14 | self.pad_sizes = [int(1.*(filt_size-1)/2), int(np.ceil(1.*(filt_size-1)/2)), int(1.*(filt_size-1)/2), int(np.ceil(1.*(filt_size-1)/2))]
15 | self.pad_sizes = [pad_size+pad_off for pad_size in self.pad_sizes]
16 | self.stride = stride
17 | self.off = int((self.stride-1)/2.)
18 | self.channels = channels
19 |
20 | # print('Filter size [%i]'%filt_size)
21 | if(self.filt_size==1):
22 | a = np.array([1.,])
23 | elif(self.filt_size==2):
24 | a = np.array([1., 1.])
25 | elif(self.filt_size==3):
26 | a = np.array([1., 2., 1.])
27 | elif(self.filt_size==4):
28 | a = np.array([1., 3., 3., 1.])
29 | elif(self.filt_size==5):
30 | a = np.array([1., 4., 6., 4., 1.])
31 | elif(self.filt_size==6):
32 | a = np.array([1., 5., 10., 10., 5., 1.])
33 | elif(self.filt_size==7):
34 | a = np.array([1., 6., 15., 20., 15., 6., 1.])
35 |
36 | filt = torch.Tensor(a[:,None]*a[None,:])
37 | filt = filt/torch.sum(filt)
38 | self.register_buffer('filt', filt[None,None,:,:].repeat((self.channels,1,1,1)))
39 |
40 | self.pad = get_pad_layer(pad_type)(self.pad_sizes)
41 |
42 | def forward(self, inp):
43 | if(self.filt_size==1):
44 | if(self.pad_off==0):
45 | return inp[:,:,::self.stride,::self.stride]
46 | else:
47 | return self.pad(inp)[:,:,::self.stride,::self.stride]
48 | else:
49 | return F.conv2d(self.pad(inp), self.filt, stride=self.stride, groups=inp.shape[1])
50 |
51 | def get_pad_layer(pad_type):
52 | if(pad_type in ['refl','reflect']):
53 | PadLayer = nn.ReflectionPad2d
54 | elif(pad_type in ['repl','replicate']):
55 | PadLayer = nn.ReplicationPad2d
56 | elif(pad_type=='zero'):
57 | PadLayer = nn.ZeroPad2d
58 | else:
59 | print('Pad type [%s] not recognized'%pad_type)
60 | return PadLayer
61 |
62 |
63 | class Downsample1D(nn.Module):
64 | def __init__(self, pad_type='reflect', filt_size=3, stride=2, channels=None, pad_off=0):
65 | super(Downsample1D, self).__init__()
66 | self.filt_size = filt_size
67 | self.pad_off = pad_off
68 | self.pad_sizes = [int(1. * (filt_size - 1) / 2), int(np.ceil(1. * (filt_size - 1) / 2))]
69 | self.pad_sizes = [pad_size + pad_off for pad_size in self.pad_sizes]
70 | self.stride = stride
71 | self.off = int((self.stride - 1) / 2.)
72 | self.channels = channels
73 |
74 | # print('Filter size [%i]' % filt_size)
75 | if(self.filt_size == 1):
76 | a = np.array([1., ])
77 | elif(self.filt_size == 2):
78 | a = np.array([1., 1.])
79 | elif(self.filt_size == 3):
80 | a = np.array([1., 2., 1.])
81 | elif(self.filt_size == 4):
82 | a = np.array([1., 3., 3., 1.])
83 | elif(self.filt_size == 5):
84 | a = np.array([1., 4., 6., 4., 1.])
85 | elif(self.filt_size == 6):
86 | a = np.array([1., 5., 10., 10., 5., 1.])
87 | elif(self.filt_size == 7):
88 | a = np.array([1., 6., 15., 20., 15., 6., 1.])
89 |
90 | filt = torch.Tensor(a)
91 | filt = filt / torch.sum(filt)
92 | self.register_buffer('filt', filt[None, None, :].repeat((self.channels, 1, 1)))
93 |
94 | self.pad = get_pad_layer_1d(pad_type)(self.pad_sizes)
95 |
96 | def forward(self, inp):
97 | if(self.filt_size == 1):
98 | if(self.pad_off == 0):
99 | return inp[:, :, ::self.stride]
100 | else:
101 | return self.pad(inp)[:, :, ::self.stride]
102 | else:
103 | return F.conv1d(self.pad(inp), self.filt, stride=self.stride, groups=inp.shape[1])
104 |
105 |
106 | def get_pad_layer_1d(pad_type):
107 | if(pad_type in ['refl', 'reflect']):
108 | PadLayer = nn.ReflectionPad1d
109 | elif(pad_type in ['repl', 'replicate']):
110 | PadLayer = nn.ReplicationPad1d
111 | elif(pad_type == 'zero'):
112 | PadLayer = nn.ZeroPad1d
113 | else:
114 | print('Pad type [%s] not recognized' % pad_type)
115 | return PadLayer
116 |
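
A minimal sketch of how the Downsample module above is used: it blurs with a fixed binomial filter and then subsamples by the stride, the anti-aliased replacement for plain strided pooling used by resnet_lpf.py. Shapes only, no assumptions beyond this file:

import torch

ds = Downsample(pad_type='reflect', filt_size=3, stride=2, channels=64)
x = torch.randn(1, 64, 32, 32)
y = ds(x)
print(y.shape)   # torch.Size([1, 64, 16, 16]); spatial size halved after blurring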
--------------------------------------------------------------------------------
/det/modeling/backbone/mobilenet.py:
--------------------------------------------------------------------------------
1 | # taken from https://github.com/tonylins/pytorch-mobilenet-v2/
2 | # Published by Ji Lin, tonylins
3 | # licensed under the Apache License, Version 2.0, January 2004
4 |
5 | from torch import nn
6 | from torch.nn import BatchNorm2d
7 | #from detectron2.layers.batch_norm import NaiveSyncBatchNorm as BatchNorm2d
8 | from detectron2.layers import Conv2d
9 | from detectron2.modeling.backbone.build import BACKBONE_REGISTRY
10 | from detectron2.modeling.backbone import Backbone
11 |
12 |
13 | def conv_bn(inp, oup, stride):
14 | return nn.Sequential(
15 | Conv2d(inp, oup, 3, stride, 1, bias=False),
16 | BatchNorm2d(oup),
17 | nn.ReLU6(inplace=True)
18 | )
19 |
20 |
21 | def conv_1x1_bn(inp, oup):
22 | return nn.Sequential(
23 | Conv2d(inp, oup, 1, 1, 0, bias=False),
24 | BatchNorm2d(oup),
25 | nn.ReLU6(inplace=True)
26 | )
27 |
28 |
29 | class InvertedResidual(nn.Module):
30 | def __init__(self, inp, oup, stride, expand_ratio):
31 | super(InvertedResidual, self).__init__()
32 | self.stride = stride
33 | assert stride in [1, 2]
34 |
35 | hidden_dim = int(round(inp * expand_ratio))
36 | self.use_res_connect = self.stride == 1 and inp == oup
37 |
38 | if expand_ratio == 1:
39 | self.conv = nn.Sequential(
40 | # dw
41 | Conv2d(hidden_dim, hidden_dim, 3, stride, 1, groups=hidden_dim, bias=False),
42 | BatchNorm2d(hidden_dim),
43 | nn.ReLU6(inplace=True),
44 | # pw-linear
45 | Conv2d(hidden_dim, oup, 1, 1, 0, bias=False),
46 | BatchNorm2d(oup),
47 | )
48 | else:
49 | self.conv = nn.Sequential(
50 | # pw
51 | Conv2d(inp, hidden_dim, 1, 1, 0, bias=False),
52 | BatchNorm2d(hidden_dim),
53 | nn.ReLU6(inplace=True),
54 | # dw
55 | Conv2d(hidden_dim, hidden_dim, 3, stride, 1, groups=hidden_dim, bias=False),
56 | BatchNorm2d(hidden_dim),
57 | nn.ReLU6(inplace=True),
58 | # pw-linear
59 | Conv2d(hidden_dim, oup, 1, 1, 0, bias=False),
60 | BatchNorm2d(oup),
61 | )
62 |
63 | def forward(self, x):
64 | if self.use_res_connect:
65 | return x + self.conv(x)
66 | else:
67 | return self.conv(x)
68 |
69 |
70 | class MobileNetV2(Backbone):
71 | """
72 | Should freeze bn
73 | """
74 | def __init__(self, cfg, n_class=1000, input_size=224, width_mult=1.):
75 | super(MobileNetV2, self).__init__()
76 | block = InvertedResidual
77 | input_channel = 32
78 | interverted_residual_setting = [
79 | # t, c, n, s
80 | [1, 16, 1, 1],
81 | [6, 24, 2, 2],
82 | [6, 32, 3, 2],
83 | [6, 64, 4, 2],
84 | [6, 96, 3, 1],
85 | [6, 160, 3, 2],
86 | [6, 320, 1, 1],
87 | ]
88 |
89 | # building first layer
90 | assert input_size % 32 == 0
91 | input_channel = int(input_channel * width_mult)
92 | self.return_features_indices = [3, 6, 13, 17]
93 | self.return_features_num_channels = []
94 | self.features = nn.ModuleList([conv_bn(3, input_channel, 2)])
95 | # building inverted residual blocks
96 | for t, c, n, s in interverted_residual_setting:
97 | output_channel = int(c * width_mult)
98 | for i in range(n):
99 | if i == 0:
100 | self.features.append(block(input_channel, output_channel, s, expand_ratio=t))
101 | else:
102 | self.features.append(block(input_channel, output_channel, 1, expand_ratio=t))
103 | input_channel = output_channel
104 | if len(self.features) - 1 in self.return_features_indices:
105 | self.return_features_num_channels.append(output_channel)
106 |
107 | self._initialize_weights()
108 | self._freeze_backbone(cfg.MODEL.BACKBONE.FREEZE_AT)
109 |
110 | def _freeze_backbone(self, freeze_at):
111 | for layer_index in range(freeze_at):
112 | for p in self.features[layer_index].parameters():
113 | p.requires_grad = False
114 |
115 | def forward(self, x):
116 | res = []
117 | for i, m in enumerate(self.features):
118 | x = m(x)
119 | if i in self.return_features_indices:
120 | res.append(x)
121 | return {'res{}'.format(i + 2): r for i, r in enumerate(res)}
122 |
123 | def _initialize_weights(self):
124 | for m in self.modules():
125 | if isinstance(m, Conv2d):
126 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
127 | m.weight.data.normal_(0, (2. / n) ** 0.5)
128 | if m.bias is not None:
129 | m.bias.data.zero_()
130 | elif isinstance(m, BatchNorm2d):
131 | m.weight.data.fill_(1)
132 | m.bias.data.zero_()
133 | elif isinstance(m, nn.Linear):
134 | n = m.weight.size(1)
135 | m.weight.data.normal_(0, 0.01)
136 | m.bias.data.zero_()
137 |
138 | @BACKBONE_REGISTRY.register()
139 | def build_mnv2_backbone(cfg, input_shape):
140 | """
141 |     Create a MobileNetV2 backbone instance from config.
142 | 
143 |     Returns:
144 |         MobileNetV2: a :class:`MobileNetV2` instance.
145 | """
146 | out_features = cfg.MODEL.RESNETS.OUT_FEATURES
147 |
148 | out_feature_channels = {"res2": 24, "res3": 32,
149 | "res4": 96, "res5": 320}
150 | out_feature_strides = {"res2": 4, "res3": 8, "res4": 16, "res5": 32}
151 | model = MobileNetV2(cfg)
152 | model._out_features = out_features
153 | model._out_feature_channels = out_feature_channels
154 | model._out_feature_strides = out_feature_strides
155 | return model
156 |
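
A minimal usage sketch for the builder above. Only stock detectron2 config keys (MODEL.BACKBONE.FREEZE_AT, MODEL.RESNETS.OUT_FEATURES) are read, so the default config is enough to see the multi-scale feature dict:

import torch
from detectron2.config import get_cfg
from detectron2.layers import ShapeSpec

cfg = get_cfg()
model = build_mnv2_backbone(cfg, ShapeSpec(channels=3))
feats = model(torch.randn(1, 3, 224, 224))
for name, feat in feats.items():
    print(name, tuple(feat.shape))
# res2 (1, 24, 56, 56), res3 (1, 32, 28, 28), res4 (1, 96, 14, 14), res5 (1, 320, 7, 7)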
--------------------------------------------------------------------------------
/det/modeling/backbone/resnet_interval.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
2 | from detectron2.layers import FrozenBatchNorm2d
3 | from detectron2.modeling.backbone import BACKBONE_REGISTRY
4 | from detectron2.modeling.backbone.resnet import (
5 | BasicStem,
6 | DeformBottleneckBlock,
7 | BottleneckBlock,
8 | ResNet,
9 | )
10 |
11 |
12 | def make_stage_intervals(block_class, num_blocks, first_stride, **kwargs):
13 | """
14 | Create a resnet stage by creating many blocks.
15 | Args:
16 | block_class (class): a subclass of ResNetBlockBase
17 | num_blocks (int):
18 | first_stride (int): the stride of the first block. The other blocks will have stride=1.
19 | A `stride` argument will be passed to the block constructor.
20 | kwargs: other arguments passed to the block constructor.
21 |
22 | Returns:
23 | list[nn.Module]: a list of block module.
24 | """
25 | blocks = []
26 | conv_kwargs = {key: kwargs[key] for key in kwargs if "deform" not in key}
27 | deform_kwargs = {key: kwargs[key] for key in kwargs if key != "deform_interval"}
28 | deform_interval = kwargs.get("deform_interval", None)
29 | for i in range(num_blocks):
30 | if deform_interval and i % deform_interval == 0:
31 | blocks.append(block_class(stride=first_stride if i == 0 else 1, **deform_kwargs))
32 | else:
33 | blocks.append(BottleneckBlock(stride=first_stride if i == 0 else 1, **conv_kwargs))
34 | conv_kwargs["in_channels"] = conv_kwargs["out_channels"]
35 | deform_kwargs["in_channels"] = deform_kwargs["out_channels"]
36 | return blocks
37 |
38 |
39 | @BACKBONE_REGISTRY.register()
40 | def build_resnet_interval_backbone(cfg, input_shape):
41 | """
42 | Create a ResNet instance from config.
43 |
44 | Returns:
45 | ResNet: a :class:`ResNet` instance.
46 | """
47 | # need registration of new blocks/stems?
48 | norm = cfg.MODEL.RESNETS.NORM
49 | stem = BasicStem(
50 | in_channels=input_shape.channels,
51 | out_channels=cfg.MODEL.RESNETS.STEM_OUT_CHANNELS,
52 | norm=norm,
53 | )
54 | freeze_at = cfg.MODEL.BACKBONE.FREEZE_AT
55 |
56 | if freeze_at >= 1:
57 | for p in stem.parameters():
58 | p.requires_grad = False
59 | stem = FrozenBatchNorm2d.convert_frozen_batchnorm(stem)
60 |
61 | # fmt: off
62 | out_features = cfg.MODEL.RESNETS.OUT_FEATURES
63 | depth = cfg.MODEL.RESNETS.DEPTH
64 | num_groups = cfg.MODEL.RESNETS.NUM_GROUPS
65 | width_per_group = cfg.MODEL.RESNETS.WIDTH_PER_GROUP
66 | bottleneck_channels = num_groups * width_per_group
67 | in_channels = cfg.MODEL.RESNETS.STEM_OUT_CHANNELS
68 | out_channels = cfg.MODEL.RESNETS.RES2_OUT_CHANNELS
69 | stride_in_1x1 = cfg.MODEL.RESNETS.STRIDE_IN_1X1
70 | res5_dilation = cfg.MODEL.RESNETS.RES5_DILATION
71 | deform_on_per_stage = cfg.MODEL.RESNETS.DEFORM_ON_PER_STAGE
72 | deform_modulated = cfg.MODEL.RESNETS.DEFORM_MODULATED
73 | deform_num_groups = cfg.MODEL.RESNETS.DEFORM_NUM_GROUPS
74 | deform_interval = cfg.MODEL.RESNETS.DEFORM_INTERVAL
75 | # fmt: on
76 | assert res5_dilation in {1, 2}, "res5_dilation cannot be {}.".format(res5_dilation)
77 |
78 | num_blocks_per_stage = {50: [3, 4, 6, 3], 101: [3, 4, 23, 3], 152: [3, 8, 36, 3]}[depth]
79 |
80 | stages = []
81 |
82 | # Avoid creating variables without gradients
83 | # It consumes extra memory and may cause allreduce to fail
84 | out_stage_idx = [{"res2": 2, "res3": 3, "res4": 4, "res5": 5}[f] for f in out_features]
85 | max_stage_idx = max(out_stage_idx)
86 | for idx, stage_idx in enumerate(range(2, max_stage_idx + 1)):
87 | dilation = res5_dilation if stage_idx == 5 else 1
88 | first_stride = 1 if idx == 0 or (stage_idx == 5 and dilation == 2) else 2
89 | stage_kargs = {
90 | "num_blocks": num_blocks_per_stage[idx],
91 | "first_stride": first_stride,
92 | "in_channels": in_channels,
93 | "bottleneck_channels": bottleneck_channels,
94 | "out_channels": out_channels,
95 | "num_groups": num_groups,
96 | "norm": norm,
97 | "stride_in_1x1": stride_in_1x1,
98 | "dilation": dilation,
99 | }
100 | if deform_on_per_stage[idx]:
101 | stage_kargs["block_class"] = DeformBottleneckBlock
102 | stage_kargs["deform_modulated"] = deform_modulated
103 | stage_kargs["deform_num_groups"] = deform_num_groups
104 | stage_kargs["deform_interval"] = deform_interval
105 | else:
106 | stage_kargs["block_class"] = BottleneckBlock
107 | blocks = make_stage_intervals(**stage_kargs)
108 | in_channels = out_channels
109 | out_channels *= 2
110 | bottleneck_channels *= 2
111 |
112 | if freeze_at >= stage_idx:
113 | for block in blocks:
114 | block.freeze()
115 | stages.append(blocks)
116 | return ResNet(stem, stages, out_features=out_features)
117 |
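
The only difference from the stock detectron2 builder is make_stage_intervals: with MODEL.RESNETS.DEFORM_INTERVAL = k, every k-th block of a deformable stage uses DeformBottleneckBlock and the rest stay plain. A tiny sketch of that selection rule:

deform_interval = 3          # cfg.MODEL.RESNETS.DEFORM_INTERVAL
num_blocks = 6               # e.g. the res4 stage of ResNet-50 has 6 blocks
kinds = ["DeformBottleneckBlock" if i % deform_interval == 0 else "BottleneckBlock"
         for i in range(num_blocks)]
print(kinds)
# ['DeformBottleneckBlock', 'BottleneckBlock', 'BottleneckBlock',
#  'DeformBottleneckBlock', 'BottleneckBlock', 'BottleneckBlock']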
--------------------------------------------------------------------------------
/det/modeling/backbone/splat.py:
--------------------------------------------------------------------------------
1 | """Split-Attention"""
2 |
3 | import torch
4 | from torch import nn
5 | import torch.nn.functional as F
6 | from torch.nn import Module, Linear, BatchNorm2d, ReLU
7 | from torch.nn.modules.utils import _pair
8 |
9 | from detectron2.layers import (
10 | Conv2d,
11 | DeformConv,
12 | FrozenBatchNorm2d,
13 | ModulatedDeformConv,
14 | ShapeSpec,
15 | get_norm,
16 | )
17 |
18 | __all__ = ['SplAtConv2d', 'SplAtConv2d_dcn']
19 |
20 | class SplAtConv2d(Module):
21 | """Split-Attention Conv2d
22 | """
23 | def __init__(self, in_channels, channels, kernel_size, stride=(1, 1), padding=(0, 0),
24 | dilation=(1, 1), groups=1, bias=True,
25 | radix=2, reduction_factor=4,
26 | rectify=False, rectify_avg=False, norm=None,
27 | dropblock_prob=0.0, **kwargs):
28 | super(SplAtConv2d, self).__init__()
29 | padding = _pair(padding)
30 | self.rectify = rectify and (padding[0] > 0 or padding[1] > 0)
31 | self.rectify_avg = rectify_avg
32 | inter_channels = max(in_channels*radix//reduction_factor, 32)
33 | self.radix = radix
34 | self.cardinality = groups
35 | self.channels = channels
36 | self.dropblock_prob = dropblock_prob
37 | if self.rectify:
38 | from rfconv import RFConv2d
39 | self.conv = RFConv2d(in_channels, channels*radix, kernel_size, stride, padding, dilation,
40 | groups=groups*radix, bias=bias, average_mode=rectify_avg, **kwargs)
41 | else:
42 | self.conv = Conv2d(in_channels, channels*radix, kernel_size, stride, padding, dilation,
43 | groups=groups*radix, bias=bias, **kwargs)
44 | self.use_bn = norm is not None
45 | if self.use_bn:
46 | self.bn0 = get_norm(norm, channels*radix)
47 | self.relu = ReLU(inplace=True)
48 | self.fc1 = Conv2d(channels, inter_channels, 1, groups=self.cardinality)
49 | if self.use_bn:
50 | self.bn1 = get_norm(norm, inter_channels)
51 | self.fc2 = Conv2d(inter_channels, channels*radix, 1, groups=self.cardinality)
52 | if dropblock_prob > 0.0:
53 | self.dropblock = DropBlock2D(dropblock_prob, 3)
54 | self.rsoftmax = rSoftMax(radix, groups)
55 |
56 | def forward(self, x):
57 | x = self.conv(x)
58 | if self.use_bn:
59 | x = self.bn0(x)
60 | if self.dropblock_prob > 0.0:
61 | x = self.dropblock(x)
62 | x = self.relu(x)
63 |
64 | batch, rchannel = x.shape[:2]
65 | if self.radix > 1:
66 | splited = torch.split(x, rchannel//self.radix, dim=1)
67 | gap = sum(splited)
68 | else:
69 | gap = x
70 | gap = F.adaptive_avg_pool2d(gap, 1)
71 | gap = self.fc1(gap)
72 |
73 | if self.use_bn:
74 | gap = self.bn1(gap)
75 | gap = self.relu(gap)
76 |
77 | atten = self.fc2(gap)
78 | atten = self.rsoftmax(atten).view(batch, -1, 1, 1)
79 |
80 | if self.radix > 1:
81 | attens = torch.split(atten, rchannel//self.radix, dim=1)
82 | out = sum([att*split for (att, split) in zip(attens, splited)])
83 | else:
84 | out = atten * x
85 | return out.contiguous()
86 |
87 | class rSoftMax(nn.Module):
88 | def __init__(self, radix, cardinality):
89 | super().__init__()
90 | self.radix = radix
91 | self.cardinality = cardinality
92 |
93 | def forward(self, x):
94 | batch = x.size(0)
95 | if self.radix > 1:
96 | x = x.view(batch, self.cardinality, self.radix, -1).transpose(1, 2)
97 | x = F.softmax(x, dim=1)
98 | x = x.reshape(batch, -1)
99 | else:
100 | x = torch.sigmoid(x)
101 | return x
102 |
103 |
104 | class SplAtConv2d_dcn(Module):
105 | """Split-Attention Conv2d with dcn
106 | """
107 | def __init__(self, in_channels, channels, kernel_size, stride=(1, 1), padding=(0, 0),
108 | dilation=(1, 1), groups=1, bias=True,
109 | radix=2, reduction_factor=4,
110 | rectify=False, rectify_avg=False, norm=None,
111 | dropblock_prob=0.0,
112 | deform_conv_op=None,
113 | deformable_groups=1,
114 | deform_modulated=False,
115 | **kwargs):
116 | super(SplAtConv2d_dcn, self).__init__()
117 | self.deform_modulated = deform_modulated
118 |
119 | padding = _pair(padding)
120 | self.rectify = rectify and (padding[0] > 0 or padding[1] > 0)
121 | self.rectify_avg = rectify_avg
122 | inter_channels = max(in_channels*radix//reduction_factor, 32)
123 | self.radix = radix
124 | self.cardinality = groups
125 | self.channels = channels
126 | self.dropblock_prob = dropblock_prob
127 | if self.rectify:
128 | from rfconv import RFConv2d
129 | self.conv = RFConv2d(in_channels, channels*radix, kernel_size, stride, padding, dilation,
130 | groups=groups*radix, bias=bias, average_mode=rectify_avg, **kwargs)
131 | else:
132 | self.conv = deform_conv_op(in_channels, channels*radix, kernel_size, stride, padding[0], dilation,
133 | groups=groups*radix, bias=bias, deformable_groups=deformable_groups, **kwargs)
134 | self.use_bn = norm is not None
135 | if self.use_bn:
136 | self.bn0 = get_norm(norm, channels*radix)
137 | self.relu = ReLU(inplace=True)
138 | self.fc1 = Conv2d(channels, inter_channels, 1, groups=self.cardinality)
139 | if self.use_bn:
140 | self.bn1 = get_norm(norm, inter_channels)
141 | self.fc2 = Conv2d(inter_channels, channels*radix, 1, groups=self.cardinality)
142 | if dropblock_prob > 0.0:
143 | self.dropblock = DropBlock2D(dropblock_prob, 3)
144 | self.rsoftmax = rSoftMax(radix, groups)
145 |
146 | def forward(self, x, offset_input):
147 |
148 | if self.deform_modulated:
149 | offset_x, offset_y, mask = torch.chunk(offset_input, 3, dim=1)
150 | offset = torch.cat((offset_x, offset_y), dim=1)
151 | mask = mask.sigmoid()
152 | x = self.conv(x, offset, mask)
153 | else:
154 | x = self.conv(x, offset_input)
155 |
156 | if self.use_bn:
157 | x = self.bn0(x)
158 | if self.dropblock_prob > 0.0:
159 | x = self.dropblock(x)
160 | x = self.relu(x)
161 |
162 | batch, rchannel = x.shape[:2]
163 | if self.radix > 1:
164 | splited = torch.split(x, rchannel//self.radix, dim=1)
165 | gap = sum(splited)
166 | else:
167 | gap = x
168 | gap = F.adaptive_avg_pool2d(gap, 1)
169 | gap = self.fc1(gap)
170 |
171 | if self.use_bn:
172 | gap = self.bn1(gap)
173 | gap = self.relu(gap)
174 |
175 | atten = self.fc2(gap)
176 | atten = self.rsoftmax(atten).view(batch, -1, 1, 1)
177 |
178 | if self.radix > 1:
179 | attens = torch.split(atten, rchannel//self.radix, dim=1)
180 | out = sum([att*split for (att, split) in zip(attens, splited)])
181 | else:
182 | out = atten * x
183 | return out.contiguous()
--------------------------------------------------------------------------------
/det/modeling/fcos/__init__.py:
--------------------------------------------------------------------------------
1 | from .fcos import FCOS
2 |
--------------------------------------------------------------------------------
/det/modeling/fcos/__pycache__/__init__.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TengFeiHan0/Object-Detection.pytorch/5c2000ba08e903255a146f9e003c87a2fb1cb5aa/det/modeling/fcos/__pycache__/__init__.cpython-36.pyc
--------------------------------------------------------------------------------
/det/modeling/fcos/__pycache__/fcos.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TengFeiHan0/Object-Detection.pytorch/5c2000ba08e903255a146f9e003c87a2fb1cb5aa/det/modeling/fcos/__pycache__/fcos.cpython-36.pyc
--------------------------------------------------------------------------------
/det/modeling/fcos/__pycache__/fcos_outputs.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TengFeiHan0/Object-Detection.pytorch/5c2000ba08e903255a146f9e003c87a2fb1cb5aa/det/modeling/fcos/__pycache__/fcos_outputs.cpython-36.pyc
--------------------------------------------------------------------------------
/det/modeling/one_stage_detector.py:
--------------------------------------------------------------------------------
1 | import logging
2 | from torch import nn
3 |
4 | from detectron2.modeling.meta_arch.build import META_ARCH_REGISTRY
5 | from detectron2.modeling import ProposalNetwork, GeneralizedRCNN
6 | from detectron2.utils.events import get_event_storage
7 | from detectron2.utils.logger import log_first_n
8 | from detectron2.modeling.postprocessing import detector_postprocess as d2_postprocess
9 |
10 |
11 | def detector_postprocess(results, output_height, output_width, mask_threshold=0.5):
12 | """
13 |     In addition to the post processing of detectron2, we add scaling for
14 | bezier control points.
15 | """
16 | scale_x, scale_y = (output_width / results.image_size[1], output_height / results.image_size[0])
17 |     results = d2_postprocess(results, output_height, output_width, mask_threshold)
18 |
19 | # scale bezier points
20 | if results.has("beziers"):
21 | beziers = results.beziers
22 | # scale and clip in place
23 | beziers[:, 0::2] *= scale_x
24 | beziers[:, 1::2] *= scale_y
25 | h, w = results.image_size
26 | beziers[:, 0].clamp_(min=0, max=w)
27 | beziers[:, 1].clamp_(min=0, max=h)
28 | beziers[:, 6].clamp_(min=0, max=w)
29 | beziers[:, 7].clamp_(min=0, max=h)
30 | beziers[:, 8].clamp_(min=0, max=w)
31 | beziers[:, 9].clamp_(min=0, max=h)
32 | beziers[:, 14].clamp_(min=0, max=w)
33 | beziers[:, 15].clamp_(min=0, max=h)
34 |
35 | return results
36 |
37 |
38 | @META_ARCH_REGISTRY.register()
39 | class OneStageDetector(ProposalNetwork):
40 | """
41 | Same as :class:`detectron2.modeling.ProposalNetwork`.
42 | Uses "instances" as the return key instead of using "proposal".
43 | """
44 | def forward(self, batched_inputs):
45 | if self.training:
46 | return super().forward(batched_inputs)
47 | processed_results = super().forward(batched_inputs)
48 | processed_results = [{"instances": r["proposals"]} for r in processed_results]
49 | return processed_results
50 |
51 |
52 | def build_top_module(cfg):
53 | top_type = cfg.MODEL.TOP_MODULE.NAME
54 | if top_type == "conv":
55 | inp = cfg.MODEL.FPN.OUT_CHANNELS
56 | oup = cfg.MODEL.TOP_MODULE.DIM
57 | top_module = nn.Conv2d(
58 | inp, oup,
59 | kernel_size=3, stride=1, padding=1)
60 | else:
61 | top_module = None
62 | return top_module
63 |
64 |
65 | @META_ARCH_REGISTRY.register()
66 | class OneStageRCNN(GeneralizedRCNN):
67 | """
68 |     Same as :class:`detectron2.modeling.GeneralizedRCNN`, but uses a one-stage
69 |     detector as the proposal generator and a second stage for instance-wise prediction.
70 | """
71 | def __init__(self, cfg):
72 | super().__init__(cfg)
73 | self.top_module = build_top_module(cfg)
74 | self.to(self.device)
75 |
76 | def forward(self, batched_inputs):
77 | """
78 | Args:
79 | batched_inputs: a list, batched outputs of :class:`DatasetMapper` .
80 | Each item in the list contains the inputs for one image.
81 | For now, each item in the list is a dict that contains:
82 |
83 | * image: Tensor, image in (C, H, W) format.
84 | * instances (optional): groundtruth :class:`Instances`
85 | * proposals (optional): :class:`Instances`, precomputed proposals.
86 |
87 | Other information that's included in the original dicts, such as:
88 |
89 | * "height", "width" (int): the output resolution of the model, used in inference.
90 | See :meth:`postprocess` for details.
91 |
92 | Returns:
93 | list[dict]:
94 | Each dict is the output for one input image.
95 | The dict contains one key "instances" whose value is a :class:`Instances`.
96 | The :class:`Instances` object has the following keys:
97 | "pred_boxes", "pred_classes", "scores", "pred_masks", "pred_keypoints"
98 | """
99 | if not self.training:
100 | return self.inference(batched_inputs)
101 |
102 | images = self.preprocess_image(batched_inputs)
103 | if "instances" in batched_inputs[0]:
104 | gt_instances = [x["instances"].to(self.device) for x in batched_inputs]
105 | elif "targets" in batched_inputs[0]:
106 | log_first_n(
107 | logging.WARN, "'targets' in the model inputs is now renamed to 'instances'!", n=10
108 | )
109 | gt_instances = [x["targets"].to(self.device) for x in batched_inputs]
110 | else:
111 | gt_instances = None
112 |
113 | features = self.backbone(images.tensor)
114 |
115 | if self.proposal_generator:
116 | proposals, proposal_losses = self.proposal_generator(
117 | images, features, gt_instances, self.top_module)
118 | else:
119 | assert "proposals" in batched_inputs[0]
120 | proposals = [x["proposals"].to(self.device) for x in batched_inputs]
121 | proposal_losses = {}
122 |
123 | _, detector_losses = self.roi_heads(images, features, proposals, gt_instances)
124 | if self.vis_period > 0:
125 | storage = get_event_storage()
126 | if storage.iter % self.vis_period == 0:
127 | self.visualize_training(batched_inputs, proposals)
128 |
129 | losses = {}
130 | losses.update(detector_losses)
131 | losses.update(proposal_losses)
132 | return losses
133 |
134 | def inference(self, batched_inputs, detected_instances=None, do_postprocess=True):
135 | """
136 | Run inference on the given inputs.
137 |
138 | Args:
139 | batched_inputs (list[dict]): same as in :meth:`forward`
140 | detected_instances (None or list[Instances]): if not None, it
141 | contains an `Instances` object per image. The `Instances`
142 | object contains "pred_boxes" and "pred_classes" which are
143 | known boxes in the image.
144 | The inference will then skip the detection of bounding boxes,
145 | and only predict other per-ROI outputs.
146 | do_postprocess (bool): whether to apply post-processing on the outputs.
147 |
148 | Returns:
149 | same as in :meth:`forward`.
150 | """
151 | assert not self.training
152 |
153 | images = self.preprocess_image(batched_inputs)
154 | features = self.backbone(images.tensor)
155 |
156 | if detected_instances is None:
157 | if self.proposal_generator:
158 | proposals, _ = self.proposal_generator(
159 | images, features, None, self.top_module)
160 | else:
161 | assert "proposals" in batched_inputs[0]
162 | proposals = [x["proposals"].to(self.device) for x in batched_inputs]
163 |
164 | results, _ = self.roi_heads(images, features, proposals, None)
165 | else:
166 | detected_instances = [x.to(self.device) for x in detected_instances]
167 | results = self.roi_heads.forward_with_given_boxes(features, detected_instances)
168 |
169 | if do_postprocess:
170 | return OneStageRCNN._postprocess(results, batched_inputs, images.image_sizes)
171 | else:
172 | return results
173 |
174 | @staticmethod
175 | def _postprocess(instances, batched_inputs, image_sizes):
176 | """
177 | Rescale the output instances to the target size.
178 | """
179 | # note: private function; subject to changes
180 | processed_results = []
181 | for results_per_image, input_per_image, image_size in zip(
182 | instances, batched_inputs, image_sizes
183 | ):
184 | height = input_per_image.get("height", image_size[0])
185 | width = input_per_image.get("width", image_size[1])
186 | r = detector_postprocess(results_per_image, height, width)
187 | processed_results.append({"instances": r})
188 | return processed_results
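
A small numeric sketch of the bezier rescaling in detector_postprocess above: each instance carries 16 values (8 control points), even indices are x coordinates scaled by scale_x, odd indices are y coordinates scaled by scale_y:

import torch

beziers = torch.ones(1, 16)        # one instance, dummy control points
scale_x, scale_y = 2.0, 0.5        # output resolution / network input resolution
beziers[:, 0::2] *= scale_x        # x coordinates doubled
beziers[:, 1::2] *= scale_y        # y coordinates halved
print(beziers)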
--------------------------------------------------------------------------------
/det/modeling/poolers.py:
--------------------------------------------------------------------------------
1 | import sys
2 | import torch
3 | from torch import nn
4 | from detectron2.layers import cat
5 |
6 | from detectron2.modeling.poolers import (
7 | ROIPooler, convert_boxes_to_pooler_format, assign_boxes_to_levels
8 | )
9 |
10 | from det.layers import BezierAlign
11 | from det.structures import Beziers
12 |
13 | __all__ = ["TopPooler"]
14 |
15 |
16 | def _box_max_size(boxes):
17 | box = boxes.tensor
18 | max_size = torch.max(box[:, 2] - box[:, 0], box[:, 3] - box[:, 1])
19 | return max_size
20 |
21 |
22 | def _bezier_height(beziers):
23 | beziers = beziers.tensor
24 | # compute the distance between the first and last control point
25 | p1 = beziers[:, :2]
26 | p2 = beziers[:, 14:]
27 | height = ((p1 - p2) ** 2).sum(dim=1).sqrt()
28 | return height
29 |
30 |
31 | def assign_boxes_to_levels_by_metric(
32 | box_lists, min_level, max_level, canonical_box_size,
33 | canonical_level, metric_fn=_box_max_size):
34 | """
35 | Map each box in `box_lists` to a feature map level index and return the assignment
36 | vector.
37 |
38 | Args:
39 | box_lists (list[detectron2.structures.Boxes]): A list of N Boxes or N RotatedBoxes,
40 | where N is the number of images in the batch.
41 | min_level (int): Smallest feature map level index. The input is considered index 0,
42 |             the output of stage 1 is index 1, and so on.
43 | max_level (int): Largest feature map level index.
44 | canonical_box_size (int): A canonical box size in pixels (shorter side).
45 | canonical_level (int): The feature map level index on which a canonically-sized box
46 | should be placed.
47 |
48 | Returns:
49 | A tensor of length M, where M is the total number of boxes aggregated over all
50 | N batch images. The memory layout corresponds to the concatenation of boxes
51 | from all images. Each element is the feature map index, as an offset from
52 | `self.min_level`, for the corresponding box (so value i means the box is at
53 | `self.min_level + i`).
54 | """
55 | eps = sys.float_info.epsilon
56 | box_sizes = cat([metric_fn(boxes) for boxes in box_lists])
57 | # Eqn.(1) in FPN paper
58 | level_assignments = torch.floor(
59 | canonical_level + torch.log2(box_sizes / canonical_box_size + eps)
60 | )
61 | level_assignments = torch.clamp(level_assignments, min=min_level, max=max_level)
62 | return level_assignments.to(torch.int64) - min_level
63 |
64 |
65 | def assign_boxes_to_levels_max(
66 | box_lists, min_level, max_level, canonical_box_size,
67 | canonical_level):
68 | return assign_boxes_to_levels_by_metric(
69 | box_lists, min_level, max_level, canonical_box_size,
70 | canonical_level, metric_fn=_box_max_size
71 | )
72 |
73 |
74 | def assign_boxes_to_levels_bezier(
75 | box_lists, min_level, max_level, canonical_box_size,
76 | canonical_level):
77 | return assign_boxes_to_levels_by_metric(
78 | box_lists, min_level, max_level, canonical_box_size,
79 | canonical_level, metric_fn=_bezier_height
80 | )
81 |
82 |
83 | class TopPooler(ROIPooler):
84 | """
85 | ROIPooler with option to assign level by max length. Used by top modules.
86 | """
87 | def __init__(self,
88 | output_size,
89 | scales,
90 | sampling_ratio,
91 | pooler_type,
92 | canonical_box_size=224,
93 | canonical_level=4,
94 | assign_crit="area",):
95 | # to reuse the parent initialization, handle unsupported pooler types
96 | parent_pooler_type = "ROIAlign" if pooler_type == "BezierAlign" else pooler_type
97 | super().__init__(output_size, scales, sampling_ratio, parent_pooler_type,
98 | canonical_box_size=canonical_box_size,
99 | canonical_level=canonical_level)
100 | if parent_pooler_type != pooler_type:
101 | # reinit the level_poolers here
102 | self.level_poolers = nn.ModuleList(
103 | BezierAlign(
104 | output_size, spatial_scale=scale,
105 | sampling_ratio=sampling_ratio) for scale in scales
106 | )
107 | self.assign_crit = assign_crit
108 |
109 | def forward(self, x, box_lists):
110 | """
111 |         See :meth:`ROIPooler.forward`; unlike the parent, `box_lists` may also contain Beziers.
112 | """
113 | num_level_assignments = len(self.level_poolers)
114 |
115 | assert isinstance(x, list) and isinstance(
116 | box_lists, list
117 | ), "Arguments to pooler must be lists"
118 | assert (
119 | len(x) == num_level_assignments
120 | ), "unequal value, num_level_assignments={}, but x is list of {} Tensors".format(
121 | num_level_assignments, len(x)
122 | )
123 |
124 | assert len(box_lists) == x[0].size(
125 | 0
126 | ), "unequal value, x[0] batch dim 0 is {}, but box_list has length {}".format(
127 | x[0].size(0), len(box_lists)
128 | )
129 |
130 | if isinstance(box_lists[0], torch.Tensor):
131 | # TODO: use Beziers for data_mapper
132 | box_lists = [Beziers(x) for x in box_lists]
133 | pooler_fmt_boxes = convert_boxes_to_pooler_format(box_lists)
134 |
135 | if num_level_assignments == 1:
136 | return self.level_poolers[0](x[0], pooler_fmt_boxes)
137 |
138 | if self.assign_crit == "max":
139 | assign_method = assign_boxes_to_levels_max
140 | elif self.assign_crit == "bezier":
141 | assign_method = assign_boxes_to_levels_bezier
142 | else:
143 | assign_method = assign_boxes_to_levels
144 |
145 | level_assignments = assign_method(
146 | box_lists, self.min_level, self.max_level,
147 | self.canonical_box_size, self.canonical_level)
148 |
149 | num_boxes = len(pooler_fmt_boxes)
150 | num_channels = x[0].shape[1]
151 | output_size = self.output_size
152 |
153 | dtype, device = x[0].dtype, x[0].device
154 | output = torch.zeros(
155 | (num_boxes, num_channels, output_size[0], output_size[1]), dtype=dtype, device=device
156 | )
157 |
158 | for level, (x_level, pooler) in enumerate(zip(x, self.level_poolers)):
159 | inds = torch.nonzero(level_assignments == level).squeeze(1)
160 | pooler_fmt_boxes_level = pooler_fmt_boxes[inds]
161 | output[inds] = pooler(x_level, pooler_fmt_boxes_level)
162 |
163 | return output
164 |
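
A numeric sketch of the level assignment used by assign_boxes_to_levels_by_metric above (Eqn. (1) of the FPN paper): the chosen metric (max side or bezier height) is compared against the canonical box size, and the pooler then subtracts min_level to get an offset into the feature list:

import torch

sizes = torch.tensor([56., 112., 224., 448., 896.])    # metric value per box
canonical_level, canonical_box_size = 4, 224
levels = torch.floor(canonical_level + torch.log2(sizes / canonical_box_size))
levels = torch.clamp(levels, min=3, max=5)              # e.g. pooling over p3..p5
print(levels)   # tensor([3., 3., 4., 5., 5.])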
--------------------------------------------------------------------------------
/det/modeling/roi_heads/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
2 | from .libra_rcnn import LibraRCNNROIHeads
3 |
--------------------------------------------------------------------------------
/det/modeling/roi_heads/__pycache__/__init__.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TengFeiHan0/Object-Detection.pytorch/5c2000ba08e903255a146f9e003c87a2fb1cb5aa/det/modeling/roi_heads/__pycache__/__init__.cpython-36.pyc
--------------------------------------------------------------------------------
/det/modeling/roi_heads/__pycache__/libra_rcnn.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TengFeiHan0/Object-Detection.pytorch/5c2000ba08e903255a146f9e003c87a2fb1cb5aa/det/modeling/roi_heads/__pycache__/libra_rcnn.cpython-36.pyc
--------------------------------------------------------------------------------
/det/modeling/roi_heads/libra_rcnn.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
2 | import logging
3 | import numpy as np
4 | import torch
5 | from torch import nn
6 | from torch.nn import functional as F
7 |
8 | from detectron2.layers import ShapeSpec
9 | from detectron2.structures import Boxes, Instances, pairwise_iou
10 | from detectron2.modeling.roi_heads.fast_rcnn import FastRCNNOutputs
11 | from detectron2.layers import batched_nms, cat
12 | from det.layers import BalancedL1Loss
13 | from detectron2.modeling.roi_heads import (StandardROIHeads, ROI_HEADS_REGISTRY)
14 | from detectron2.modeling.sampling import subsample_labels
15 |
16 |
17 | logger = logging.getLogger(__name__)
18 |
19 |
20 |
21 | class LibraRCNNOutputs(FastRCNNOutputs):
22 | def __init__(
23 |         self, box2box_transform, pred_class_logits, pred_proposal_deltas, proposals, balanced_l1_beta
24 | ):
25 | self.box2box_transform = box2box_transform
26 | self.num_preds_per_image = [len(p) for p in proposals]
27 | self.pred_class_logits = pred_class_logits
28 | self.pred_proposal_deltas = pred_proposal_deltas
29 |         self.balanced_l1_beta = balanced_l1_beta
30 |
31 | box_type = type(proposals[0].proposal_boxes)
32 | # cat(..., dim=0) concatenates over all images in the batch
33 | self.proposals = box_type.cat([p.proposal_boxes for p in proposals])
34 | assert not self.proposals.tensor.requires_grad, "Proposals should not require gradients!"
35 | self.image_shapes = [x.image_size for x in proposals]
36 |
37 | self.ba_l1_loss = BalancedL1Loss(alpha=0.5,
38 | gamma=1.5,
39 | beta=1.0,
40 | reduction='mean',
41 | loss_weight=1.0)
42 |
43 | # The following fields should exist only when training.
44 | if proposals[0].has("gt_boxes"):
45 | self.gt_boxes = box_type.cat([p.gt_boxes for p in proposals])
46 | assert proposals[0].has("gt_classes")
47 | self.gt_classes = cat([p.gt_classes for p in proposals], dim=0)
48 |
49 |     def balanced_l1_loss(self):
50 |         """
51 |         Compute the balanced L1 loss for box regression.
52 |
53 | Returns:
54 | scalar Tensor
55 | """
56 | gt_proposal_deltas = self.box2box_transform.get_deltas(
57 | self.proposals.tensor, self.gt_boxes.tensor
58 | )
59 | box_dim = gt_proposal_deltas.size(1) # 4 or 5
60 | cls_agnostic_bbox_reg = self.pred_proposal_deltas.size(1) == box_dim
61 | device = self.pred_proposal_deltas.device
62 |
63 | bg_class_ind = self.pred_class_logits.shape[1] - 1
64 |
65 | fg_inds = torch.nonzero((self.gt_classes >= 0) & (self.gt_classes < bg_class_ind)).squeeze(
66 | 1
67 | )
68 | if cls_agnostic_bbox_reg:
69 | # pred_proposal_deltas only corresponds to foreground class for agnostic
70 | gt_class_cols = torch.arange(box_dim, device=device)
71 | else:
72 | fg_gt_classes = self.gt_classes[fg_inds]
73 | # pred_proposal_deltas for class k are located in columns [b * k : b * k + b],
74 | # where b is the dimension of box representation (4 or 5)
75 | # Note that compared to Detectron1,
76 | # we do not perform bounding box regression for background classes.
77 | gt_class_cols = box_dim * fg_gt_classes[:, None] + torch.arange(box_dim, device=device)
78 |
79 |
80 | loss_box_reg = self.ba_l1_loss(
81 | self.pred_proposal_deltas[fg_inds[:, None], gt_class_cols],
82 | gt_proposal_deltas[fg_inds],
83 |             self.balanced_l1_beta,
84 | reduction="mean",
85 | )
86 |
87 | loss_box_reg = loss_box_reg / self.gt_classes.numel()
88 | return loss_box_reg
89 |
90 | def losses(self):
91 | """
92 | Compute the default losses for box head in Fast(er) R-CNN,
93 | with softmax cross entropy loss and smooth L1 loss.
94 |
95 | Returns:
96 | A dict of losses (scalar tensors) containing keys "loss_cls" and "loss_box_reg".
97 | """
98 | return {
99 | "loss_cls": self.softmax_cross_entropy_loss(),
100 |             "loss_box_reg_ba": self.balanced_l1_loss(),
101 | }
102 |
103 | @ROI_HEADS_REGISTRY.register()
104 | class LibraRCNNROIHeads(StandardROIHeads):
105 |
106 | def _sample_proposals(self, matched_idxs, matched_labels, gt_classes):
107 | """
108 | modified from roi_heads
109 | """
110 | has_gt = gt_classes.numel() > 0
111 | # Get the corresponding GT for each proposal
112 | if has_gt:
113 | gt_classes = gt_classes[matched_idxs]
114 | # Label unmatched proposals (0 label from matcher) as background (label=num_classes)
115 | gt_classes[matched_labels == 0] = self.num_classes
116 | # Label ignore proposals (-1 label)
117 | gt_classes[matched_labels == -1] = -1
118 | else:
119 | gt_classes = torch.zeros_like(matched_idxs) + self.num_classes
120 |
121 | sampled_fg_idxs, sampled_bg_idxs = subsample_labels(
122 | gt_classes, self.batch_size_per_image, self.positive_sample_fraction, self.num_classes
123 | )
124 |
125 | sampled_idxs = torch.cat([sampled_fg_idxs, sampled_bg_idxs], dim=0)
126 | return sampled_idxs, gt_classes[sampled_idxs]
127 |
128 |
129 |     def _forward_box(self, features, proposals):
130 | box_features = self.box_pooler(features, [x.proposal_boxes for x in proposals])
131 | box_features = self.box_head(box_features)
132 | pred_class_logits, pred_proposal_deltas = self.box_predictor(box_features)
133 | del box_features
134 |
135 | outputs = LibraRCNNOutputs(
136 | self.box2box_transform,
137 | pred_class_logits,
138 | pred_proposal_deltas,
139 | proposals,
140 | self.smooth_l1_beta,
141 | )
142 | if self.training:
143 | if self.train_on_pred_boxes:
144 | pred_boxes = outputs.predict_boxes_for_gt_classes()
145 | for proposals_per_image, pred_boxes_per_image in zip(proposals, pred_boxes):
146 | proposals_per_image.proposal_boxes = Boxes(pred_boxes_per_image)
147 | return outputs.losses()
148 | else:
149 | pred_instances, _ = outputs.inference(
150 | self.test_score_thresh, self.test_nms_thresh, self.test_detections_per_img
151 | )
152 | return pred_instances
153 |
154 |
155 |
156 |
157 |
158 |
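
A minimal sketch of enabling this head through the registry; the only change relative to StandardROIHeads is the balanced L1 box-regression loss from det.layers. MODEL.ROI_HEADS.NAME is a stock detectron2 key:

from detectron2.config import get_cfg
from detectron2.modeling import build_model
import det.modeling.roi_heads  # noqa: F401  (runs the @ROI_HEADS_REGISTRY.register() above)

cfg = get_cfg()
cfg.MODEL.ROI_HEADS.NAME = "LibraRCNNROIHeads"
model = build_model(cfg)       # GeneralizedRCNN with the Libra ROI head plugged in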
--------------------------------------------------------------------------------
/det/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TengFeiHan0/Object-Detection.pytorch/5c2000ba08e903255a146f9e003c87a2fb1cb5aa/det/utils/__init__.py
--------------------------------------------------------------------------------
/det/utils/__pycache__/__init__.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TengFeiHan0/Object-Detection.pytorch/5c2000ba08e903255a146f9e003c87a2fb1cb5aa/det/utils/__pycache__/__init__.cpython-36.pyc
--------------------------------------------------------------------------------
/det/utils/__pycache__/comm.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TengFeiHan0/Object-Detection.pytorch/5c2000ba08e903255a146f9e003c87a2fb1cb5aa/det/utils/__pycache__/comm.cpython-36.pyc
--------------------------------------------------------------------------------
/det/utils/comm.py:
--------------------------------------------------------------------------------
1 | import torch.distributed as dist
2 | from detectron2.utils.comm import get_world_size
3 |
4 |
5 | def reduce_sum(tensor):
6 | world_size = get_world_size()
7 | if world_size < 2:
8 | return tensor
9 | tensor = tensor.clone()
10 | dist.all_reduce(tensor, op=dist.ReduceOp.SUM)
11 | return tensor
12 |
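
reduce_sum is typically used to keep a loss normalizer identical on every GPU, e.g. averaging the number of foreground samples across workers in FCOS-style losses. A sketch with a hypothetical per-GPU count:

import torch
from detectron2.utils.comm import get_world_size

num_pos_local = torch.tensor([37.0])                  # hypothetical #foreground on this GPU
num_pos_total = reduce_sum(num_pos_local)             # summed over all workers
num_pos_avg = max(num_pos_total.item() / get_world_size(), 1.0)
print(num_pos_avg)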
--------------------------------------------------------------------------------
/det/utils/measures.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 | # Adapted from https://github.com/ShichenLiu/CondenseNet/blob/master/utils.py
3 | from __future__ import absolute_import
4 | from __future__ import unicode_literals
5 | from __future__ import print_function
6 | from __future__ import division
7 |
8 | import operator
9 |
10 | from functools import reduce
11 |
12 |
13 | def get_num_gen(gen):
14 | return sum(1 for x in gen)
15 |
16 |
17 | def is_pruned(layer):
18 | try:
19 | layer.mask
20 | return True
21 | except AttributeError:
22 | return False
23 |
24 |
25 | def is_leaf(model):
26 | return get_num_gen(model.children()) == 0
27 |
28 |
29 | def get_layer_info(layer):
30 | layer_str = str(layer)
31 | type_name = layer_str[:layer_str.find('(')].strip()
32 | return type_name
33 |
34 |
35 | def get_layer_param(model):
36 | return sum([reduce(operator.mul, i.size(), 1) for i in model.parameters()])
37 |
38 |
39 | ### The input batch size should be 1 to call this function
40 | def measure_layer(layer, *args):
41 | global count_ops, count_params
42 |
43 | for x in args:
44 | delta_ops = 0
45 | delta_params = 0
46 | multi_add = 1
47 | type_name = get_layer_info(layer)
48 |
49 | ### ops_conv
50 | if type_name in ['Conv2d']:
51 | out_h = int((x.size()[2] + 2 * layer.padding[0] / layer.dilation[0] - layer.kernel_size[0]) /
52 | layer.stride[0] + 1)
53 | out_w = int((x.size()[3] + 2 * layer.padding[1] / layer.dilation[1] - layer.kernel_size[1]) /
54 | layer.stride[1] + 1)
55 | delta_ops = layer.in_channels * layer.out_channels * layer.kernel_size[0] * layer.kernel_size[1] * out_h * out_w / layer.groups * multi_add
56 | delta_params = get_layer_param(layer)
57 |
58 | elif type_name in ['ConvTranspose2d']:
59 | _, _, in_h, in_w = x.size()
60 | out_h = int((in_h-1)*layer.stride[0] - 2 * layer.padding[0] + layer.kernel_size[0] + layer.output_padding[0])
61 | out_w = int((in_w-1)*layer.stride[1] - 2 * layer.padding[1] + layer.kernel_size[1] + layer.output_padding[1])
62 | delta_ops = layer.in_channels * layer.out_channels * layer.kernel_size[0] * \
63 | layer.kernel_size[1] * out_h * out_w / layer.groups * multi_add
64 | delta_params = get_layer_param(layer)
65 |
66 | ### ops_learned_conv
67 | elif type_name in ['LearnedGroupConv']:
68 | measure_layer(layer.relu, x)
69 | measure_layer(layer.norm, x)
70 | conv = layer.conv
71 | out_h = int((x.size()[2] + 2 * conv.padding[0] - conv.kernel_size[0]) /
72 | conv.stride[0] + 1)
73 | out_w = int((x.size()[3] + 2 * conv.padding[1] - conv.kernel_size[1]) /
74 | conv.stride[1] + 1)
75 | delta_ops = conv.in_channels * conv.out_channels * conv.kernel_size[0] * conv.kernel_size[1] * out_h * out_w / layer.condense_factor * multi_add
76 | delta_params = get_layer_param(conv) / layer.condense_factor
77 |
78 | ### ops_nonlinearity
79 | elif type_name in ['ReLU', 'ReLU6']:
80 | delta_ops = x.numel()
81 | delta_params = get_layer_param(layer)
82 |
83 | ### ops_pooling
84 | elif type_name in ['AvgPool2d', 'MaxPool2d']:
85 | in_w = x.size()[2]
86 | kernel_ops = layer.kernel_size * layer.kernel_size
87 | out_w = int((in_w + 2 * layer.padding - layer.kernel_size) / layer.stride + 1)
88 | out_h = int((in_w + 2 * layer.padding - layer.kernel_size) / layer.stride + 1)
89 | delta_ops = x.size()[0] * x.size()[1] * out_w * out_h * kernel_ops
90 | delta_params = get_layer_param(layer)
91 |
92 | elif type_name in ['LastLevelMaxPool']:
93 | pass
94 |
95 | elif type_name in ['AdaptiveAvgPool2d']:
96 | delta_ops = x.size()[0] * x.size()[1] * x.size()[2] * x.size()[3]
97 | delta_params = get_layer_param(layer)
98 |
99 | elif type_name in ['ZeroPad2d', 'RetinaNetPostProcessor']:
100 | pass
101 | #delta_ops = x.size()[0] * x.size()[1] * x.size()[2] * x.size()[3]
102 | #delta_params = get_layer_param(layer)
103 |
104 | ### ops_linear
105 | elif type_name in ['Linear']:
106 | weight_ops = layer.weight.numel() * multi_add
107 | bias_ops = layer.bias.numel()
108 | delta_ops = x.size()[0] * (weight_ops + bias_ops)
109 | delta_params = get_layer_param(layer)
110 |
111 | ### ops_nothing
112 | elif type_name in ['BatchNorm2d', 'Dropout2d', 'DropChannel', 'Dropout', 'FrozenBatchNorm2d', 'GroupNorm']:
113 | delta_params = get_layer_param(layer)
114 |
115 | elif type_name in ['SumTwo']:
116 | delta_ops = x.numel()
117 |
118 | elif type_name in ['AggregateCell']:
119 | if not layer.pre_transform:
120 | delta_ops = 2 * x.numel() # twice for each input
121 | else:
122 | measure_layer(layer.branch_1, x)
123 | measure_layer(layer.branch_2, x)
124 | delta_params = get_layer_param(layer)
125 |
126 | elif type_name in ['Identity', 'Zero']:
127 | pass
128 |
129 | elif type_name in ['Scale']:
130 | delta_params = get_layer_param(layer)
131 | delta_ops = x.numel()
132 |
133 | elif type_name in ['FCOSPostProcessor', 'RPNPostProcessor', 'KeypointPostProcessor',
134 | 'ROIAlign', 'PostProcessor', 'KeypointRCNNPredictor',
135 | 'NaiveSyncBatchNorm', 'Upsample', 'Sequential']:
136 | pass
137 |
138 | elif type_name in ['DeformConv']:
139 | # don't count bilinear
140 | offset_conv = list(layer.parameters())[0]
141 | delta_ops = reduce(operator.mul, offset_conv.size(), x.size()[2] * x.size()[3])
142 | out_h = int((x.size()[2] + 2 * layer.padding[0] / layer.dilation[0]
143 | - layer.kernel_size[0]) / layer.stride[0] + 1)
144 | out_w = int((x.size()[3] + 2 * layer.padding[1] / layer.dilation[1]
145 | - layer.kernel_size[1]) / layer.stride[1] + 1)
146 | delta_ops += layer.in_channels * layer.out_channels * layer.kernel_size[0] * layer.kernel_size[1] * out_h * out_w / layer.groups * multi_add
147 | delta_params = get_layer_param(layer)
148 |
149 | ### unknown layer type
150 | else:
151 | raise TypeError('unknown layer type: %s' % type_name)
152 |
153 | count_ops += delta_ops
154 | count_params += delta_params
155 | return
156 |
157 |
158 | def measure_model(model, x):
159 | global count_ops, count_params
160 | count_ops = 0
161 | count_params = 0
162 |
163 | def should_measure(x):
164 | return is_leaf(x) or is_pruned(x)
165 |
166 | def modify_forward(model):
167 | for child in model.children():
168 | if should_measure(child):
169 | def new_forward(m):
170 | def lambda_forward(*args):
171 | measure_layer(m, *args)
172 | return m.old_forward(*args)
173 | return lambda_forward
174 | child.old_forward = child.forward
175 | child.forward = new_forward(child)
176 | else:
177 | modify_forward(child)
178 |
179 | def restore_forward(model):
180 | for child in model.children():
181 | # leaf node
182 | if is_leaf(child) and hasattr(child, 'old_forward'):
183 | child.forward = child.old_forward
184 | child.old_forward = None
185 | else:
186 | restore_forward(child)
187 |
188 | modify_forward(model)
189 | out = model.forward(x)
190 | restore_forward(model)
191 |
192 | return out, count_ops, count_params
193 |
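
A minimal usage sketch for measure_model (the input batch size must be 1, per the note above). torchvision is assumed to be available; any model whose leaf layers are covered by measure_layer works:

import torch
import torchvision

model = torchvision.models.resnet18()
x = torch.randn(1, 3, 224, 224)
out, num_ops, num_params = measure_model(model, x)
print("ops: %.2f GMac, params: %.2f M" % (num_ops / 1e9, num_params / 1e6))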
--------------------------------------------------------------------------------
/det/utils/visualizer.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | from detectron2.utils.visualizer import Visualizer
4 |
5 |
6 | class TextVisualizer(Visualizer):
7 | def draw_instance_predictions(self, predictions):
8 | beziers = predictions.beziers.numpy()
9 | scores = predictions.scores.tolist()
10 | recs = predictions.recs
11 |
12 | self.overlay_instances(beziers, recs, scores)
13 |
14 | return self.output
15 |
16 | def _bezier_to_poly(self, bezier):
17 | # bezier to polygon
18 | u = np.linspace(0, 1, 20)
19 | bezier = bezier.reshape(2, 4, 2).transpose(0, 2, 1).reshape(4, 4)
20 | points = np.outer((1 - u) ** 3, bezier[:, 0]) \
21 | + np.outer(3 * u * ((1 - u) ** 2), bezier[:, 1]) \
22 | + np.outer(3 * (u ** 2) * (1 - u), bezier[:, 2]) \
23 | + np.outer(u ** 3, bezier[:, 3])
24 | points = np.concatenate((points[:, :2], points[:, 2:]), axis=0)
25 |
26 | return points
27 |
28 | def _decode_recognition(self, rec):
29 | CTLABELS = [' ','!','"','#','$','%','&','\'','(',')','*','+',',','-','.','/','0','1','2','3','4','5','6','7','8','9',':',';','<','=','>','?','@','A','B','C','D','E','F','G','H','I','J','K','L','M','N','O','P','Q','R','S','T','U','V','W','X','Y','Z','[','\\',']','^','_','`','a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v','w','x','y','z','{','|','}','~']
30 |
31 | s = ''
32 | for c in rec:
33 | c = int(c)
34 | if c < 95:
35 | s += CTLABELS[c]
36 | elif c == 95:
37 | s += u'口'
38 | return s
39 |
40 | def _ctc_decode_recognition(self, rec):
41 | CTLABELS = [' ','!','"','#','$','%','&','\'','(',')','*','+',',','-','.','/','0','1','2','3','4','5','6','7','8','9',':',';','<','=','>','?','@','A','B','C','D','E','F','G','H','I','J','K','L','M','N','O','P','Q','R','S','T','U','V','W','X','Y','Z','[','\\',']','^','_','`','a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v','w','x','y','z','{','|','}','~']
42 |
43 | # ctc decoding
44 | last_char = False
45 | s = ''
46 | for c in rec:
47 | c = int(c)
48 | if c < 95:
49 | if last_char != c:
50 | s += CTLABELS[c]
51 | last_char = c
52 | elif c == 95:
53 | s += u'口'
54 | else:
55 | last_char = False
56 | return s
57 |
58 | def overlay_instances(self, beziers, recs, scores, alpha=0.5):
59 | color = (0.1, 0.2, 0.5)
60 |
61 | for bezier, rec, score in zip(beziers, recs, scores):
62 | polygon = self._bezier_to_poly(bezier)
63 | self.draw_polygon(polygon, color, alpha=alpha)
64 |
65 | # draw text in the top left corner
66 | text = self._decode_recognition(rec)
67 | text = "{:.3f}: {}".format(score, text)
68 | lighter_color = self._change_color_brightness(color, brightness_factor=0.7)
69 | text_pos = polygon[0]
70 | horiz_align = "left"
71 | font_size = self._default_font_size
72 |
73 | self.draw_text(
74 | text,
75 | text_pos,
76 | color=lighter_color,
77 | horizontal_alignment=horiz_align,
78 | font_size=font_size,
79 | )
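_bezier_to_poly above samples the top and bottom edges of a text region, each a cubic Bezier curve defined by four control points, at 20 parameter values using the Bernstein form. A small hedged sketch of that evaluation for a single curve (the control points here are made up):

import numpy as np

# four (x, y) control points of one cubic Bezier curve (illustrative values)
ctrl = np.array([[0.0, 0.0], [1.0, 2.0], [3.0, 2.0], [4.0, 0.0]])
u = np.linspace(0, 1, 20)[:, None]

# Bernstein form: B(u) = (1-u)^3 P0 + 3u(1-u)^2 P1 + 3u^2(1-u) P2 + u^3 P3
curve = (1 - u) ** 3 * ctrl[0] \
    + 3 * u * (1 - u) ** 2 * ctrl[1] \
    + 3 * u ** 2 * (1 - u) * ctrl[2] \
    + u ** 3 * ctrl[3]

print(curve.shape)  # (20, 2): 20 points sampled along the curve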
--------------------------------------------------------------------------------
/docs/nms/README.md:
--------------------------------------------------------------------------------
 1 | # NMS Explained
2 |
3 | https://zhuanlan.zhihu.com/p/80902998
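The linked article (in Chinese) walks through the NMS variants implemented in this folder. As a quick orientation, a hedged usage sketch of the two wrappers exported by __init__.py is shown below; it assumes the nms_cpu / nms_cuda / soft_nms_cpu extensions under src/ have been compiled and that the package is importable as `nms` (signatures taken from nms_wrapper.py).

import numpy as np
# from nms import nms, soft_nms   # assuming the compiled extensions are available

dets = np.array([[49.1, 32.4, 51.0, 35.9, 0.9],
                 [49.3, 32.9, 51.0, 35.3, 0.9],
                 [35.1, 11.5, 39.1, 15.7, 0.5]], dtype=np.float32)

# kept, inds = nms(dets, iou_thr=0.7)                        # hard NMS: drop overlapping boxes
# kept, inds = soft_nms(dets, iou_thr=0.3, method='linear')  # soft NMS: decay their scores instead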
--------------------------------------------------------------------------------
/docs/nms/__init__.py:
--------------------------------------------------------------------------------
1 | from .nms_wrapper import nms, soft_nms
2 |
3 | __all__ = ['nms', 'soft_nms']
4 |
--------------------------------------------------------------------------------
/docs/nms/demo_nms.cpp:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include <stdlib.h>
 3 | #include <algorithm>
 4 | #include <opencv2/core/types_c.h>
5 |
6 | static void sort( int n, const float* x, int* indices){
7 | int i, j;
 8 |     for(i=0; i<n; i++){
 9 |         for(j=i+1; j<n; j++){
10 |             if(x[indices[j]] > x[indices[i]]){
11 | std::swap(indices[i], indices[j]);
12 | }
13 |
14 | }
15 | }
16 |
17 | }
18 |
19 | int nonMaximumSuppression(
20 | int numBoxes, const CvPoint *points,
21 | const CvPoint *oppositePoints, const float *score,
22 | float overlapThreshold, int *numBoxOut, CvPoint **pointsOut,
23 |     CvPoint **oppositePointsOut, float **scoreOut){
24 |
25 |
26 | int i, j, index;
27 | float *box_area = (float *)malloc(numBoxes*sizeof(float));
28 | int *indices = (int *)malloc(numBoxes*sizeof(int));
29 | int *is_suppressed = (int *)malloc(numBoxes*sizeof(int));
30 |
31 |     // initialize indices, suppression flags and box areas
32 |     for(i=0; i<numBoxes; i++){
33 |         indices[i] = i;
34 |         is_suppressed[i] = 0;
35 |         // area of each box (inclusive pixel coordinates)
36 |         box_area[i] = (float)((oppositePoints[i].x - points[i].x + 1) *
37 |                               (oppositePoints[i].y - points[i].y + 1));
38 |     }
39 |
40 |     // sort box indices by score in descending order
41 |     sort(numBoxes, score, indices);
42 |
43 |     // greedily keep the highest-scored boxes and suppress those that overlap them too much
44 |     for(i=0; i<numBoxes; i++){
45 |         if(!is_suppressed[indices[i]]){
46 |             for(j=i+1; j<numBoxes; j++){
47 |                 if(!is_suppressed[indices[j]]){
48 |                     // intersection rectangle of box i and box j
49 |                     int x1max = std::max(points[indices[i]].x, points[indices[j]].x);
50 |                     int x2min = std::min(oppositePoints[indices[i]].x, oppositePoints[indices[j]].x);
51 |                     int y1max = std::max(points[indices[i]].y, points[indices[j]].y);
52 |                     int y2min = std::min(oppositePoints[indices[i]].y, oppositePoints[indices[j]].y);
53 |                     int overlapWidth = x2min - x1max + 1;
54 |                     int overlapHeight = y2min - y1max + 1;
55 |
56 |                     // suppress the lower-scored box j when the overlap ratio exceeds the threshold
57 |                     if(overlapWidth > 0 && overlapHeight > 0){
58 |                         float overlapPart = (overlapWidth * overlapHeight) / box_area[indices[j]];
59 |                         if(overlapPart > overlapThreshold){
60 |                             is_suppressed[indices[j]] = 1;
61 |                         }
62 |                     }
63 |                 }
64 |             }
65 |         }
66 |     }
67 |
68 | *numBoxOut = 0;
69 | for(int i = 0; i < numBoxes; i++){
70 | if(!is_suppressed[i]) (*numBoxOut)++;
71 | }
72 |
73 | *pointsOut = (CvPoint*)malloc((*numBoxOut) * sizeof(CvPoint));
74 | *oppositePointsOut = (CvPoint*)malloc((*numBoxOut) *sizeof(CvPoint));
75 | *scoreOut = (float*)malloc((*numBoxOut) * sizeof(float));
76 |
77 | index =0;
78 |
79 | for(int i = 0; i < numBoxes; i++){
80 | if(!is_suppressed[indices[i]]){
81 | (*pointsOut)[index].x = points[indices[i]].x;
82 | (*pointsOut)[index].y = points[indices[i]].y;
83 | (*oppositePointsOut)[index].x = oppositePoints[indices[i]].x;
84 | (*oppositePointsOut)[index].y = oppositePoints[indices[i]].y;
85 | (*scoreOut)[index] = score[indices[i]];
86 | index++;
87 | }
88 | }
89 |
90 | free(indices);
91 | free(box_area);
92 | free(is_suppressed);
93 | return 1;
94 | }
--------------------------------------------------------------------------------
/docs/nms/demo_nms.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import torch
 3 | def compute_iou(box1, box2, wh=False):
 4 |     if not wh:
 5 |         xmin1, ymin1, xmax1, ymax1 = box1
 6 |         xmin2, ymin2, xmax2, ymax2 = box2
 7 |     else:
 8 |         xmin1, ymin1 = int((box1[0] - box1[2]) / 2.0), int((box1[1] - box1[3]) / 2.0)
 9 |         xmax1, ymax1 = int((box1[0] + box1[2]) / 2.0), int((box1[1] + box1[3]) / 2.0)
10 |         xmin2, ymin2 = int((box2[0] - box2[2]) / 2.0), int((box2[1] - box2[3]) / 2.0)
11 |         xmax2, ymax2 = int((box2[0] + box2[2]) / 2.0), int((box2[1] + box2[3]) / 2.0)
12 |
13 |     xx1 = np.max([xmin1, xmin2])  # intersection: max of the top-left corners
14 |     yy1 = np.max([ymin1, ymin2])
15 |     xx2 = np.min([xmax1, xmax2])  # and min of the bottom-right corners
16 |     yy2 = np.min([ymax1, ymax2])
17 |
18 |     area1 = (xmax1 - xmin1) * (ymax1 - ymin1)
19 |     area2 = (xmax2 - xmin2) * (ymax2 - ymin2)
20 |
21 |     inter_area = np.max([0, xx2 - xx1]) * np.max([0, yy2 - yy1])
22 |     iou = inter_area / (area1 + area2 - inter_area + 1e-6)
23 |     return iou
24 |
25 | def py_cpu_nms(dets, thresh):
26 |     # dets: boxes of a single class; each row holds x1, y1, x2, y2 and the confidence score
27 |     # e.g. dets = [[x1, y1, x2, y2, score], [x1, y1, x2, y2, score], ...]
28 |     x1 = dets[:, 0]
29 |     y1 = dets[:, 1]
30 |     x2 = dets[:, 2]
31 |     y2 = dets[:, 3]
32 |     scores = dets[:, 4]
33 |     # box areas
34 |     areas = (x2 - x1 + 1) * (y2 - y1 + 1)
35 |     order = scores.argsort()[::-1]  # sort by confidence in descending order
36 |     keep = []
37 |
38 |     while order.size > 0:
39 |         i = order[0]  # keep the highest-scored remaining box
40 |         keep.append(i)
41 |         # intersection rectangles: top-left and bottom-right corners
42 |         xx1 = np.maximum(x1[i], x1[order[1:]])
43 |         yy1 = np.maximum(y1[i], y1[order[1:]])
44 |         xx2 = np.minimum(x2[i], x2[order[1:]])
45 |         yy2 = np.minimum(y2[i], y2[order[1:]])
46 |         # intersection widths and heights, clipped at zero
47 |         w = np.maximum(0.0, xx2 - xx1 + 1)
48 |         h = np.maximum(0.0, yy2 - yy1 + 1)
49 |
50 |         inter = w * h
51 |         # IoU = overlap / (area1 + area2 - overlap)
52 |         ovr = inter / (areas[i] + areas[order[1:]] - inter)
53 |         # keep only the boxes whose IoU is below the threshold
54 |         inds = np.where(ovr <= thresh)[0]
55 |         order = order[inds + 1]  # ovr is one element shorter than order, so shift the indices by one
56 |
57 |     return keep
58 |
59 | def iou(box1, box2):
60 |     N = box1.size(0)
61 |     M = box2.size(0)
62 |
63 |     lt = torch.max(  # top-left corners of the pairwise intersections
64 |         box1[:, :2].unsqueeze(1).expand(N, M, 2),  # [N,2] -> [N,1,2] -> [N,M,2]
65 |         box2[:, :2].unsqueeze(0).expand(N, M, 2),  # [M,2] -> [1,M,2] -> [N,M,2]
66 |     )
67 |
68 |     rb = torch.min(  # bottom-right corners of the pairwise intersections
69 |         box1[:, 2:].unsqueeze(1).expand(N, M, 2),
70 |         box2[:, 2:].unsqueeze(0).expand(N, M, 2),
71 |     )
72 |     wh = rb - lt
73 |     wh[wh < 0] = 0  # no overlap -> zero width/height
74 |
75 |     inter = wh[:, :, 0] * wh[:, :, 1]
76 |     area1 = (box1[:, 2] - box1[:, 0]) * (box1[:, 3] - box1[:, 1])
77 |     area2 = (box2[:, 2] - box2[:, 0]) * (box2[:, 3] - box2[:, 1])
78 |     area1 = area1.unsqueeze(1).expand(N, M)
79 |     area2 = area2.unsqueeze(0).expand(N, M)
80 |
81 |     iou = inter / (area1 + area2 - inter)
82 |
83 |     return iou
84 |
 85 | def nms(bboxes, scores, threshold=0.5):
 86 |     x1 = bboxes[:, 0]
 87 |     y1 = bboxes[:, 1]
 88 |     x2 = bboxes[:, 2]
 89 |     y2 = bboxes[:, 3]
 90 |     areas = (x2 - x1) * (y2 - y1)
 91 |     _, order = scores.sort(0, descending=True)
 92 |     keep = []
 93 |     while order.numel() > 0:
 94 |         if order.numel() == 1:  # only one box left: keep it and stop
 95 |             i = order.item()
 96 |             keep.append(i)
 97 |             break
 98 |         i = order[0].item()  # highest-scored remaining box
 99 |         keep.append(i)
100 |         xx1 = x1[order[1:]].clamp(min=x1[i])  # intersection with the remaining boxes
101 |         yy1 = y1[order[1:]].clamp(min=y1[i])
102 |         xx2 = x2[order[1:]].clamp(max=x2[i])
103 |         yy2 = y2[order[1:]].clamp(max=y2[i])
104 |
105 |         inter = (xx2 - xx1).clamp(min=0) * (yy2 - yy1).clamp(min=0)
106 |
107 |         iou = inter / (areas[i] + areas[order[1:]] - inter)
108 |         idx = (iou < threshold).nonzero().squeeze()  # surviving boxes, indexed relative to order[1:]
109 |
110 |         order = order[idx + 1]  # shift by one because iou skips order[0]
111 |     return torch.LongTensor(keep)
112 |
113 |
114 |
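A quick sanity check for py_cpu_nms defined above; the boxes and threshold are made up for illustration.

import numpy as np

dets = np.array([[10, 10, 50, 50, 0.95],
                 [12, 12, 48, 48, 0.90],     # heavy overlap with the first box
                 [100, 100, 150, 150, 0.80]], dtype=np.float32)

keep = py_cpu_nms(dets, thresh=0.5)  # py_cpu_nms from the listing above
print(keep)                          # expected: [0, 2] -- the first and third box survive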
--------------------------------------------------------------------------------
/docs/nms/nms.cpp:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include <stdlib.h>
 3 | #include <algorithm>
 4 | #include <opencv2/core/types_c.h>
5 |
6 | static void sort( int n, const float* x, int* indices){
7 | int i, j;
 8 |     for(i=0; i<n; i++){
 9 |         for(j=i+1; j<n; j++){
10 |             if(x[indices[j]] > x[indices[i]]){
11 | std::swap(indices[i], indices[j]);
12 | }
13 |
14 | }
15 | }
16 |
17 | }
18 |
19 | int nonMaximumSuppression(
20 | int numBoxes, const CvPoint *points,
21 | const CvPoint *oppositePoints, const float *score,
22 | float overlapThreshold, int *numBoxOut, CvPoint **pointsOut,
23 |     CvPoint **oppositePointsOut, float **scoreOut){
24 |
25 |
26 | int i, j, index;
27 | float *box_area = (float *)malloc(numBoxes*sizeof(float));
28 | int *indices = (int *)malloc(numBoxes*sizeof(int));
29 | int *is_suppressed = (int *)malloc(numBoxes*sizeof(int));
30 |
31 |     // initialize indices, suppression flags and box areas
32 |     for(i=0; i<numBoxes; i++){
33 |         indices[i] = i;
34 |         is_suppressed[i] = 0;
35 |         // area of each box (inclusive pixel coordinates)
36 |         box_area[i] = (float)((oppositePoints[i].x - points[i].x + 1) *
37 |                               (oppositePoints[i].y - points[i].y + 1));
38 |     }
39 |
40 |     // sort box indices by score in descending order
41 |     sort(numBoxes, score, indices);
42 |
43 |     // greedily keep the highest-scored boxes and suppress those that overlap them too much
44 |     for(i=0; i<numBoxes; i++){
45 |         if(!is_suppressed[indices[i]]){
46 |             for(j=i+1; j<numBoxes; j++){
47 |                 if(!is_suppressed[indices[j]]){
48 |                     // intersection rectangle of box i and box j
49 |                     int x1max = std::max(points[indices[i]].x, points[indices[j]].x);
50 |                     int x2min = std::min(oppositePoints[indices[i]].x, oppositePoints[indices[j]].x);
51 |                     int y1max = std::max(points[indices[i]].y, points[indices[j]].y);
52 |                     int y2min = std::min(oppositePoints[indices[i]].y, oppositePoints[indices[j]].y);
53 |                     int overlapWidth = x2min - x1max + 1;
54 |                     int overlapHeight = y2min - y1max + 1;
55 |
56 |                     // suppress the lower-scored box j when the overlap ratio exceeds the threshold
57 |                     if(overlapWidth > 0 && overlapHeight > 0){
58 |                         float overlapPart = (overlapWidth * overlapHeight) / box_area[indices[j]];
59 |                         if(overlapPart > overlapThreshold){
60 |                             is_suppressed[indices[j]] = 1;
61 |                         }
62 |                     }
63 |                 }
64 |             }
65 |         }
66 |     }
67 |
68 | *numBoxOut = 0;
69 | for(int i = 0; i < numBoxes; i++){
70 | if(!is_suppressed[i]) (*numBoxOut)++;
71 | }
72 |
73 | *pointsOut = (CvPoint*)malloc((*numBoxOut) * sizeof(CvPoint));
74 | *oppositePointsOut = (CvPoint*)malloc((*numBoxOut) *sizeof(CvPoint));
75 | *scoreOut = (float*)malloc((*numBoxOut) * sizeof(float));
76 |
77 | index =0;
78 |
79 | for(int i = 0; i < numBoxes; i++){
80 | if(!is_suppressed[indices[i]]){
81 | (*pointsOut)[index].x = points[indices[i]].x;
82 | (*pointsOut)[index].y = points[indices[i]].y;
83 | (*oppositePointsOut)[index].x = oppositePoints[indices[i]].x;
84 | (*oppositePointsOut)[index].y = oppositePoints[indices[i]].y;
85 | (*scoreOut)[index] = score[indices[i]];
86 | index++;
87 | }
88 | }
89 |
90 | free(indices);
91 | free(box_area);
92 | free(is_suppressed);
93 | return 1;
94 | }
--------------------------------------------------------------------------------
/docs/nms/nms.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import torch
 3 | def compute_iou(box1, box2, wh=False):
 4 |     if not wh:
 5 |         xmin1, ymin1, xmax1, ymax1 = box1
 6 |         xmin2, ymin2, xmax2, ymax2 = box2
 7 |     else:
 8 |         xmin1, ymin1 = int((box1[0] - box1[2]) / 2.0), int((box1[1] - box1[3]) / 2.0)
 9 |         xmax1, ymax1 = int((box1[0] + box1[2]) / 2.0), int((box1[1] + box1[3]) / 2.0)
10 |         xmin2, ymin2 = int((box2[0] - box2[2]) / 2.0), int((box2[1] - box2[3]) / 2.0)
11 |         xmax2, ymax2 = int((box2[0] + box2[2]) / 2.0), int((box2[1] + box2[3]) / 2.0)
12 |
13 |     xx1 = np.max([xmin1, xmin2])  # intersection: max of the top-left corners
14 |     yy1 = np.max([ymin1, ymin2])
15 |     xx2 = np.min([xmax1, xmax2])  # and min of the bottom-right corners
16 |     yy2 = np.min([ymax1, ymax2])
17 |
18 |     area1 = (xmax1 - xmin1) * (ymax1 - ymin1)
19 |     area2 = (xmax2 - xmin2) * (ymax2 - ymin2)
20 |
21 |     inter_area = np.max([0, xx2 - xx1]) * np.max([0, yy2 - yy1])
22 |     iou = inter_area / (area1 + area2 - inter_area + 1e-6)
23 |     return iou
24 |
25 | def py_cpu_nms(dets, thresh):
26 |     # dets: boxes of a single class; each row holds x1, y1, x2, y2 and the confidence score
27 |     # e.g. dets = [[x1, y1, x2, y2, score], [x1, y1, x2, y2, score], ...]
28 |     x1 = dets[:, 0]
29 |     y1 = dets[:, 1]
30 |     x2 = dets[:, 2]
31 |     y2 = dets[:, 3]
32 |     scores = dets[:, 4]
33 |     # box areas
34 |     areas = (x2 - x1 + 1) * (y2 - y1 + 1)
35 |     order = scores.argsort()[::-1]  # sort by confidence in descending order
36 |     keep = []
37 |
38 |     while order.size > 0:
39 |         i = order[0]  # keep the highest-scored remaining box
40 |         keep.append(i)
41 |         # intersection rectangles: top-left and bottom-right corners
42 |         xx1 = np.maximum(x1[i], x1[order[1:]])
43 |         yy1 = np.maximum(y1[i], y1[order[1:]])
44 |         xx2 = np.minimum(x2[i], x2[order[1:]])
45 |         yy2 = np.minimum(y2[i], y2[order[1:]])
46 |         # intersection widths and heights, clipped at zero
47 |         w = np.maximum(0.0, xx2 - xx1 + 1)
48 |         h = np.maximum(0.0, yy2 - yy1 + 1)
49 |
50 |         inter = w * h
51 |         # IoU = overlap / (area1 + area2 - overlap)
52 |         ovr = inter / (areas[i] + areas[order[1:]] - inter)
53 |         # keep only the boxes whose IoU is below the threshold
54 |         inds = np.where(ovr <= thresh)[0]
55 |         order = order[inds + 1]  # ovr is one element shorter than order, so shift the indices by one
56 |
57 |     return keep
58 |
59 | def iou(box1, box2):
60 |     N = box1.size(0)
61 |     M = box2.size(0)
62 |
63 |     lt = torch.max(  # top-left corners of the pairwise intersections
64 |         box1[:, :2].unsqueeze(1).expand(N, M, 2),  # [N,2] -> [N,1,2] -> [N,M,2]
65 |         box2[:, :2].unsqueeze(0).expand(N, M, 2),  # [M,2] -> [1,M,2] -> [N,M,2]
66 |     )
67 |
68 |     rb = torch.min(  # bottom-right corners of the pairwise intersections
69 |         box1[:, 2:].unsqueeze(1).expand(N, M, 2),
70 |         box2[:, 2:].unsqueeze(0).expand(N, M, 2),
71 |     )
72 |     wh = rb - lt
73 |     wh[wh < 0] = 0  # no overlap -> zero width/height
74 |
75 |     inter = wh[:, :, 0] * wh[:, :, 1]
76 |     area1 = (box1[:, 2] - box1[:, 0]) * (box1[:, 3] - box1[:, 1])
77 |     area2 = (box2[:, 2] - box2[:, 0]) * (box2[:, 3] - box2[:, 1])
78 |     area1 = area1.unsqueeze(1).expand(N, M)
79 |     area2 = area2.unsqueeze(0).expand(N, M)
80 |
81 |     iou = inter / (area1 + area2 - inter)
82 |
83 |     return iou
84 |
 85 | def nms(bboxes, scores, threshold=0.5):
 86 |     x1 = bboxes[:, 0]
 87 |     y1 = bboxes[:, 1]
 88 |     x2 = bboxes[:, 2]
 89 |     y2 = bboxes[:, 3]
 90 |     areas = (x2 - x1) * (y2 - y1)
 91 |     _, order = scores.sort(0, descending=True)
 92 |     keep = []
 93 |     while order.numel() > 0:
 94 |         if order.numel() == 1:  # only one box left: keep it and stop
 95 |             i = order.item()
 96 |             keep.append(i)
 97 |             break
 98 |         i = order[0].item()  # highest-scored remaining box
 99 |         keep.append(i)
100 |         xx1 = x1[order[1:]].clamp(min=x1[i])  # intersection with the remaining boxes
101 |         yy1 = y1[order[1:]].clamp(min=y1[i])
102 |         xx2 = x2[order[1:]].clamp(max=x2[i])
103 |         yy2 = y2[order[1:]].clamp(max=y2[i])
104 |
105 |         inter = (xx2 - xx1).clamp(min=0) * (yy2 - yy1).clamp(min=0)
106 |
107 |         iou = inter / (areas[i] + areas[order[1:]] - inter)
108 |         idx = (iou < threshold).nonzero().squeeze()  # surviving boxes, indexed relative to order[1:]
109 |
110 |         order = order[idx + 1]  # shift by one because iou skips order[0]
111 |     return torch.LongTensor(keep)
112 |
113 |
114 |
--------------------------------------------------------------------------------
/docs/nms/nms_wrapper.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import torch
3 |
4 | from . import nms_cpu, nms_cuda
5 | from .soft_nms_cpu import soft_nms_cpu
6 |
7 |
8 | def nms(dets, iou_thr, device_id=None):
9 | """Dispatch to either CPU or GPU NMS implementations.
10 |
11 | The input can be either a torch tensor or numpy array. GPU NMS will be used
12 | if the input is a gpu tensor or device_id is specified, otherwise CPU NMS
13 | will be used. The returned type will always be the same as inputs.
14 |
15 | Arguments:
16 | dets (torch.Tensor or np.ndarray): bboxes with scores.
17 | iou_thr (float): IoU threshold for NMS.
18 | device_id (int, optional): when `dets` is a numpy array, if `device_id`
19 | is None, then cpu nms is used, otherwise gpu_nms will be used.
20 |
21 | Returns:
22 |         tuple: kept bboxes and indices, both of the same data type as
23 | the input.
24 |
25 | Example:
26 | >>> dets = np.array([[49.1, 32.4, 51.0, 35.9, 0.9],
27 | >>> [49.3, 32.9, 51.0, 35.3, 0.9],
28 | >>> [49.2, 31.8, 51.0, 35.4, 0.5],
29 | >>> [35.1, 11.5, 39.1, 15.7, 0.5],
30 | >>> [35.6, 11.8, 39.3, 14.2, 0.5],
31 | >>> [35.3, 11.5, 39.9, 14.5, 0.4],
32 | >>> [35.2, 11.7, 39.7, 15.7, 0.3]], dtype=np.float32)
33 | >>> iou_thr = 0.7
34 |         >>> suppressed, inds = nms(dets, iou_thr)
35 |         >>> assert len(inds) == len(suppressed) == 3
36 | """
37 | # convert dets (tensor or numpy array) to tensor
38 | if isinstance(dets, torch.Tensor):
39 | is_numpy = False
40 | dets_th = dets
41 | elif isinstance(dets, np.ndarray):
42 | is_numpy = True
43 | device = 'cpu' if device_id is None else 'cuda:{}'.format(device_id)
44 | dets_th = torch.from_numpy(dets).to(device)
45 | else:
46 | raise TypeError(
47 | 'dets must be either a Tensor or numpy array, but got {}'.format(
48 | type(dets)))
49 |
50 | # execute cpu or cuda nms
51 | if dets_th.shape[0] == 0:
52 | inds = dets_th.new_zeros(0, dtype=torch.long)
53 | else:
54 | if dets_th.is_cuda:
55 | inds = nms_cuda.nms(dets_th, iou_thr)
56 | else:
57 | inds = nms_cpu.nms(dets_th, iou_thr)
58 |
59 | if is_numpy:
60 | inds = inds.cpu().numpy()
61 | return dets[inds, :], inds
62 |
63 |
64 | def soft_nms(dets, iou_thr, method='linear', sigma=0.5, min_score=1e-3):
65 | """
66 | Example:
67 | >>> dets = np.array([[4., 3., 5., 3., 0.9],
68 | >>> [4., 3., 5., 4., 0.9],
69 | >>> [3., 1., 3., 1., 0.5],
70 | >>> [3., 1., 3., 1., 0.5],
71 | >>> [3., 1., 3., 1., 0.4],
72 | >>> [3., 1., 3., 1., 0.0]], dtype=np.float32)
73 | >>> iou_thr = 0.7
74 |         >>> suppressed, inds = soft_nms(dets, iou_thr, sigma=0.5)
75 |         >>> assert len(inds) == len(suppressed) == 3
76 | """
77 | if isinstance(dets, torch.Tensor):
78 | is_tensor = True
79 | dets_np = dets.detach().cpu().numpy()
80 | elif isinstance(dets, np.ndarray):
81 | is_tensor = False
82 | dets_np = dets
83 | else:
84 | raise TypeError(
85 | 'dets must be either a Tensor or numpy array, but got {}'.format(
86 | type(dets)))
87 |
88 | method_codes = {'linear': 1, 'gaussian': 2}
89 | if method not in method_codes:
90 | raise ValueError('Invalid method for SoftNMS: {}'.format(method))
91 | new_dets, inds = soft_nms_cpu(
92 | dets_np,
93 | iou_thr,
94 | method=method_codes[method],
95 | sigma=sigma,
96 | min_score=min_score)
97 |
98 | if is_tensor:
99 | return dets.new_tensor(new_dets), dets.new_tensor(
100 | inds, dtype=torch.long)
101 | else:
102 | return new_dets.astype(np.float32), inds.astype(np.int64)
103 |
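Beyond the doctests above, the dispatch rule is: a numpy input runs the CPU kernel (or the CUDA kernel when device_id is given) and returns numpy; a tensor input keeps its device and type. A hedged sketch of calling the wrapper both ways, with made-up boxes:

import numpy as np
import torch

dets_np = np.array([[0., 0., 10., 10., 0.9],
                    [1., 1., 11., 11., 0.8]], dtype=np.float32)

# kept, inds = nms(dets_np, iou_thr=0.5)                    # numpy in -> numpy out (CPU kernel)
# kept, inds = nms(torch.from_numpy(dets_np).cuda(), 0.5)   # CUDA tensor in -> CUDA kernel, tensor out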
--------------------------------------------------------------------------------
/docs/nms/src/nms_cpu.cpp:
--------------------------------------------------------------------------------
1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 2 | #include <torch/extension.h>
3 |
 4 | template <typename scalar_t>
 5 | at::Tensor nms_cpu_kernel(const at::Tensor& dets, const float threshold) {
 6 |   // the kernel only accepts CPU tensors
 7 |   AT_ASSERTM(!dets.type().is_cuda(), "dets must be a CPU tensor");
8 |
9 | if (dets.numel() == 0) {
10 | return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU));
11 | }
12 |   // tensor.select(1, index) is equivalent to tensor[:, index]
13 | auto x1_t = dets.select(1, 0).contiguous();
14 | auto y1_t = dets.select(1, 1).contiguous();
15 | auto x2_t = dets.select(1, 2).contiguous();
16 | auto y2_t = dets.select(1, 3).contiguous();
17 | auto scores = dets.select(1, 4).contiguous();
18 |   // box areas (inclusive pixel coordinates)
19 | at::Tensor areas_t = (x2_t - x1_t + 1) * (y2_t - y1_t + 1);
20 |   // sort scores in descending order
21 | auto order_t = std::get<1>(scores.sort(0, /* descending=*/true));
22 |
23 | auto ndets = dets.size(0);
24 | at::Tensor suppressed_t =
25 | at::zeros({ndets}, dets.options().dtype(at::kByte).device(at::kCPU));
26 |
27 |   // per-box flag recording whether the box has been suppressed by an earlier, higher-scored box
28 |   auto suppressed = suppressed_t.data<uint8_t>();
29 |   auto order = order_t.data<int64_t>();
30 |   auto x1 = x1_t.data<scalar_t>();
31 |   auto y1 = y1_t.data<scalar_t>();
32 |   auto x2 = x2_t.data<scalar_t>();
33 |   auto y2 = y2_t.data<scalar_t>();
34 |   auto areas = areas_t.data<scalar_t>();
35 |
36 | for (int64_t _i = 0; _i < ndets; _i++) {
37 | auto i = order[_i];
38 | if (suppressed[i] == 1) continue;
39 | auto ix1 = x1[i];
40 | auto iy1 = y1[i];
41 | auto ix2 = x2[i];
42 | auto iy2 = y2[i];
43 | auto iarea = areas[i];
44 |
45 | for (int64_t _j = _i + 1; _j < ndets; _j++) {
46 | auto j = order[_j];
47 | if (suppressed[j] == 1) continue;
48 | auto xx1 = std::max(ix1, x1[j]);
49 | auto yy1 = std::max(iy1, y1[j]);
50 | auto xx2 = std::min(ix2, x2[j]);
51 | auto yy2 = std::min(iy2, y2[j]);
52 |
53 |       auto w = std::max(static_cast<scalar_t>(0), xx2 - xx1 + 1);
54 |       auto h = std::max(static_cast<scalar_t>(0), yy2 - yy1 + 1);
55 | auto inter = w * h;
56 |       auto ovr = inter / (iarea + areas[j] - inter);  // intersection over union
57 | if (ovr >= threshold) suppressed[j] = 1;
58 | }
59 | }
60 | return at::nonzero(suppressed_t == 0).squeeze(1);
61 | }
62 |
63 | at::Tensor nms(const at::Tensor& dets, const float threshold) {
64 | at::Tensor result;
65 | AT_DISPATCH_FLOATING_TYPES(dets.scalar_type(), "nms", [&] {
66 |     result = nms_cpu_kernel<scalar_t>(dets, threshold);
67 | });
68 | return result;
69 | }
70 | // expose the function to Python
71 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
72 | m.def("nms", &nms, "non-maximum suppression");
73 | }
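For comparison only: torchvision ships a ready-made operator that does the same job as the kernel above. Two differences to keep in mind: torchvision.ops.nms takes boxes and scores as separate arguments, and it computes areas without the +1 inclusive-pixel convention used here. A hedged sketch with made-up boxes:

import torch
from torchvision.ops import nms

boxes = torch.tensor([[10., 10., 50., 50.],
                      [12., 12., 48., 48.],
                      [100., 100., 150., 150.]])
scores = torch.tensor([0.95, 0.90, 0.80])

keep = nms(boxes, scores, iou_threshold=0.5)  # indices of the boxes to keep, sorted by score
print(keep)                                   # tensor([0, 2]) for this example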
--------------------------------------------------------------------------------
/docs/nms/src/nms_cuda.cpp:
--------------------------------------------------------------------------------
1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 2 | #include <torch/extension.h>
3 |
4 | #define CHECK_CUDA(x) AT_CHECK(x.type().is_cuda(), #x, " must be a CUDAtensor ")
5 |
6 | at::Tensor nms_cuda(const at::Tensor boxes, float nms_overlap_thresh);
7 |
8 | at::Tensor nms(const at::Tensor& dets, const float threshold) {
9 | CHECK_CUDA(dets);
10 | if (dets.numel() == 0)
11 | return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU));
12 | return nms_cuda(dets, threshold);
13 | }
14 |
15 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
16 | m.def("nms", &nms, "non-maximum suppression");
17 | }
--------------------------------------------------------------------------------
/docs/nms/src/nms_kernel.cu:
--------------------------------------------------------------------------------
1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 2 | #include <ATen/ATen.h>
 3 | #include <ATen/cuda/CUDAContext.h>
 4 | #include <ATen/DeviceGuard.h>
 5 |
 6 | #include <THC/THC.h>
 7 | #include <THC/THCDeviceUtils.cuh>
 8 |
 9 | #include <vector>
10 | #include <iostream>
11 |
12 |
13 | //!see https://zhuanlan.zhihu.com/p/80902998
14 |
15 |
16 | int const threadsPerBlock = sizeof(unsigned long long) * 8;  // 64 boxes per block, one bit per box in a 64-bit mask
17 |
18 | // compute the IoU of two boxes on the GPU
19 | __device__ inline float devIoU(float const * const a, float const * const b) {
20 | float left = max(a[0], b[0]), right = min(a[2], b[2]);
21 | float top = max(a[1], b[1]), bottom = min(a[3], b[3]);
22 | float width = max(right - left + 1, 0.f), height = max(bottom - top + 1, 0.f);
23 | float interS = width * height;
24 | float Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1);
25 | float Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1);
26 | return interS / (Sa + Sb - interS);
27 | }
28 |
29 | __global__ void nms_kernel(const int n_boxes, const float nms_overlap_thresh,
30 | const float *dev_boxes, unsigned long long *dev_mask) {
31 | const int row_start = blockIdx.y;
32 |   const int col_start = blockIdx.x;  // row/column position of this block in the box-pair grid
33 |
34 | // if (row_start > col_start) return;
35 |
36 | const int row_size =
37 | min(n_boxes - row_start * threadsPerBlock, threadsPerBlock);
38 | const int col_size =
39 | min(n_boxes - col_start * threadsPerBlock, threadsPerBlock);
40 |   // number of boxes handled by this block; the last block takes the remainder,
41 |   // e.g. for 105 boxes and blocks of 25: 105 = 4 * 25 + 5, so the last block has row_size = 5, the others 25
42 |
43 |   // shared memory speeds up the repeated reads:
44 |   // each block first copies the boxes it needs (coordinates and score) into shared memory,
45 |   // and then reads from shared memory instead of dev_boxes
46 | __shared__ float block_boxes[threadsPerBlock * 5];
47 |
48 | if (threadIdx.x < col_size) {
49 | block_boxes[threadIdx.x * 5 + 0] =
50 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0];
51 | block_boxes[threadIdx.x * 5 + 1] =
52 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1];
53 | block_boxes[threadIdx.x * 5 + 2] =
54 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2];
55 | block_boxes[threadIdx.x * 5 + 3] =
56 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 3];
57 | block_boxes[threadIdx.x * 5 + 4] =
58 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4];
59 | }
60 |
61 |   // wait until every thread has written its box to shared memory before any thread proceeds
62 | __syncthreads();
63 |   // this check drops the surplus threads so that partially filled blocks are handled correctly;
64 |   // each block runs row_size active threads,
65 |   // and each thread i loops over col_size boxes, computing the IoU of its row box against every column box
66 | if (threadIdx.x < row_size) {
67 | const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x;
68 | const float *cur_box = dev_boxes + cur_box_idx * 5;
69 | int i = 0;
70 | unsigned long long t = 0;
71 | int start = 0;
72 | if (row_start == col_start) {
73 |       start = threadIdx.x + 1;  // diagonal block: a box does not need its IoU with itself
74 | }
75 | for (i = start; i < col_size; i++) {
76 |       // main loop: compute the IoU of this box against every column box and set the mask bit when it exceeds the threshold
77 | if (devIoU(cur_box, block_boxes + i * 5) > nms_overlap_thresh) {
78 |         t |= 1ULL << i;  // set the suppression bit for column box i
79 | }
80 | }
81 | const int col_blocks = THCCeilDiv(n_boxes, threadsPerBlock);
82 | dev_mask[cur_box_idx * col_blocks + col_start] = t;
83 | }
84 | }
85 |
86 | // boxes is a N x 5 tensor
87 | at::Tensor nms_cuda(const at::Tensor boxes, float nms_overlap_thresh) {
88 |
89 | // Ensure CUDA uses the input tensor device.
90 | at::DeviceGuard guard(boxes.device());
91 |
92 | using scalar_t = float;
93 | AT_ASSERTM(boxes.type().is_cuda(), "boxes must be a CUDA tensor");
94 |   auto scores = boxes.select(1, 4);  // tensor.select(1, index) is equivalent to tensor[:, index]
95 | auto order_t = std::get<1>(scores.sort(0, /* descending=*/true));
96 | auto boxes_sorted = boxes.index_select(0, order_t);
97 |
98 | int boxes_num = boxes.size(0);
99 |
100 | const int col_blocks = THCCeilDiv(boxes_num, threadsPerBlock);
101 |
102 |   scalar_t* boxes_dev = boxes_sorted.data<scalar_t>();
103 |
104 | THCState *state = at::globalContext().lazyInitCUDA(); // TODO replace with getTHCState
105 |
106 | unsigned long long* mask_dev = NULL;
107 | //THCudaCheck(THCudaMalloc(state, (void**) &mask_dev,
108 | // boxes_num * col_blocks * sizeof(unsigned long long)));
109 |
110 | mask_dev = (unsigned long long*) THCudaMalloc(state, boxes_num * col_blocks * sizeof(unsigned long long));
111 |
112 |
113 |   // grid dimensions (number of blocks) and threads per block
114 | dim3 blocks(THCCeilDiv(boxes_num, threadsPerBlock),
115 | THCCeilDiv(boxes_num, threadsPerBlock));
116 | dim3 threads(threadsPerBlock);
117 |   // launch the kernel; mask_dev ends up recording, for every pair of boxes, whether their IoU exceeds the threshold
118 |   nms_kernel<<<blocks, threads>>>(boxes_num,
119 | nms_overlap_thresh,
120 | boxes_dev,
121 | mask_dev);
122 |
123 |   std::vector<unsigned long long> mask_host(boxes_num * col_blocks);
124 | THCudaCheck(cudaMemcpyAsync(
125 | &mask_host[0],
126 | mask_dev,
127 | sizeof(unsigned long long) * boxes_num * col_blocks,
128 | cudaMemcpyDeviceToHost,
129 | at::cuda::getCurrentCUDAStream()
130 | ));
131 |
132 |   std::vector<unsigned long long> remv(col_blocks);  // initially every box is still in the candidate set S
133 |   memset(&remv[0], 0, sizeof(unsigned long long) * col_blocks);  // so all removal bits start as 0
134 |
135 | at::Tensor keep = at::empty({boxes_num}, boxes.options().dtype(at::kLong).device(at::kCPU));
136 |   int64_t* keep_out = keep.data<int64_t>();
137 |
138 | int num_to_keep = 0;
139 | for (int i = 0; i < boxes_num; i++) {
140 |     int nblock = i / threadsPerBlock;   // which block this box was processed in
141 |     int inblock = i % threadsPerBlock;  // which thread within that block handled it
142 |
143 |     // for each box: if it is still in S, add it to the result set, remove it from S,
144 |     // and remove from S every box whose IoU with it exceeds the threshold
145 |     if (!(remv[nblock] & (1ULL << inblock))) {
146 |       keep_out[num_to_keep++] = i;  // add to the result set
147 |       unsigned long long *p = &mask_host[0] + i * col_blocks;
148 |       for (int j = nblock; j < col_blocks; j++) {
149 |         remv[j] |= p[j];  // remove the overlapping boxes from S
150 | }
151 | }
152 | }
153 |
154 | THCudaFree(state, mask_dev);
155 | // TODO improve this part
156 | return std::get<0>(order_t.index({
157 | keep.narrow(/*dim=*/0, /*start=*/0, /*length=*/num_to_keep).to(
158 | order_t.device(), keep.scalar_type())
159 | }).sort(0, false));
160 | }
161 |
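The kernel above fills dev_mask with one 64-bit word per (box, column-block) pair: bit k of the mask for box i and block b is set when the IoU of box i with the k-th box of block b exceeds the threshold. The host loop then walks boxes in score order, keeps a box if no already-kept box has marked it, and ORs its mask into remv. A hedged NumPy sketch of that bookkeeping (the masks here are toy values, not kernel output):

import numpy as np

threads_per_block = 64
n_boxes = 5
col_blocks = (n_boxes + threads_per_block - 1) // threads_per_block  # ceil division

# toy masks: box 0 suppresses boxes 1 and 2, box 3 suppresses box 4
suppress = np.zeros((n_boxes, col_blocks), dtype=np.uint64)
suppress[0, 0] = (1 << 1) | (1 << 2)
suppress[3, 0] = 1 << 4

remv = np.zeros(col_blocks, dtype=np.uint64)  # removal bits; all boxes start in S
keep = []
for i in range(n_boxes):                      # boxes are assumed already sorted by score
    nblock, inblock = divmod(i, threads_per_block)
    if not (int(remv[nblock]) >> inblock) & 1:
        keep.append(i)                        # box i survives
        remv |= suppress[i]                   # remove everything it overlaps too much
print(keep)  # [0, 3]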
--------------------------------------------------------------------------------
/docs/nms/src/soft_nms_cpu.pyx:
--------------------------------------------------------------------------------
1 | # ----------------------------------------------------------
2 | # Soft-NMS: Improving Object Detection With One Line of Code
3 | # Copyright (c) University of Maryland, College Park
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Navaneeth Bodla and Bharat Singh
6 | # Modified by Kai Chen
7 | # ----------------------------------------------------------
8 |
9 | # cython: language_level=3, boundscheck=False
10 |
11 | import numpy as np
12 | cimport numpy as np
13 |
14 |
15 | cdef inline np.float32_t max(np.float32_t a, np.float32_t b):
16 | return a if a >= b else b
17 |
18 | cdef inline np.float32_t min(np.float32_t a, np.float32_t b):
19 | return a if a <= b else b
20 |
21 |
22 | def soft_nms_cpu(
23 | np.ndarray[float, ndim=2] boxes_in,
24 | float iou_thr,
25 | unsigned int method=1,
26 | float sigma=0.5,
27 | float min_score=0.001,
28 | ):
29 | boxes = boxes_in.copy()
30 | cdef int N = boxes.shape[0]
31 | cdef float iw, ih, box_area
32 | cdef float ua
33 | cdef int pos = 0
34 | cdef float maxscore = 0
35 | cdef int maxpos = 0
36 | cdef float x1, x2, y1, y2, tx1, tx2, ty1, ty2, ts, area, weight, ov
37 | inds = np.arange(N)
38 |
39 | for i in range(N):
40 | maxscore = boxes[i, 4]
41 | maxpos = i
42 |
43 | tx1 = boxes[i, 0]
44 | ty1 = boxes[i, 1]
45 | tx2 = boxes[i, 2]
46 | ty2 = boxes[i, 3]
47 | ts = boxes[i, 4]
48 | ti = inds[i]
49 |
50 | pos = i + 1
51 | # get max box
52 | while pos < N:
53 | if maxscore < boxes[pos, 4]:
54 | maxscore = boxes[pos, 4]
55 | maxpos = pos
56 | pos = pos + 1
57 |
58 | # add max box as a detection
59 | boxes[i, 0] = boxes[maxpos, 0]
60 | boxes[i, 1] = boxes[maxpos, 1]
61 | boxes[i, 2] = boxes[maxpos, 2]
62 | boxes[i, 3] = boxes[maxpos, 3]
63 | boxes[i, 4] = boxes[maxpos, 4]
64 | inds[i] = inds[maxpos]
65 |
66 | # swap ith box with position of max box
67 | boxes[maxpos, 0] = tx1
68 | boxes[maxpos, 1] = ty1
69 | boxes[maxpos, 2] = tx2
70 | boxes[maxpos, 3] = ty2
71 | boxes[maxpos, 4] = ts
72 | inds[maxpos] = ti
73 |
74 | tx1 = boxes[i, 0]
75 | ty1 = boxes[i, 1]
76 | tx2 = boxes[i, 2]
77 | ty2 = boxes[i, 3]
78 | ts = boxes[i, 4]
79 |
80 | pos = i + 1
81 | # NMS iterations, note that N changes if detection boxes fall below
82 | # threshold
83 | while pos < N:
84 | x1 = boxes[pos, 0]
85 | y1 = boxes[pos, 1]
86 | x2 = boxes[pos, 2]
87 | y2 = boxes[pos, 3]
88 | s = boxes[pos, 4]
89 |
90 | area = (x2 - x1 + 1) * (y2 - y1 + 1)
91 | iw = (min(tx2, x2) - max(tx1, x1) + 1)
92 | if iw > 0:
93 | ih = (min(ty2, y2) - max(ty1, y1) + 1)
94 | if ih > 0:
95 | ua = float((tx2 - tx1 + 1) * (ty2 - ty1 + 1) + area - iw * ih)
96 | ov = iw * ih / ua # iou between max box and detection box
97 |
98 | if method == 1: # linear
99 | if ov > iou_thr:
100 | weight = 1 - ov
101 | else:
102 | weight = 1
103 | elif method == 2: # gaussian
104 | weight = np.exp(-(ov * ov) / sigma)
105 | else: # original NMS
106 | if ov > iou_thr:
107 | weight = 0
108 | else:
109 | weight = 1
110 |
111 | boxes[pos, 4] = weight * boxes[pos, 4]
112 |
113 | # if box score falls below threshold, discard the box by
114 | # swapping with last box update N
115 | if boxes[pos, 4] < min_score:
116 | boxes[pos, 0] = boxes[N-1, 0]
117 | boxes[pos, 1] = boxes[N-1, 1]
118 | boxes[pos, 2] = boxes[N-1, 2]
119 | boxes[pos, 3] = boxes[N-1, 3]
120 | boxes[pos, 4] = boxes[N-1, 4]
121 | inds[pos] = inds[N - 1]
122 | N = N - 1
123 | pos = pos - 1
124 |
125 | pos = pos + 1
126 |
127 | return boxes[:N], inds[:N]
128 |
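All three modes above differ only in how a neighbouring box's score is decayed as a function of its overlap ov with the current top box: linear (method == 1) multiplies by 1 - ov once ov exceeds iou_thr, gaussian (method == 2) always multiplies by exp(-ov^2 / sigma), and anything else falls back to hard NMS (weight 0 above the threshold). A small NumPy sketch of the three rules, with illustrative numbers:

import numpy as np

def decay_weight(ov, iou_thr=0.3, sigma=0.5, method="linear"):
    if method == "linear":                      # method == 1 above
        return 1.0 - ov if ov > iou_thr else 1.0
    if method == "gaussian":                    # method == 2 above
        return float(np.exp(-(ov * ov) / sigma))
    return 0.0 if ov > iou_thr else 1.0         # anything else: original hard NMS

for ov in (0.1, 0.4, 0.8):
    print(ov, decay_weight(ov, method="linear"), decay_weight(ov, method="gaussian"))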
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
3 |
4 | import glob
5 | import os
6 | from setuptools import find_packages, setup
7 | import torch
8 | from torch.utils.cpp_extension import CUDA_HOME, CppExtension, CUDAExtension
9 |
10 | torch_ver = [int(x) for x in torch.__version__.split(".")[:2]]
11 | assert torch_ver >= [1, 3], "Requires PyTorch >= 1.3"
12 |
13 |
14 | def get_extensions():
15 | this_dir = os.path.dirname(os.path.abspath(__file__))
16 | extensions_dir = os.path.join(this_dir, "det", "layers", "csrc")
17 |
18 | main_source = os.path.join(extensions_dir, "vision.cpp")
19 | sources = glob.glob(os.path.join(extensions_dir, "**", "*.cpp"))
20 | source_cuda = glob.glob(os.path.join(extensions_dir, "**", "*.cu")) + glob.glob(
21 | os.path.join(extensions_dir, "*.cu")
22 | )
23 |
24 | sources = [main_source] + sources
25 |
26 | extension = CppExtension
27 |
28 | extra_compile_args = {"cxx": []}
29 | define_macros = []
30 |
31 | if (torch.cuda.is_available() and CUDA_HOME is not None) or os.getenv("FORCE_CUDA", "0") == "1":
32 | extension = CUDAExtension
33 | sources += source_cuda
34 | define_macros += [("WITH_CUDA", None)]
35 | extra_compile_args["nvcc"] = [
36 | "-DCUDA_HAS_FP16=1",
37 | "-D__CUDA_NO_HALF_OPERATORS__",
38 | "-D__CUDA_NO_HALF_CONVERSIONS__",
39 | "-D__CUDA_NO_HALF2_OPERATORS__",
40 | ]
41 |
42 | # It's better if pytorch can do this by default ..
43 | CC = os.environ.get("CC", None)
44 | if CC is not None:
45 | extra_compile_args["nvcc"].append("-ccbin={}".format(CC))
46 |
47 | sources = [os.path.join(extensions_dir, s) for s in sources]
48 |
49 | include_dirs = [extensions_dir]
50 |
51 | ext_modules = [
52 | extension(
53 | "det._C",
54 | sources,
55 | include_dirs=include_dirs,
56 | define_macros=define_macros,
57 | extra_compile_args=extra_compile_args,
58 | )
59 | ]
60 |
61 | return ext_modules
62 |
63 |
64 | setup(
65 | name="Det",
66 | version="0.1.1",
67 | author="HTF",
68 | url="",
69 |     description="Platform for instance-level "
70 |     "detection tasks based on Detectron2.",
71 | packages=find_packages(exclude=("configs", "tests")),
72 | python_requires=">=3.6",
73 | install_requires=[
74 | "termcolor>=1.1",
75 | "Pillow>=6.0",
76 | "yacs>=0.1.6",
77 | "tabulate",
78 | "cloudpickle",
79 | "matplotlib",
80 | "tqdm>4.29.0",
81 | "tensorboard",
82 | "python-Levenshtein",
83 | "Polygon3",
84 | "shapely",
85 | ],
86 | extras_require={"all": ["psutil"]},
87 | ext_modules=get_extensions(),
88 | cmdclass={"build_ext": torch.utils.cpp_extension.BuildExtension},
89 | )
--------------------------------------------------------------------------------
/tools/compute_flops.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from detectron2.engine import default_argument_parser, default_setup
3 |
4 | from det.config import get_cfg
5 | from det.utils.measures import measure_model
6 |
7 | from train_net import Trainer
8 |
9 |
10 | def setup(args):
11 | """
12 | Create configs and perform basic setups.
13 | """
14 | cfg = get_cfg()
15 | cfg.merge_from_file(args.config_file)
16 | cfg.merge_from_list(args.opts)
17 | cfg.freeze()
18 | default_setup(cfg, args)
19 | return cfg
20 |
21 |
22 | def main(args):
23 | cfg = setup(args)
24 |
25 | model = Trainer.build_model(cfg)
26 | model.eval().cuda()
27 | input_size = (3, 512, 512)
28 | image = torch.zeros(*input_size)
29 | batched_input = {"image": image}
30 | ops, params = measure_model(model, [batched_input])
31 | print('ops: {:.2f}G\tparams: {:.2f}M'.format(ops / 2**30, params / 2**20))
32 |
33 |
34 | if __name__ == "__main__":
35 | args = default_argument_parser().parse_args()
36 | print("Command Line Args:", args)
37 | main(args)
38 |
--------------------------------------------------------------------------------
/tools/visualize_data.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
3 | import argparse
4 | import numpy as np
5 | import os
6 | from itertools import chain
7 | import cv2
8 | import tqdm
9 | from PIL import Image
10 |
11 | from detectron2.data import DatasetCatalog, MetadataCatalog, build_detection_train_loader
12 | from detectron2.data import detection_utils as utils
13 | from detectron2.data.build import filter_images_with_few_keypoints
14 | from detectron2.utils.logger import setup_logger
15 | from detectron2.utils.visualizer import Visualizer
16 |
17 | from det.config import get_cfg
18 | from det.data.dataset_mapper import DatasetMapperWithBasis
19 |
20 |
21 | def setup(args):
22 | cfg = get_cfg()
23 | if args.config_file:
24 | cfg.merge_from_file(args.config_file)
25 | cfg.merge_from_list(args.opts)
26 | cfg.freeze()
27 | return cfg
28 |
29 |
30 | def parse_args(in_args=None):
31 | parser = argparse.ArgumentParser(description="Visualize ground-truth data")
32 | parser.add_argument(
33 | "--source",
34 | choices=["annotation", "dataloader"],
35 | required=True,
36 | help="visualize the annotations or the data loader (with pre-processing)",
37 | )
38 | parser.add_argument("--config-file", metavar="FILE", help="path to config file")
39 | parser.add_argument("--output-dir", default="./", help="path to output directory")
40 | parser.add_argument("--show", action="store_true", help="show output in a window")
41 | parser.add_argument(
42 | "--opts",
43 | help="Modify config options using the command-line",
44 | default=[],
45 | nargs=argparse.REMAINDER,
46 | )
47 | return parser.parse_args(in_args)
48 |
49 |
50 | if __name__ == "__main__":
51 | args = parse_args()
52 | logger = setup_logger()
53 | logger.info("Arguments: " + str(args))
54 | cfg = setup(args)
55 |
56 | dirname = args.output_dir
57 | os.makedirs(dirname, exist_ok=True)
58 | metadata = MetadataCatalog.get(cfg.DATASETS.TRAIN[0])
59 |
60 | def output(vis, fname):
61 | if args.show:
62 | print(fname)
63 | cv2.imshow("window", vis.get_image()[:, :, ::-1])
64 | cv2.waitKey()
65 | else:
66 | filepath = os.path.join(dirname, fname)
67 | print("Saving to {} ...".format(filepath))
68 | vis.save(filepath)
69 |
70 | scale = 2.0 if args.show else 1.0
71 | if args.source == "dataloader":
72 | mapper = DatasetMapperWithBasis(cfg, True)
73 | train_data_loader = build_detection_train_loader(cfg, mapper)
74 | for batch in train_data_loader:
75 | for per_image in batch:
76 | # Pytorch tensor is in (C, H, W) format
77 | img = per_image["image"].permute(1, 2, 0)
78 | if cfg.INPUT.FORMAT == "BGR":
79 | img = img[:, :, [2, 1, 0]]
80 | else:
81 | img = np.asarray(Image.fromarray(img, mode=cfg.INPUT.FORMAT).convert("RGB"))
82 |
83 | visualizer = Visualizer(img, metadata=metadata, scale=scale)
84 | target_fields = per_image["instances"].get_fields()
85 | labels = [metadata.thing_classes[i] for i in target_fields["gt_classes"]]
86 | vis = visualizer.overlay_instances(
87 | labels=labels,
88 | boxes=target_fields.get("gt_boxes", None),
89 | masks=target_fields.get("gt_masks", None),
90 | keypoints=target_fields.get("gt_keypoints", None),
91 | )
92 | output(vis, str(per_image["image_id"]) + ".jpg")
93 | else:
94 | dicts = list(chain.from_iterable([DatasetCatalog.get(k) for k in cfg.DATASETS.TRAIN]))
95 | if cfg.MODEL.KEYPOINT_ON:
96 | dicts = filter_images_with_few_keypoints(dicts, 1)
97 | for dic in tqdm.tqdm(dicts):
98 | img = utils.read_image(dic["file_name"], "RGB")
99 | visualizer = Visualizer(img, metadata=metadata, scale=scale)
100 | vis = visualizer.draw_dataset_dict(dic)
101 | output(vis, os.path.basename(dic["file_name"]))
--------------------------------------------------------------------------------