├── .vscode └── settings.json ├── README.md ├── configs ├── FCOS-Detection │ ├── Base-ATSS.yaml │ ├── Base-FCOS.yaml │ ├── FCOS_RT │ │ ├── MS_DLA_34_4x_syncbn.yaml │ │ ├── MS_DLA_34_4x_syncbn_bn_head.yaml │ │ ├── MS_DLA_34_4x_syncbn_shared_towers.yaml │ │ ├── MS_DLA_34_4x_syncbn_shared_towers_bn_head.yaml │ │ ├── MS_R_50_4x_syncbn.yaml │ │ └── MS_R_50_4x_syncbn_bn_head.yaml │ ├── MS_R_101_2x.yaml │ ├── MS_R_50_2x.yaml │ ├── MS_X_101_32x8d_2x.yaml │ ├── MS_X_101_32x8d_2x_dcnv2.yaml │ ├── MS_X_101_64x4d_2x.yaml │ ├── MS_X_101_64x4d_2x_dcnv2.yaml │ ├── README.md │ ├── RS_50_1x.yaml │ ├── R_50_1x.yaml │ ├── atss_r_50.yaml │ └── vovnet │ │ ├── MS_V_39_3x.yaml │ │ ├── MS_V_57_3x.yaml │ │ ├── MS_V_99_3x.yaml │ │ └── README.md └── RCNN │ ├── 550_R_50_FPN_3x.yaml │ ├── Base-RCNN-FPN.yaml │ ├── Base-RCNN.yaml │ ├── LVIS │ └── R_50_1x.yaml │ ├── R_101_3x.yaml │ ├── faster_rcnn_RS_50_FPN_1x.yaml │ ├── faster_rcnn_R_50_FPN_1x.yaml │ ├── faster_rcnn_R_50_FPN_1x_tta.yaml │ └── libra_rcnn │ └── r_50_1x.yaml ├── det ├── _C.cpython-36m-x86_64-linux-gnu.so ├── __init__.py ├── __pycache__ │ └── __init__.cpython-36.pyc ├── checkpoint │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-36.pyc │ │ └── det_checkpoint.cpython-36.pyc │ └── det_checkpoint.py ├── config │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-36.pyc │ │ ├── config.cpython-36.pyc │ │ └── defaults.cpython-36.pyc │ ├── config.py │ └── defaults.py ├── data │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-36.pyc │ │ ├── builtin.cpython-36.pyc │ │ ├── dataset_mapper.cpython-36.pyc │ │ └── detection_utils.cpython-36.pyc │ ├── builtin.py │ ├── dataset_mapper.py │ ├── datasets │ │ ├── __pycache__ │ │ │ └── text.cpython-36.pyc │ │ ├── augment_lists.py │ │ ├── fast_augment.py │ │ └── text.py │ └── detection_utils.py ├── layers │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-36.pyc │ │ ├── balanced_l1_loss.cpython-36.pyc │ │ ├── bezier_align.cpython-36.pyc │ │ ├── conv_with_kaiming_uniform.cpython-36.pyc │ │ ├── def_roi_align.cpython-36.pyc │ │ ├── deform_conv.cpython-36.pyc │ │ ├── gcn.cpython-36.pyc │ │ ├── iou_loss.cpython-36.pyc │ │ ├── ml_nms.cpython-36.pyc │ │ ├── naive_group_norm.cpython-36.pyc │ │ └── soft_nms.cpython-36.pyc │ ├── balanced_l1_loss.py │ ├── bezier_align.py │ ├── conv_with_kaiming_uniform.py │ ├── csrc │ │ ├── BezierAlign │ │ │ ├── BezierAlign.h │ │ │ ├── BezierAlign_cpu.cpp │ │ │ └── BezierAlign_cuda.cu │ │ ├── DefROIAlign │ │ │ ├── DefROIAlign.h │ │ │ └── DefROIAlign_cuda.cu │ │ ├── cuda_version.cu │ │ ├── ml_nms │ │ │ ├── ml_nms.cu │ │ │ └── ml_nms.h │ │ └── vision.cpp │ ├── def_roi_align.py │ ├── deform_conv.py │ ├── gcn.py │ ├── iou_loss.py │ ├── ml_nms.py │ ├── naive_group_norm.py │ └── soft_nms.py ├── modeling │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-36.pyc │ │ ├── one_stage_detector.cpython-36.pyc │ │ └── rpn_utils.cpython-36.pyc │ ├── atss │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-36.pyc │ │ │ ├── atss.cpython-36.pyc │ │ │ └── atss_outputs.cpython-36.pyc │ │ ├── atss.py │ │ └── atss_outputs.py │ ├── backbone │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-36.pyc │ │ │ ├── dla.cpython-36.pyc │ │ │ ├── fpn.cpython-36.pyc │ │ │ ├── lpf.cpython-36.pyc │ │ │ ├── mobilenet.cpython-36.pyc │ │ │ ├── resnet.cpython-36.pyc │ │ │ ├── resnet_interval.cpython-36.pyc │ │ │ ├── resnet_lpf.cpython-36.pyc │ │ │ ├── splat.cpython-36.pyc │ │ │ └── vovnet.cpython-36.pyc │ │ ├── dla.py │ │ ├── fpn.py │ │ ├── lpf.py │ │ ├── 
mobilenet.py │ │ ├── resnet.py │ │ ├── resnet_interval.py │ │ ├── resnet_lpf.py │ │ ├── splat.py │ │ └── vovnet.py │ ├── fcos │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-36.pyc │ │ │ ├── fcos.cpython-36.pyc │ │ │ └── fcos_outputs.cpython-36.pyc │ │ ├── fcos.py │ │ └── fcos_outputs.py │ ├── guided_anchoring │ │ ├── ga_outputs.py │ │ └── guided_anchor_head.py │ ├── one_stage_detector.py │ ├── poolers.py │ ├── roi_heads │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-36.pyc │ │ │ └── libra_rcnn.cpython-36.pyc │ │ └── libra_rcnn.py │ ├── rpn_utils.py │ └── tsd │ │ └── tsd.py └── utils │ ├── __init__.py │ ├── __pycache__ │ ├── __init__.cpython-36.pyc │ └── comm.cpython-36.pyc │ ├── comm.py │ ├── measures.py │ └── visualizer.py ├── docs └── nms │ ├── README.md │ ├── __init__.py │ ├── demo_nms.cpp │ ├── demo_nms.py │ ├── nms.cpp │ ├── nms.py │ ├── nms_wrapper.py │ └── src │ ├── nms_cpu.cpp │ ├── nms_cuda.cpp │ ├── nms_kernel.cu │ ├── soft_nms_cpu.cpp │ └── soft_nms_cpu.pyx ├── setup.py └── tools ├── compute_flops.py ├── train_net.py └── visualize_data.py /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "python.pythonPath": "/home/fei/anaconda3/envs/detectron2/bin/python" 3 | } -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Object-Detection.pytorch 2 | 3 | ## bdd100k Dataset Baseline 4 | - we use `mmdetection` to train all models. 5 | - All models were trained on `bdd100k_train`, and tested on the `bdd100k_val`. 6 | - We use distributed training across 8 Nvdia-1080Ti GPUs. 7 | 8 | ### Anchor-based: 9 | | Name | backbone | tricks | AP | AP50 | AP75 | APs | APm | APl | 10 | | :------: |:------: |:------:|:------:|:------:|:------:|:------:|:------:|:------:| 11 | | FasterRCNN | R_50_FPN | | 0.318 | 0.551 | 0.311 | 0.145 | 0.356 | 0.497| 12 | | FasterRCNN | R_101_FPN| | 0.322 | 0.553 | 0.314 | 0.142 | 0.360 | 0.512 | 13 | | CascadeRCNN | R_50_FPN | | 0.332 | 0.558 | 0.331 | 0.150 | 0.371 | 0.520 | 14 | | PISA | R_50_FPN | | 15 | | LibraRCNN| R_50_FPN| | 16 | | GA | R_50_FPN | | 17 | 18 | 19 | ### Anchor-free 20 | | Name | backbone | tricks | AP | AP50 | AP75 | APs | APm | APl | 21 | | :------: |:------: |:------:|:------:|:------:|:------:|:------:|:------:|:------:| 22 | | FCOS | R_50_FPN | |0.304 | 0.539 | 0.290 | 0.129 | 0.338 | 0.498 | 23 | | ATSS | R_50_FPN | | 0.329 | 0.562 | 0.323 | 0.141 | 0.367 | 0.517| 24 | | CenterNet| R_50_DCN| 25 | | RepPoints| R_50_FPN | | 0.312 | 0.555 | 0.297 | 0.129 | 0.348 | 0.505 | 26 | 27 | ## CenterNet series 28 | | Name | backbone | Iters | AP | AP50 | AP75 | APs | APm | APl | 29 | | :------: |:------: |:------:|:------:|:------:|:------:|:------:|:------:|:------:| 30 | | CenterNet| R_50_DCN |125997 | 27.5269|44.7613| 28.8301 |9.6805 | 31.4682 |43.1641 | 31 | -------------------------------------------------------------------------------- /configs/FCOS-Detection/Base-ATSS.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "OneStageDetector" 3 | BACKBONE: 4 | NAME: "build_fcos_resnet_fpn_backbone" 5 | RESNETS: 6 | OUT_FEATURES: ["res3", "res4", "res5"] 7 | FPN: 8 | IN_FEATURES: ["res3", "res4", "res5"] 9 | PROPOSAL_GENERATOR: 10 | NAME: "ATSS" 11 | ANCHOR_GENERATOR: 12 | NAME: "ATSSAnchorGenerator" 13 | 14 | 15 | # PIXEL_MEAN: [102.9801, 115.9465, 122.7717] 
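  # (The commented-out values above are the Caffe2/Detectron1 BGR pixel means; leaving them
  # commented keeps detectron2's default PIXEL_MEAN in effect.)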
16 | DATASETS: 17 | TRAIN: ("coco_2017_train",) 18 | TEST: ("coco_2017_val",) 19 | SOLVER: 20 | IMS_PER_BATCH: 16 21 | BASE_LR: 0.01 # Note that RetinaNet uses a different default learning rate 22 | STEPS: (60000, 80000) 23 | MAX_ITER: 90000 24 | INPUT: 25 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) -------------------------------------------------------------------------------- /configs/FCOS-Detection/Base-FCOS.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "OneStageDetector" 3 | BACKBONE: 4 | NAME: "build_fcos_resnet_fpn_backbone" 5 | RESNETS: 6 | OUT_FEATURES: ["res3", "res4", "res5"] 7 | FPN: 8 | IN_FEATURES: ["res3", "res4", "res5"] 9 | PROPOSAL_GENERATOR: 10 | NAME: "FCOS" 11 | # PIXEL_MEAN: [102.9801, 115.9465, 122.7717] 12 | DATASETS: 13 | TRAIN: ("coco_2017_train",) 14 | TEST: ("coco_2017_val",) 15 | SOLVER: 16 | IMS_PER_BATCH: 16 17 | BASE_LR: 0.01 # Note that RetinaNet uses a different default learning rate 18 | STEPS: (60000, 80000) 19 | MAX_ITER: 90000 20 | INPUT: 21 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 22 | -------------------------------------------------------------------------------- /configs/FCOS-Detection/FCOS_RT/MS_DLA_34_4x_syncbn.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-FCOS.yaml" 2 | INPUT: 3 | MIN_SIZE_TRAIN: (256, 288, 320, 352, 384, 416, 448, 480, 512, 544, 576, 608) 4 | MAX_SIZE_TRAIN: 900 5 | MAX_SIZE_TEST: 736 6 | MIN_SIZE_TEST: 512 7 | MODEL: 8 | BACKBONE: 9 | NAME: "build_fcos_dla_fpn_backbone" 10 | FREEZE_AT: -1 11 | WEIGHTS: "http://dl.yf.io/dla/models/imagenet/dla34-ba72cf86.pth" 12 | DLA: 13 | CONV_BODY: "DLA34" 14 | NORM: "SyncBN" 15 | FPN: 16 | IN_FEATURES: ["level3", "level4", "level5"] 17 | FCOS: 18 | TOP_LEVELS: 0 19 | SIZES_OF_INTEREST: [64, 128] 20 | FPN_STRIDES: [8, 16, 32] 21 | IN_FEATURES: ['p3', 'p4', 'p5'] 22 | SOLVER: 23 | STEPS: (300000, 340000) 24 | MAX_ITER: 360000 25 | OUTPUT_DIR: "output/fcos/FCOS_RT_MS_DLA_34_4x_syncbn" 26 | -------------------------------------------------------------------------------- /configs/FCOS-Detection/FCOS_RT/MS_DLA_34_4x_syncbn_bn_head.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-FCOS.yaml" 2 | INPUT: 3 | MIN_SIZE_TRAIN: (256, 288, 320, 352, 384, 416, 448, 480, 512, 544, 576, 608) 4 | MAX_SIZE_TRAIN: 900 5 | MAX_SIZE_TEST: 736 6 | MIN_SIZE_TEST: 512 7 | MODEL: 8 | BACKBONE: 9 | NAME: "build_fcos_dla_fpn_backbone" 10 | FREEZE_AT: -1 11 | WEIGHTS: "http://dl.yf.io/dla/models/imagenet/dla34-ba72cf86.pth" 12 | DLA: 13 | CONV_BODY: "DLA34" 14 | NORM: "SyncBN" 15 | FPN: 16 | IN_FEATURES: ["level3", "level4", "level5"] 17 | FCOS: 18 | TOP_LEVELS: 0 19 | SIZES_OF_INTEREST: [64, 128] 20 | FPN_STRIDES: [8, 16, 32] 21 | IN_FEATURES: ['p3', 'p4', 'p5'] 22 | NORM: "SyncBN" 23 | SOLVER: 24 | STEPS: (300000, 340000) 25 | MAX_ITER: 360000 26 | OUTPUT_DIR: "output/fcos/FCOS_RT_MS_DLA_34_4x_syncbn_bn_head" 27 | -------------------------------------------------------------------------------- /configs/FCOS-Detection/FCOS_RT/MS_DLA_34_4x_syncbn_shared_towers.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-FCOS.yaml" 2 | INPUT: 3 | MIN_SIZE_TRAIN: (256, 288, 320, 352, 384, 416, 448, 480, 512, 544, 576, 608) 4 | MAX_SIZE_TRAIN: 900 5 | MAX_SIZE_TEST: 736 6 | MIN_SIZE_TEST: 512 7 | MODEL: 8 | BACKBONE: 9 | NAME: "build_fcos_dla_fpn_backbone" 10 | FREEZE_AT: 
-1 11 | WEIGHTS: "http://dl.yf.io/dla/models/imagenet/dla34-ba72cf86.pth" 12 | DLA: 13 | CONV_BODY: "DLA34" 14 | NORM: "SyncBN" 15 | FPN: 16 | IN_FEATURES: ["level3", "level4", "level5"] 17 | FCOS: 18 | TOP_LEVELS: 0 19 | SIZES_OF_INTEREST: [64, 128] 20 | FPN_STRIDES: [8, 16, 32] 21 | IN_FEATURES: ['p3', 'p4', 'p5'] 22 | NUM_SHARE_CONVS: 4 23 | NUM_BOX_CONVS: 0 24 | NUM_CLS_CONVS: 0 25 | SOLVER: 26 | STEPS: (300000, 340000) 27 | MAX_ITER: 360000 28 | OUTPUT_DIR: "output/fcos/FCOS_RT_MS_DLA_34_4x_syncbn_shared_towers" 29 | -------------------------------------------------------------------------------- /configs/FCOS-Detection/FCOS_RT/MS_DLA_34_4x_syncbn_shared_towers_bn_head.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-FCOS.yaml" 2 | INPUT: 3 | MIN_SIZE_TRAIN: (256, 288, 320, 352, 384, 416, 448, 480, 512, 544, 576, 608) 4 | MAX_SIZE_TRAIN: 900 5 | MAX_SIZE_TEST: 736 6 | MIN_SIZE_TEST: 512 7 | MODEL: 8 | BACKBONE: 9 | NAME: "build_fcos_dla_fpn_backbone" 10 | FREEZE_AT: -1 11 | WEIGHTS: "http://dl.yf.io/dla/models/imagenet/dla34-ba72cf86.pth" 12 | DLA: 13 | CONV_BODY: "DLA34" 14 | NORM: "SyncBN" 15 | FPN: 16 | IN_FEATURES: ["level3", "level4", "level5"] 17 | FCOS: 18 | TOP_LEVELS: 0 19 | SIZES_OF_INTEREST: [64, 128] 20 | FPN_STRIDES: [8, 16, 32] 21 | IN_FEATURES: ['p3', 'p4', 'p5'] 22 | NUM_SHARE_CONVS: 4 23 | NUM_BOX_CONVS: 0 24 | NUM_CLS_CONVS: 0 25 | NORM: "SyncBN" 26 | SOLVER: 27 | STEPS: (300000, 340000) 28 | MAX_ITER: 360000 29 | OUTPUT_DIR: "output/fcos/FCOS_RT_MS_DLA_34_4x_syncbn_shared_towers_bn_head" 30 | -------------------------------------------------------------------------------- /configs/FCOS-Detection/FCOS_RT/MS_R_50_4x_syncbn.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-FCOS.yaml" 2 | INPUT: 3 | MIN_SIZE_TRAIN: (256, 288, 320, 352, 384, 416, 448, 480, 512, 544, 576, 608) 4 | MAX_SIZE_TRAIN: 900 5 | MAX_SIZE_TEST: 736 6 | MIN_SIZE_TEST: 512 7 | MODEL: 8 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 9 | RESNETS: 10 | DEPTH: 50 11 | NORM: "SyncBN" 12 | FCOS: 13 | TOP_LEVELS: 0 14 | SIZES_OF_INTEREST: [64, 128] 15 | FPN_STRIDES: [8, 16, 32] 16 | IN_FEATURES: ['p3', 'p4', 'p5'] 17 | SOLVER: 18 | STEPS: (300000, 340000) 19 | MAX_ITER: 360000 20 | OUTPUT_DIR: "output/fcos/FCOS_RT_MS_R_50_4x_syncbn" 21 | -------------------------------------------------------------------------------- /configs/FCOS-Detection/FCOS_RT/MS_R_50_4x_syncbn_bn_head.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-FCOS.yaml" 2 | INPUT: 3 | MIN_SIZE_TRAIN: (256, 288, 320, 352, 384, 416, 448, 480, 512, 544, 576, 608) 4 | MAX_SIZE_TRAIN: 900 5 | MAX_SIZE_TEST: 736 6 | MIN_SIZE_TEST: 512 7 | MODEL: 8 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 9 | RESNETS: 10 | DEPTH: 50 11 | NORM: "SyncBN" 12 | FCOS: 13 | TOP_LEVELS: 0 14 | SIZES_OF_INTEREST: [64, 128] 15 | FPN_STRIDES: [8, 16, 32] 16 | IN_FEATURES: ['p3', 'p4', 'p5'] 17 | NORM: "SyncBN" 18 | SOLVER: 19 | STEPS: (300000, 340000) 20 | MAX_ITER: 360000 21 | OUTPUT_DIR: "output/fcos/FCOS_RT_MS_R_50_4x_syncbn_bn_head" 22 | -------------------------------------------------------------------------------- /configs/FCOS-Detection/MS_R_101_2x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-FCOS.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" 4 | RESNETS: 5 | DEPTH: 101 6 | 
SOLVER: 7 | STEPS: (120000, 160000) 8 | MAX_ITER: 180000 9 | OUTPUT_DIR: "output/fcos/R_101_2x" 10 | -------------------------------------------------------------------------------- /configs/FCOS-Detection/MS_R_50_2x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-FCOS.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | RESNETS: 5 | DEPTH: 50 6 | SOLVER: 7 | STEPS: (120000, 160000) 8 | MAX_ITER: 180000 9 | OUTPUT_DIR: "output/fcos/R_50_2x" 10 | -------------------------------------------------------------------------------- /configs/FCOS-Detection/MS_X_101_32x8d_2x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-FCOS.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/FAIR/X-101-32x8d.pkl" 4 | PIXEL_STD: [57.375, 57.120, 58.395] 5 | RESNETS: 6 | STRIDE_IN_1X1: False # this is a C2 model 7 | NUM_GROUPS: 32 8 | WIDTH_PER_GROUP: 8 9 | DEPTH: 101 10 | SOLVER: 11 | STEPS: (120000, 160000) 12 | MAX_ITER: 180000 13 | OUTPUT_DIR: "output/fcos/X_101_2x" 14 | -------------------------------------------------------------------------------- /configs/FCOS-Detection/MS_X_101_32x8d_2x_dcnv2.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-FCOS.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/FAIR/X-101-32x8d.pkl" 4 | PIXEL_STD: [57.375, 57.120, 58.395] 5 | RESNETS: 6 | STRIDE_IN_1X1: False # this is a C2 model 7 | NUM_GROUPS: 32 8 | WIDTH_PER_GROUP: 8 9 | DEPTH: 101 10 | DEFORM_ON_PER_STAGE: [False, False, True, True] # on Res4, Res5 11 | DEFORM_MODULATED: True 12 | FCOS: 13 | USE_DEFORMABLE: True 14 | SOLVER: 15 | STEPS: (120000, 160000) 16 | MAX_ITER: 180000 17 | OUTPUT_DIR: "output/fcos/MS_X_101_2x_dcnv2" 18 | -------------------------------------------------------------------------------- /configs/FCOS-Detection/MS_X_101_64x4d_2x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-FCOS.yaml" 2 | MODEL: 3 | WEIGHTS: "catalog://ImageNetPretrained/FAIR/X-101-64x4d" 4 | PIXEL_STD: [1.0, 1.0, 1.0] 5 | RESNETS: 6 | STRIDE_IN_1X1: False # this is a C2 model 7 | NUM_GROUPS: 64 8 | WIDTH_PER_GROUP: 4 9 | DEPTH: 101 10 | SOLVER: 11 | STEPS: (120000, 160000) 12 | MAX_ITER: 180000 13 | OUTPUT_DIR: "output/fcos/MS_X_101_64x4d_2x" 14 | -------------------------------------------------------------------------------- /configs/FCOS-Detection/MS_X_101_64x4d_2x_dcnv2.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-FCOS.yaml" 2 | MODEL: 3 | WEIGHTS: "catalog://ImageNetPretrained/FAIR/X-101-64x4d" 4 | PIXEL_STD: [1.0, 1.0, 1.0] 5 | RESNETS: 6 | STRIDE_IN_1X1: False # this is a C2 model 7 | NUM_GROUPS: 64 8 | WIDTH_PER_GROUP: 4 9 | DEPTH: 101 10 | DEFORM_ON_PER_STAGE: [False, False, True, True] # on Res4, Res5 11 | DEFORM_MODULATED: True 12 | FCOS: 13 | USE_DEFORMABLE: True 14 | SOLVER: 15 | STEPS: (120000, 160000) 16 | MAX_ITER: 180000 17 | OUTPUT_DIR: "output/fcos/MS_X_101_64x4d_2x_dcnv2" 18 | -------------------------------------------------------------------------------- /configs/FCOS-Detection/README.md: -------------------------------------------------------------------------------- 1 | # FCOS: Fully Convolutional One-Stage Object Detection 2 | 3 | FCOS: Fully Convolutional One-Stage Object Detection; 4 | Zhi Tian, Chunhua Shen, Hao Chen, and Tong He; 5 | In: Proc. Int. Conf. 
Computer Vision (ICCV), 2019. 6 | arXiv preprint arXiv:1904.01355 7 | 8 | [[`Paper`](https://arxiv.org/abs/1904.01355)] [[`BibTeX`](#citing-fcos)] 9 | 10 | # Installation & Quick Start 11 | No special setup needed. The [default instruction](../../README.md#Installation) is fine. 12 | 13 | ## Models 14 | ### COCO Object Detecton Baselines with [FCOS](https://arxiv.org/abs/1904.01355) 15 | 16 | Name | inf. time | box AP | download 17 | --- |:---:|:---:|:---: 18 | [FCOS_R_50_1x](R_50_1x.yaml) | 16 FPS | 38.7 | [model](https://cloudstor.aarnet.edu.au/plus/s/glqFc13cCoEyHYy/download) 19 | [FCOS_MS_R_50_2x](MS_R_50_2x.yaml) | 16 FPS | 41.0 | [model](https://cloudstor.aarnet.edu.au/plus/s/reA6HVaGX47yKGV/download) 20 | [FCOS_MS_R_101_2x](MS_R_101_2x.yaml) | 12 FPS | 43.1 | [model](https://cloudstor.aarnet.edu.au/plus/s/M3UOT6JcyHy2QW1/download) 21 | [FCOS_MS_X_101_32x8d_2x](MS_X_101_32x8d_2x.yaml) | 6.6 FPS | 43.9 | [model](https://cloudstor.aarnet.edu.au/plus/s/R7H00WeWKZG45pP/download) 22 | [FCOS_MS_X_101_64x4d_2x](MS_X_101_64x4d_2x.yaml) | 6.1 FPS | 44.7 | [model](https://cloudstor.aarnet.edu.au/plus/s/XOLUCzqKYckNII7/download) 23 | [FCOS_MS_X_101_32x8d_dcnv2_2x](MS_X_101_32x8d_2x_dcnv2.yaml) | 4.6 FPS | 46.6 | [model](https://cloudstor.aarnet.edu.au/plus/s/TDsnYK8OXDTrafF/download) 24 | 25 | *Except for FCOS_R_50_1x, all other models are trained with multi-scale data augmentation.* 26 | 27 | ### FCOS Real-time Models 28 | 29 | Name | inf. time | box AP | download 30 | --- |:---:|:---:|:---: 31 | [FCOS_RT_MS_DLA_34_4x_shtw](FCOS_RT/MS_DLA_34_4x_syncbn_shared_towers.yaml) | 52 FPS | 39.1 | [model](https://cloudstor.aarnet.edu.au/plus/s/4vc3XwQezyhNvnB/download) 32 | [FCOS_RT_MS_DLA_34_4x](FCOS_RT/MS_DLA_34_4x_syncbn.yaml) | 46 FPS | 40.3 | [model](https://cloudstor.aarnet.edu.au/plus/s/zNPNyTkizaOOsUQ/download) 33 | [FCOS_RT_MS_R_50_4x](FCOS_RT/MS_R_50_4x_syncbn.yaml) | 38 FPS | 40.2 | [model](https://cloudstor.aarnet.edu.au/plus/s/TlnlXUr6lNNSyoZ/download) 34 | 35 | If you prefer BN in FCOS heads, please try the following models. 36 | 37 | Name | inf. time | box AP | download 38 | --- |:---:|:---:|:---: 39 | [FCOS_RT_MS_DLA_34_4x_shtw_bn](FCOS_RT/MS_DLA_34_4x_syncbn_shared_towers_bn_head.yaml) | 52 FPS | 38.9 | [model](https://cloudstor.aarnet.edu.au/plus/s/rdmHHSs4oCg7l7U/download) 40 | [FCOS_RT_MS_DLA_34_4x_bn](FCOS_RT/MS_DLA_34_4x_syncbn_bn_head.yaml) | 48 FPS | 39.4 | [model](https://cloudstor.aarnet.edu.au/plus/s/T5httPVo1VndbD4/download) 41 | [FCOS_RT_MS_R_50_4x_bn](FCOS_RT/MS_R_50_4x_syncbn_bn_head.yaml) | 40 FPS | 39.3 | [model](https://cloudstor.aarnet.edu.au/plus/s/dHNUNs0YxVhZAmg/download) 42 | 43 | *Inference time is measured on a NVIDIA 1080Ti with batch size 1. Real-time models use shorter side 512 for inference.* 44 | 45 | # Citing FCOS 46 | If you use FCOS in your research or wish to refer to the baseline results, please use the following BibTeX entries. 47 | ```BibTeX 48 | @inproceedings{tian2019fcos, 49 | title = {{FCOS}: Fully Convolutional One-Stage Object Detection}, 50 | author = {Tian, Zhi and Shen, Chunhua and Chen, Hao and He, Tong}, 51 | booktitle = {Proc. Int. Conf. 
Computer Vision (ICCV)}, 52 | year = {2019} 53 | } 54 | ``` 55 | -------------------------------------------------------------------------------- /configs/FCOS-Detection/RS_50_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-FCOS.yaml" 2 | MODEL: 3 | #WEIGHTS: "https://hangzh.s3-us-west-1.amazonaws.com/encoding/models/resnest50_detectron-255b5649.pth" 4 | #"detectron2://ImageNetPretrained/MSRA/R-50.pkl" 5 | MASK_ON: False 6 | RESNETS: 7 | DEPTH: 50 8 | STRIDE_IN_1X1: False 9 | RADIX: 2 10 | DEEP_STEM: True 11 | AVD: True 12 | AVG_DOWN: True 13 | NORM: "SyncBN" 14 | FPN: 15 | NORM: "SyncBN" 16 | ROI_BOX_HEAD: 17 | NAME: "FastRCNNConvFCHead" 18 | NUM_CONV: 4 19 | NUM_FC: 1 20 | NORM: "SyncBN" 21 | PIXEL_MEAN: [123.68, 116.779, 103.939] 22 | PIXEL_STD: [58.393, 57.12, 57.375] 23 | SOLVER: 24 | IMS_PER_BATCH: 16 #16 25 | BASE_LR: 0.02 #original:0.02 26 | INPUT: 27 | MIN_SIZE_TRAIN: (640, 800) 28 | MIN_SIZE_TRAIN_SAMPLING: "range" 29 | MAX_SIZE_TRAIN: 1333 30 | FORMAT: "RGB" -------------------------------------------------------------------------------- /configs/FCOS-Detection/R_50_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-FCOS.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | RESNETS: 5 | DEPTH: 50 6 | INPUT: 7 | MIN_SIZE_TRAIN: (800,) 8 | SOLVER: 9 | WARMUP_METHOD: "constant" 10 | WARMUP_FACTOR: 0.3333 11 | WARMUP_ITERS: 500 12 | OUTPUT_DIR: "output/fcos/R_50_1x" 13 | -------------------------------------------------------------------------------- /configs/FCOS-Detection/atss_r_50.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-ATSS.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | RESNETS: 5 | DEPTH: 50 6 | INPUT: 7 | MIN_SIZE_TRAIN: (800,) 8 | SOLVER: 9 | WARMUP_METHOD: "constant" 10 | WARMUP_FACTOR: 0.3333 11 | WARMUP_ITERS: 500 12 | OUTPUT_DIR: "output/atss/R_50_1x" -------------------------------------------------------------------------------- /configs/FCOS-Detection/vovnet/MS_V_39_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-FCOS.yaml" 2 | MODEL: 3 | WEIGHTS: "https://www.dropbox.com/s/q98pypf96rhtd8y/vovnet39_ese_detectron2.pth?dl=1" 4 | BACKBONE: 5 | NAME: "build_fcos_vovnet_fpn_backbone" 6 | FREEZE_AT: 0 7 | VOVNET: 8 | CONV_BODY : "V-39-eSE" 9 | OUT_FEATURES: ["stage3", "stage4", "stage5"] 10 | FPN: 11 | IN_FEATURES: ["stage3", "stage4", "stage5"] 12 | SOLVER: 13 | STEPS: (210000, 250000) 14 | MAX_ITER: 270000 15 | OUTPUT_DIR: "output/fcos/V_39_ms_3x" 16 | -------------------------------------------------------------------------------- /configs/FCOS-Detection/vovnet/MS_V_57_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-FCOS.yaml" 2 | MODEL: 3 | WEIGHTS: "https://www.dropbox.com/s/8xl0cb3jj51f45a/vovnet57_ese_detectron2.pth?dl=1" 4 | BACKBONE: 5 | NAME: "build_fcos_vovnet_fpn_backbone" 6 | FREEZE_AT: 0 7 | VOVNET: 8 | CONV_BODY : "V-57-eSE" 9 | OUT_FEATURES: ["stage3", "stage4", "stage5"] 10 | FPN: 11 | IN_FEATURES: ["stage3", "stage4", "stage5"] 12 | SOLVER: 13 | STEPS: (210000, 250000) 14 | MAX_ITER: 270000 15 | OUTPUT_DIR: "output/fcos/V_57_ms_3x" 16 | -------------------------------------------------------------------------------- /configs/FCOS-Detection/vovnet/MS_V_99_3x.yaml: 
-------------------------------------------------------------------------------- 1 | _BASE_: "../Base-FCOS.yaml" 2 | MODEL: 3 | WEIGHTS: "https://www.dropbox.com/s/1mlv31coewx8trd/vovnet99_ese_detectron2.pth?dl=1" 4 | BACKBONE: 5 | NAME: "build_fcos_vovnet_fpn_backbone" 6 | FREEZE_AT: 0 7 | VOVNET: 8 | CONV_BODY : "V-99-eSE" 9 | OUT_FEATURES: ["stage3", "stage4", "stage5"] 10 | FPN: 11 | IN_FEATURES: ["stage3", "stage4", "stage5"] 12 | SOLVER: 13 | STEPS: (210000, 250000) 14 | MAX_ITER: 270000 15 | OUTPUT_DIR: "output/fcos/V_99_ms_3x" 16 | -------------------------------------------------------------------------------- /configs/FCOS-Detection/vovnet/README.md: -------------------------------------------------------------------------------- 1 | # [VoVNet-v2](https://github.com/youngwanLEE/CenterMask) backbone networks in [FCOS](https://github.com/aim-uofa/det) 2 | **Efficient Backbone Network for Object Detection and Segmentation**\ 3 | Youngwan Lee 4 | 5 | 6 | [[`vovnet-detectron2`](https://github.com/youngwanLEE/vovnet-detectron2)][[`CenterMask(code)`](https://github.com/youngwanLEE/CenterMask)] [[`VoVNet-v1(arxiv)`](https://arxiv.org/abs/1904.09730)] [[`VoVNet-v2(arxiv)`](https://arxiv.org/abs/1911.06667)] [[`BibTeX`](#CitingVoVNet)] 7 | 8 | 9 |
12 | 13 | 14 | ## Comparison with Faster R-CNN and ResNet 15 | 16 | ### Note 17 | 18 | We measure the inference time of all models with batch size 1 on the same V100 GPU machine. 19 | 20 | - pytorch1.3.1 21 | - CUDA 10.1 22 | - cuDNN 7.3 23 | 24 | 25 | |Method|Backbone|lr sched|inference time|AP|APs|APm|APl|download| 26 | |---|:--------:|:---:|:--:|--|----|----|---|--------| 27 | |Faster|R-50-FPN|3x|0.047|40.2|24.2|43.5|52.0|model \| metrics 28 | |Faster|**V2-39-FPN**|3x|0.047|42.7|27.1|45.6|54.0|model \| metrics 29 | |**FCOS**|**V2-39-FPN**|3x|0.045|43.5|28.1|47.2|54.5|model \| metrics 30 | || 31 | |Faster|R-101-FPN|3x|0.063|42.0|25.2|45.6|54.6|model \| metrics 32 | |Faster|**V2-57-FPN**|3x|0.054|43.3|27.5|46.7|55.3|model \| metrics 33 | |**FCOS**|**V2-57-FPN**|3x|0.051|44.4|28.8|47.2|56.3|model \| metrics 34 | || 35 | |Faster|X-101-FPN|3x|0.120|43.0|27.2|46.1|54.9|model \| metrics| 36 | |Faster|**V2-99-FPN**|3x|0.073|44.1|28.1|47.0|56.4|model \| metrics| 37 | |**FCOS**|**V2-99-FPN**|3x|0.070|45.2|29.2|48.4|57.3|model \| metrics| 38 | 39 | 40 | 41 | ## Citing VoVNet 42 | 43 | If you use VoVNet, please use the following BibTeX entry. 44 | 45 | ```BibTeX 46 | @inproceedings{lee2019energy, 47 | title = {An Energy and GPU-Computation Efficient Backbone Network for Real-Time Object Detection}, 48 | author = {Lee, Youngwan and Hwang, Joong-won and Lee, Sangrok and Bae, Yuseok and Park, Jongyoul}, 49 | booktitle = {Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition Workshops}, 50 | year = {2019} 51 | } 52 | 53 | @article{lee2019centermask, 54 | title={CenterMask: Real-Time Anchor-Free Instance Segmentation}, 55 | author={Lee, Youngwan and Park, Jongyoul}, 56 | journal={arXiv preprint arXiv:1911.06667}, 57 | year={2019} 58 | } 59 | ``` 60 | -------------------------------------------------------------------------------- /configs/RCNN/550_R_50_FPN_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-RCNN.yaml" 2 | MODEL: 3 | WEIGHTS: "output/mask_rcnn/550_R_50_3x/model_final.pth" 4 | MASK_ON: True 5 | RESNETS: 6 | DEPTH: 50 7 | SOLVER: 8 | STEPS: (210000, 250000) 9 | MAX_ITER: 270000 10 | INPUT: 11 | MIN_SIZE_TRAIN: (440, 462, 484, 506, 528, 550) 12 | MAX_SIZE_TRAIN: 916 13 | MIN_SIZE_TEST: 550 14 | MAX_SIZE_TEST: 916 15 | OUTPUT_DIR: "output/mask_rcnn/550_R_50_3x" 16 | -------------------------------------------------------------------------------- /configs/RCNN/Base-RCNN-FPN.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | BACKBONE: 4 | NAME: "build_resnet_fpn_backbone" 5 | RESNETS: 6 | OUT_FEATURES: ["res2", "res3", "res4", "res5"] 7 | FPN: 8 | IN_FEATURES: ["res2", "res3", "res4", "res5"] 9 | ANCHOR_GENERATOR: 10 | SIZES: [[32], [64], [128], [256], [512]] # One size for each in feature map 11 | ASPECT_RATIOS: [[0.5, 1.0, 2.0]] # Three aspect ratios (same for all in feature maps) 12 | RPN: 13 | IN_FEATURES: ["p2", "p3", "p4", "p5", "p6"] 14 | PRE_NMS_TOPK_TRAIN: 2000 # Per FPN level 15 | PRE_NMS_TOPK_TEST: 1000 # Per FPN level 16 | # Detectron1 uses 2000 proposals per-batch, 17 | # (See "modeling/rpn/rpn_outputs.py" for details of this legacy issue) 18 | # which is approximately 1000 proposals per-image since the default batch size for FPN is 2. 
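    # In detectron2 the POST_NMS_TOPK values below are applied per image, not per batch.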
19 | POST_NMS_TOPK_TRAIN: 1000 20 | POST_NMS_TOPK_TEST: 1000 21 | ROI_HEADS: 22 | NAME: "StandardROIHeads" 23 | IN_FEATURES: ["p2", "p3", "p4", "p5"] 24 | ROI_BOX_HEAD: 25 | NAME: "FastRCNNConvFCHead" 26 | NUM_FC: 2 27 | POOLER_RESOLUTION: 7 28 | ROI_MASK_HEAD: 29 | NAME: "MaskRCNNConvUpsampleHead" 30 | NUM_CONV: 4 31 | POOLER_RESOLUTION: 14 32 | DATASETS: 33 | TRAIN: ("coco_2017_train",) 34 | TEST: ("coco_2017_val",) 35 | SOLVER: 36 | IMS_PER_BATCH: 16 37 | BASE_LR: 0.02 38 | STEPS: (60000, 80000) 39 | MAX_ITER: 90000 40 | INPUT: 41 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 42 | VERSION: 2 43 | -------------------------------------------------------------------------------- /configs/RCNN/Base-RCNN.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | BACKBONE: 4 | NAME: "build_resnet_fpn_backbone" 5 | RESNETS: 6 | OUT_FEATURES: ["res2", "res3", "res4", "res5"] 7 | FPN: 8 | IN_FEATURES: ["res2", "res3", "res4", "res5"] 9 | ANCHOR_GENERATOR: 10 | SIZES: [[32], [64], [128], [256], [512]] # One size for each in feature map 11 | ASPECT_RATIOS: [[0.5, 1.0, 2.0]] # Three aspect ratios (same for all in feature maps) 12 | RPN: 13 | IN_FEATURES: ["p2", "p3", "p4", "p5", "p6"] 14 | PRE_NMS_TOPK_TRAIN: 2000 # Per FPN level 15 | PRE_NMS_TOPK_TEST: 1000 # Per FPN level 16 | # Detectron1 uses 2000 proposals per-batch, 17 | # (See "modeling/rpn/rpn_outputs.py" for details of this legacy issue) 18 | # which is approximately 1000 proposals per-image since the default batch size for FPN is 2. 19 | POST_NMS_TOPK_TRAIN: 1000 20 | POST_NMS_TOPK_TEST: 1000 21 | ROI_HEADS: 22 | NAME: "StandardROIHeads" 23 | IN_FEATURES: ["p2", "p3", "p4", "p5"] 24 | ROI_BOX_HEAD: 25 | NAME: "FastRCNNConvFCHead" 26 | NUM_FC: 2 27 | POOLER_RESOLUTION: 7 28 | ROI_MASK_HEAD: 29 | NAME: "MaskRCNNConvUpsampleHead" 30 | NUM_CONV: 4 31 | POOLER_RESOLUTION: 14 32 | DATASETS: 33 | TRAIN: ("coco_2017_train",) 34 | TEST: ("coco_2017_val",) 35 | SOLVER: 36 | IMS_PER_BATCH: 16 37 | BASE_LR: 0.02 38 | STEPS: (60000, 80000) 39 | MAX_ITER: 90000 40 | INPUT: 41 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 42 | VERSION: 2 43 | -------------------------------------------------------------------------------- /configs/RCNN/LVIS/R_50_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-LVIS.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | RESNETS: 5 | DEPTH: 50 6 | OUTPUT_DIR: "output/lvis/mask_rcnn/R_50_1x" 7 | -------------------------------------------------------------------------------- /configs/RCNN/R_101_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-RCNN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" 4 | MASK_ON: True 5 | RESNETS: 6 | DEPTH: 101 7 | SOLVER: 8 | STEPS: (210000, 250000) 9 | MAX_ITER: 270000 10 | -------------------------------------------------------------------------------- /configs/RCNN/faster_rcnn_RS_50_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-RCNN-FPN.yaml" 2 | MODEL: 3 | #WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | RESNEST: True 5 | BACKBONE: 6 | NAME: "build_resnest_fpn_backbone" 7 | MASK_ON: False 8 | RESNETS: 9 | DEPTH: 50 10 | STRIDE_IN_1X1: False 11 | RADIX: 2 12 | DEEP_STEM: True 13 | AVD: True 14 | AVG_DOWN: True 15 | NORM: "GN" 16 | 
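The RCNN configs in this directory are ordinary detectron2/yacs YAML files. A minimal sketch of loading and training one of them, assuming the repo's `det.config.get_cfg` (defined in `det/config/config.py` below) carries the extra keys these configs reference and that training follows the standard detectron2 `DefaultTrainer` pattern used by `tools/train_net.py`:

```python
# Sketch only: the actual tools/train_net.py entry point may differ in details.
from det.config import get_cfg            # repo config: detectron2 defaults plus FCOS/RESNEST/... keys
from detectron2.engine import DefaultTrainer

def setup(config_file, opts=()):
    cfg = get_cfg()
    cfg.merge_from_file(config_file)       # e.g. "configs/RCNN/faster_rcnn_R_50_FPN_1x.yaml"
    cfg.merge_from_list(list(opts))        # command-line overrides, e.g. ["SOLVER.IMS_PER_BATCH", "8"]
    cfg.freeze()
    return cfg

if __name__ == "__main__":
    cfg = setup("configs/RCNN/faster_rcnn_R_50_FPN_1x.yaml")
    trainer = DefaultTrainer(cfg)          # builds the model, optimizer and COCO dataloaders from cfg
    trainer.resume_or_load(resume=False)
    trainer.train()
```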
-------------------------------------------------------------------------------- /configs/RCNN/faster_rcnn_R_50_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: False 5 | RESNETS: 6 | DEPTH: 50 7 | -------------------------------------------------------------------------------- /configs/RCNN/faster_rcnn_R_50_FPN_1x_tta.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: False 5 | RESNETS: 6 | DEPTH: 50 7 | TEST: 8 | AUG: 9 | ENABLED: True -------------------------------------------------------------------------------- /configs/RCNN/libra_rcnn/r_50_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: False 5 | RESNETS: 6 | DEPTH: 50 7 | ROI_HEADS: 8 | NAME: "LibraRCNNROIHeads" 9 | IN_FEATURES: ["p2", "p3", "p4", "p5"] -------------------------------------------------------------------------------- /det/_C.cpython-36m-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TengFeiHan0/Object-Detection.pytorch/5c2000ba08e903255a146f9e003c87a2fb1cb5aa/det/_C.cpython-36m-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /det/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from det import modeling 3 | 4 | __version__ = "0.1.1" -------------------------------------------------------------------------------- /det/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TengFeiHan0/Object-Detection.pytorch/5c2000ba08e903255a146f9e003c87a2fb1cb5aa/det/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /det/checkpoint/__init__.py: -------------------------------------------------------------------------------- 1 | from .det_checkpoint import detCheckpointer 2 | 3 | __all__ = ["detCheckpointer"] 4 | -------------------------------------------------------------------------------- /det/checkpoint/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TengFeiHan0/Object-Detection.pytorch/5c2000ba08e903255a146f9e003c87a2fb1cb5aa/det/checkpoint/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /det/checkpoint/__pycache__/det_checkpoint.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TengFeiHan0/Object-Detection.pytorch/5c2000ba08e903255a146f9e003c87a2fb1cb5aa/det/checkpoint/__pycache__/det_checkpoint.cpython-36.pyc -------------------------------------------------------------------------------- /det/checkpoint/det_checkpoint.py: -------------------------------------------------------------------------------- 1 | import pickle, os 2 | from fvcore.common.file_io import PathManager 3 | from detectron2.checkpoint import DetectionCheckpointer 4 | 5 | 6 | class 
detCheckpointer(DetectionCheckpointer): 7 | """ 8 | Same as :class:`DetectronCheckpointer`, but is able to convert models 9 | in AdelaiDet, such as LPF backbone. 10 | """ 11 | def _load_file(self, filename): 12 | if filename.endswith(".pkl"): 13 | with PathManager.open(filename, "rb") as f: 14 | data = pickle.load(f, encoding="latin1") 15 | if "model" in data and "__author__" in data: 16 | # file is in Detectron2 model zoo format 17 | self.logger.info("Reading a file from '{}'".format(data["__author__"])) 18 | return data 19 | else: 20 | # assume file is from Caffe2 / Detectron1 model zoo 21 | if "blobs" in data: 22 | # Detection models have "blobs", but ImageNet models don't 23 | data = data["blobs"] 24 | data = {k: v for k, v in data.items() if not k.endswith("_momentum")} 25 | if "weight_order" in data: 26 | del data["weight_order"] 27 | return {"model": data, "__author__": "Caffe2", "matching_heuristics": True} 28 | 29 | loaded = super()._load_file(filename) # load native pth checkpoint 30 | if "model" not in loaded: 31 | loaded = {"model": loaded} 32 | 33 | basename = os.path.basename(filename).lower() 34 | if "lpf" in basename or "dla" in basename: 35 | loaded["matching_heuristics"] = True 36 | return loaded 37 | -------------------------------------------------------------------------------- /det/config/__init__.py: -------------------------------------------------------------------------------- 1 | from .config import get_cfg 2 | 3 | __all__ = [ 4 | "get_cfg", 5 | ] 6 | -------------------------------------------------------------------------------- /det/config/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TengFeiHan0/Object-Detection.pytorch/5c2000ba08e903255a146f9e003c87a2fb1cb5aa/det/config/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /det/config/__pycache__/config.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TengFeiHan0/Object-Detection.pytorch/5c2000ba08e903255a146f9e003c87a2fb1cb5aa/det/config/__pycache__/config.cpython-36.pyc -------------------------------------------------------------------------------- /det/config/__pycache__/defaults.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TengFeiHan0/Object-Detection.pytorch/5c2000ba08e903255a146f9e003c87a2fb1cb5aa/det/config/__pycache__/defaults.cpython-36.pyc -------------------------------------------------------------------------------- /det/config/config.py: -------------------------------------------------------------------------------- 1 | from detectron2.config import CfgNode 2 | 3 | 4 | def get_cfg() -> CfgNode: 5 | """ 6 | Get a copy of the default config. 7 | 8 | Returns: 9 | a detectron2 CfgNode instance. 10 | """ 11 | from .defaults import _C 12 | 13 | return _C.clone() 14 | -------------------------------------------------------------------------------- /det/data/__init__.py: -------------------------------------------------------------------------------- 1 | from . 
import builtin # ensure the builtin datasets are registered 2 | from .dataset_mapper import DatasetMapperWithBasis 3 | 4 | 5 | __all__ = ["DatasetMapperWithBasis"] 6 | #grid mask trick 7 | #https://github.com/Jia-Research-Lab/GridMask/blob/master/detection_grid/maskrcnn_benchmark/data/transforms/grid.py -------------------------------------------------------------------------------- /det/data/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TengFeiHan0/Object-Detection.pytorch/5c2000ba08e903255a146f9e003c87a2fb1cb5aa/det/data/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /det/data/__pycache__/builtin.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TengFeiHan0/Object-Detection.pytorch/5c2000ba08e903255a146f9e003c87a2fb1cb5aa/det/data/__pycache__/builtin.cpython-36.pyc -------------------------------------------------------------------------------- /det/data/__pycache__/dataset_mapper.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TengFeiHan0/Object-Detection.pytorch/5c2000ba08e903255a146f9e003c87a2fb1cb5aa/det/data/__pycache__/dataset_mapper.cpython-36.pyc -------------------------------------------------------------------------------- /det/data/__pycache__/detection_utils.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TengFeiHan0/Object-Detection.pytorch/5c2000ba08e903255a146f9e003c87a2fb1cb5aa/det/data/__pycache__/detection_utils.cpython-36.pyc -------------------------------------------------------------------------------- /det/data/builtin.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from detectron2.data.datasets.register_coco import register_coco_instances 4 | from detectron2.data.datasets.builtin_meta import _get_builtin_metadata 5 | 6 | from .datasets.text import register_text_instances 7 | 8 | # register plane reconstruction 9 | 10 | _PREDEFINED_SPLITS_PIC = { 11 | "pic_person_train": ("pic/image/train", "pic/annotations/train_person.json"), 12 | "pic_person_val": ("pic/image/val", "pic/annotations/val_person.json"), 13 | } 14 | 15 | metadata_pic = { 16 | "thing_classes": ["person"] 17 | } 18 | 19 | _PREDEFINED_SPLITS_TEXT = { 20 | "totaltext_train": ("totaltext/train_images", "totaltext/train.json"), 21 | "totaltext_val": ("totaltext/test_images", "totaltext/test.json"), 22 | "ctw1500_word_train": ("CTW1500/ctwtrain_text_image", "CTW1500/annotations/train_ctw1500_maxlen100_v2.json"), 23 | "ctw1500_word_test": ("CTW1500/ctwtest_text_image","CTW1500/annotations/test_ctw1500_maxlen100.json"), 24 | "syntext1_train": ("syntext1/images", "syntext1/annotations/train.json"), 25 | "syntext2_train": ("syntext2/images", "syntext2/annotations/train.json"), 26 | "mltbezier_word_train": ("mlt2017/images","mlt2017/annotations/train.json"), 27 | } 28 | 29 | metadata_text = { 30 | "thing_classes": ["text"] 31 | } 32 | 33 | 34 | def register_all_coco(root="datasets"): 35 | for key, (image_root, json_file) in _PREDEFINED_SPLITS_PIC.items(): 36 | # Assume pre-defined datasets live in `./datasets`. 
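        # register_coco_instances(name, metadata, json_file, image_root) exposes each split under
        # `name`, so configs can reference it through DATASETS.TRAIN / DATASETS.TEST; adding a new
        # COCO-format split only requires another entry in _PREDEFINED_SPLITS_PIC above.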
37 | register_coco_instances( 38 | key, 39 | metadata_pic, 40 | os.path.join(root, json_file) if "://" not in json_file else json_file, 41 | os.path.join(root, image_root), 42 | ) 43 | for key, (image_root, json_file) in _PREDEFINED_SPLITS_TEXT.items(): 44 | # Assume pre-defined datasets live in `./datasets`. 45 | register_text_instances( 46 | key, 47 | metadata_text, 48 | os.path.join(root, json_file) if "://" not in json_file else json_file, 49 | os.path.join(root, image_root), 50 | ) 51 | 52 | 53 | register_all_coco() -------------------------------------------------------------------------------- /det/data/dataset_mapper.py: -------------------------------------------------------------------------------- 1 | import copy 2 | import numpy as np 3 | import os.path as osp 4 | import torch 5 | from fvcore.common.file_io import PathManager 6 | from PIL import Image 7 | import logging 8 | 9 | from detectron2.data.dataset_mapper import DatasetMapper 10 | from detectron2.data.detection_utils import SizeMismatchError 11 | from detectron2.data import detection_utils as utils 12 | from detectron2.data import transforms as T 13 | 14 | from .detection_utils import ( 15 | build_transform_gen, 16 | transform_instance_annotations, 17 | annotations_to_instances, 18 | gen_crop_transform_with_instance, 19 | ) 20 | 21 | """ 22 | This file contains the default mapping that's applied to "dataset dicts". 23 | """ 24 | 25 | __all__ = ["DatasetMapperWithBasis"] 26 | 27 | logger = logging.getLogger(__name__) 28 | 29 | 30 | class DatasetMapperWithBasis(DatasetMapper): 31 | """ 32 | This caller enables the default Detectron2 mapper to read an additional basis semantic label 33 | """ 34 | 35 | def __init__(self, cfg, is_train=True): 36 | super().__init__(cfg, is_train) 37 | 38 | # Rebuild transform gen 39 | logger.info("Rebuilding the transform generators. The previous generators will be overridden.") 40 | self.tfm_gens = build_transform_gen(cfg, is_train) 41 | 42 | # fmt: off 43 | self.basis_loss_on = cfg.MODEL.BASIS_MODULE.LOSS_ON 44 | self.ann_set = cfg.MODEL.BASIS_MODULE.ANN_SET 45 | self.crop_box = cfg.INPUT.CROP.CROP_INSTANCE 46 | # fmt: on 47 | 48 | def __call__(self, dataset_dict): 49 | """ 50 | Args: 51 | dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format. 52 | 53 | Returns: 54 | dict: a format that builtin models in detectron2 accept 55 | """ 56 | dataset_dict = copy.deepcopy(dataset_dict) # it will be modified by code below 57 | # USER: Write your own image loading if it's not from a file 58 | try: 59 | image = utils.read_image(dataset_dict["file_name"], format=self.img_format) 60 | except Exception as e: 61 | print(dataset_dict["file_name"]) 62 | print(e) 63 | raise e 64 | try: 65 | utils.check_image_size(dataset_dict, image) 66 | except SizeMismatchError as e: 67 | expected_wh = (dataset_dict["width"], dataset_dict["height"]) 68 | image_wh = (image.shape[1], image.shape[0]) 69 | if (image_wh[1], image_wh[0]) == expected_wh: 70 | print("transposing image {}".format(dataset_dict["file_name"])) 71 | image = image.transpose(1, 0, 2) 72 | else: 73 | raise e 74 | 75 | if "annotations" not in dataset_dict or len(dataset_dict["annotations"]) == 0: 76 | image, transforms = T.apply_transform_gens( 77 | ([self.crop_gen] if self.crop_gen else []) + self.tfm_gens, image 78 | ) 79 | else: 80 | # Crop around an instance if there are instances in the image. 
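            # (gen_crop_transform_with_instance, defined in det/data/detection_utils.py below,
            # centers the crop on a randomly chosen annotated instance.)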
81 | # USER: Remove if you don't use cropping 82 | if self.crop_gen: 83 | crop_tfm = gen_crop_transform_with_instance( 84 | self.crop_gen.get_crop_size(image.shape[:2]), 85 | image.shape[:2], 86 | dataset_dict["annotations"], 87 | crop_box=self.crop_box, 88 | ) 89 | image = crop_tfm.apply_image(image) 90 | try: 91 | image, transforms = T.apply_transform_gens(self.tfm_gens, image) 92 | except ValueError as e: 93 | print(dataset_dict["file_name"]) 94 | raise e 95 | if self.crop_gen: 96 | transforms = crop_tfm + transforms 97 | 98 | image_shape = image.shape[:2] # h, w 99 | 100 | # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory, 101 | # but not efficient on large generic data structures due to the use of pickle & mp.Queue. 102 | # Therefore it's important to use torch.Tensor. 103 | dataset_dict["image"] = torch.as_tensor(image.transpose(2, 0, 1).astype("float32")) 104 | # Can use uint8 if it turns out to be slow some day 105 | 106 | # USER: Remove if you don't use pre-computed proposals. 107 | if self.load_proposals: 108 | utils.transform_proposals( 109 | dataset_dict, image_shape, transforms, self.min_box_side_len, self.proposal_topk 110 | ) 111 | 112 | if not self.is_train: 113 | dataset_dict.pop("annotations", None) 114 | dataset_dict.pop("sem_seg_file_name", None) 115 | dataset_dict.pop("pano_seg_file_name", None) 116 | return dataset_dict 117 | 118 | if "annotations" in dataset_dict: 119 | # USER: Modify this if you want to keep them for some reason. 120 | for anno in dataset_dict["annotations"]: 121 | if not self.mask_on: 122 | anno.pop("segmentation", None) 123 | if not self.keypoint_on: 124 | anno.pop("keypoints", None) 125 | 126 | # USER: Implement additional transformations if you have other types of data 127 | annos = [ 128 | transform_instance_annotations( 129 | obj, transforms, image_shape, keypoint_hflip_indices=self.keypoint_hflip_indices 130 | ) 131 | for obj in dataset_dict.pop("annotations") 132 | if obj.get("iscrowd", 0) == 0 133 | ] 134 | instances = annotations_to_instances( 135 | annos, image_shape, mask_format=self.mask_format 136 | ) 137 | # Create a tight bounding box from masks, useful when image is cropped 138 | if self.crop_gen and instances.has("gt_masks"): 139 | instances.gt_boxes = instances.gt_masks.get_bounding_boxes() 140 | dataset_dict["instances"] = utils.filter_empty_instances(instances) 141 | 142 | # USER: Remove if you don't do semantic/panoptic segmentation. 
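        # The block below reads the semantic-segmentation GT, applies the same `transforms`
        # as the image, and stores it as a long tensor under "sem_seg".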
143 | if "sem_seg_file_name" in dataset_dict: 144 | with PathManager.open(dataset_dict.pop("sem_seg_file_name"), "rb") as f: 145 | sem_seg_gt = Image.open(f) 146 | sem_seg_gt = np.asarray(sem_seg_gt, dtype="uint8") 147 | sem_seg_gt = transforms.apply_segmentation(sem_seg_gt) 148 | sem_seg_gt = torch.as_tensor(sem_seg_gt.astype("long")) 149 | dataset_dict["sem_seg"] = sem_seg_gt 150 | 151 | if self.basis_loss_on and self.is_train: 152 | # load basis supervisions 153 | if self.ann_set == "coco": 154 | basis_sem_path = dataset_dict["file_name"].replace('train2017', 'thing_train2017').replace('image/train', 'thing_train') 155 | else: 156 | basis_sem_path = dataset_dict["file_name"].replace('coco', 'lvis').replace('train2017', 'thing_train') 157 | # change extension to npz 158 | basis_sem_path = osp.splitext(basis_sem_path)[0] + ".npz" 159 | basis_sem_gt = np.load(basis_sem_path)["mask"] 160 | basis_sem_gt = transforms.apply_segmentation(basis_sem_gt) 161 | basis_sem_gt = torch.as_tensor(basis_sem_gt.astype("long")) 162 | dataset_dict["basis_sem"] = basis_sem_gt 163 | return dataset_dict 164 | -------------------------------------------------------------------------------- /det/data/datasets/__pycache__/text.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TengFeiHan0/Object-Detection.pytorch/5c2000ba08e903255a146f9e003c87a2fb1cb5aa/det/data/datasets/__pycache__/text.cpython-36.pyc -------------------------------------------------------------------------------- /det/data/datasets/fast_augment.py: -------------------------------------------------------------------------------- 1 | from PIL import Image 2 | 3 | import torch 4 | 5 | from .augment_lists import * 6 | 7 | class RandAugment(object): 8 | def __init__(self, n, m): 9 | self.n = n 10 | self.m = m 11 | self.augment_list = rand_augment_list() 12 | self.topil = ToPIL() 13 | 14 | def __call__(self, img): 15 | img = self.topil(img) 16 | ops = random.choices(self.augment_list, k=self.n) 17 | for op, minval, maxval in ops: 18 | if random.random() > random.uniform(0.2, 0.8): 19 | continue 20 | val = (float(self.m) / 30) * float(maxval - minval) + minval 21 | img = op(img, val) 22 | return img 23 | 24 | 25 | class ToPIL(object): 26 | """Convert image from ndarray format to PIL 27 | """ 28 | def __call__(self, img): 29 | x = Image.fromarray(img.asnumpy()) 30 | return x 31 | 32 | # class ToNDArray(object): 33 | # def __call__(self, img): 34 | # x = mx.nd.array(np.array(img), .cpu(0)) 35 | # return x 36 | 37 | class AugmentationBlock(object): 38 | r""" 39 | AutoAugment Block 40 | Example 41 | ------- 42 | >>> aa_transform = AugmentationBlock(autoaug_imagenet_policies()) 43 | """ 44 | def __init__(self, policies): 45 | """ 46 | plicies : list of (name, pr, level) 47 | """ 48 | super().__init__() 49 | self.policies = policies 50 | self.topil = ToPIL() 51 | #self.tond = ToNDArray() 52 | 53 | def forward(self, img): 54 | img = self.topil(img) 55 | policy = random.choice(self.policies) 56 | for name, pr, level in policy: 57 | if random.random() > pr: 58 | continue 59 | img = apply_augment(img, name, level) 60 | #img = self.tond(img) 61 | return img 62 | -------------------------------------------------------------------------------- /det/data/detection_utils.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import numpy as np 3 | 4 | import torch 5 | 6 | from detectron2.data import transforms as T 7 | from 
detectron2.data.detection_utils import transform_instance_annotations as d2_transform_inst_anno 8 | from detectron2.data.detection_utils import annotations_to_instances as d2_anno_to_inst 9 | from detectron2.structures import BoxMode 10 | 11 | 12 | def gen_crop_transform_with_instance(crop_size, image_size, instances, crop_box=True): 13 | """ 14 | Generate a CropTransform so that the cropping region contains 15 | the center of the given instance. 16 | 17 | Args: 18 | crop_size (tuple): h, w in pixels 19 | image_size (tuple): h, w 20 | instance (dict): an annotation dict of one instance, in Detectron2's 21 | dataset format. 22 | """ 23 | instance = np.random.choice(instances), 24 | instance = instance[0] 25 | crop_size = np.asarray(crop_size, dtype=np.int32) 26 | bbox = BoxMode.convert(instance["bbox"], instance["bbox_mode"], BoxMode.XYXY_ABS) 27 | center_yx = (bbox[1] + bbox[3]) * 0.5, (bbox[0] + bbox[2]) * 0.5 28 | assert ( 29 | image_size[0] >= center_yx[0] and image_size[1] >= center_yx[1] 30 | ), "The annotation bounding box is outside of the image!" 31 | assert ( 32 | image_size[0] >= crop_size[0] and image_size[1] >= crop_size[1] 33 | ), "Crop size is larger than image size!" 34 | 35 | min_yx = np.maximum(np.floor(center_yx).astype(np.int32) - crop_size, 0) 36 | max_yx = np.maximum(np.asarray(image_size, dtype=np.int32) - crop_size, 0) 37 | max_yx = np.minimum(max_yx, np.ceil(center_yx).astype(np.int32)) 38 | 39 | y0 = np.random.randint(min_yx[0], max_yx[0] + 1) 40 | x0 = np.random.randint(min_yx[1], max_yx[1] + 1) 41 | 42 | # if some instance is cropped extend the box 43 | if not crop_box: 44 | modified = True 45 | while modified: 46 | modified, x0, y0, crop_size = adjust_crop(x0, y0, crop_size, instances) 47 | 48 | return T.CropTransform(*map(int, (x0, y0, crop_size[1], crop_size[0]))) 49 | 50 | 51 | def adjust_crop(x0, y0, crop_size, instances): 52 | modified = False 53 | 54 | x1 = x0 + crop_size[1] 55 | y1 = y0 + crop_size[0] 56 | 57 | for instance in instances: 58 | bbox = BoxMode.convert(instance["bbox"], instance["bbox_mode"], BoxMode.XYXY_ABS) 59 | 60 | if bbox[0] < x0 and bbox[2] > x0: 61 | crop_size[1] += x0 - bbox[0] 62 | x0 = bbox[0] 63 | modified = True 64 | 65 | if bbox[0] < x1 and bbox[2] > x1: 66 | crop_size[1] += bbox[2] - x1 67 | x1 = bbox[2] 68 | modified = True 69 | 70 | if bbox[1] < y0 and bbox[3] > y0: 71 | crop_size[0] += y0 - bbox[1] 72 | y0 = bbox[1] 73 | modified = True 74 | 75 | if bbox[1] < y1 and bbox[3] > y1: 76 | crop_size[0] += bbox[3] - y1 77 | y1 = bbox[3] 78 | modified = True 79 | 80 | return modified, x0, y0, crop_size 81 | 82 | 83 | def transform_instance_annotations( 84 | annotation, transforms, image_size, *, keypoint_hflip_indices=None 85 | ): 86 | 87 | annotation = d2_transform_inst_anno( 88 | annotation, transforms, image_size, 89 | keypoint_hflip_indices=keypoint_hflip_indices) 90 | 91 | if "beziers" in annotation: 92 | beziers = transform_beziers_annotations( 93 | annotation["beziers"], transforms 94 | ) 95 | annotation["beziers"] = beziers 96 | return annotation 97 | 98 | 99 | def transform_beziers_annotations(beziers, transforms): 100 | """ 101 | Transform keypoint annotations of an image. 102 | 103 | Args: 104 | beziers (list[float]): Nx16 float in Detectron2 Dataset format. 
105 | transforms (TransformList): 106 | """ 107 | # (N*2,) -> (N, 2) 108 | beziers = np.asarray(beziers, dtype="float64").reshape(-1, 2) 109 | beziers = transforms.apply_coords(beziers).reshape(-1) 110 | 111 | # This assumes that HorizFlipTransform is the only one that does flip 112 | do_hflip = sum(isinstance(t, T.HFlipTransform) for t in transforms.transforms) % 2 == 1 113 | if do_hflip: 114 | raise ValueError("Flipping text data is not supported (also disencouraged).") 115 | 116 | return beziers 117 | 118 | 119 | def annotations_to_instances(annos, image_size, mask_format="polygon"): 120 | instance = d2_anno_to_inst(annos, image_size, mask_format) 121 | 122 | # add attributes 123 | if "beziers" in annos[0]: 124 | beziers = [obj.get("beziers", []) for obj in annos] 125 | instance.beziers = torch.as_tensor( 126 | beziers, dtype=torch.float32) 127 | 128 | if "rec" in annos[0]: 129 | text = [obj.get("rec", []) for obj in annos] 130 | instance.text = torch.as_tensor( 131 | text, dtype=torch.int32) 132 | 133 | return instance 134 | 135 | 136 | def build_transform_gen(cfg, is_train): 137 | """ 138 | With option to don't use hflip 139 | 140 | Returns: 141 | list[TransformGen] 142 | """ 143 | if is_train: 144 | min_size = cfg.INPUT.MIN_SIZE_TRAIN 145 | max_size = cfg.INPUT.MAX_SIZE_TRAIN 146 | sample_style = cfg.INPUT.MIN_SIZE_TRAIN_SAMPLING 147 | else: 148 | min_size = cfg.INPUT.MIN_SIZE_TEST 149 | max_size = cfg.INPUT.MAX_SIZE_TEST 150 | sample_style = "choice" 151 | if sample_style == "range": 152 | assert len(min_size) == 2, "more than 2 ({}) min_size(s) are provided for ranges".format( 153 | len(min_size) 154 | ) 155 | 156 | logger = logging.getLogger(__name__) 157 | tfm_gens = [] 158 | tfm_gens.append(T.ResizeShortestEdge(min_size, max_size, sample_style)) 159 | if is_train: 160 | if cfg.INPUT.HFLIP_TRAIN: 161 | tfm_gens.append(T.RandomFlip()) 162 | logger.info("TransformGens used in training: " + str(tfm_gens)) 163 | return tfm_gens 164 | -------------------------------------------------------------------------------- /det/layers/__init__.py: -------------------------------------------------------------------------------- 1 | from .deform_conv import DFConv2d 2 | from .ml_nms import ml_nms 3 | from .iou_loss import IOULoss 4 | from .balanced_l1_loss import BalancedL1Loss, BoundedIoULoss 5 | from .conv_with_kaiming_uniform import conv_with_kaiming_uniform 6 | from .bezier_align import BezierAlign 7 | from .def_roi_align import DefROIAlign 8 | from .naive_group_norm import NaiveGroupNorm 9 | from .gcn import GCN 10 | from .soft_nms import batched_soft_nms 11 | __all__ = [k for k in globals().keys() if not k.startswith("_")] -------------------------------------------------------------------------------- /det/layers/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TengFeiHan0/Object-Detection.pytorch/5c2000ba08e903255a146f9e003c87a2fb1cb5aa/det/layers/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /det/layers/__pycache__/balanced_l1_loss.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TengFeiHan0/Object-Detection.pytorch/5c2000ba08e903255a146f9e003c87a2fb1cb5aa/det/layers/__pycache__/balanced_l1_loss.cpython-36.pyc -------------------------------------------------------------------------------- 
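The data pipeline shown above (`DatasetMapperWithBasis` plus `build_transform_gen`) can be exercised on its own. A minimal sketch, assuming the repo's config defaults define `INPUT.HFLIP_TRAIN` as in AdelaiDet and that the legacy `T.apply_transform_gens` API (the same call the mapper makes) is available in the installed detectron2:

```python
import numpy as np
from detectron2.data import transforms as T
from det.config import get_cfg
from det.data.detection_utils import build_transform_gen

cfg = get_cfg()
cfg.merge_from_file("configs/FCOS-Detection/R_50_1x.yaml")    # MIN_SIZE_TRAIN: (800,)
tfm_gens = build_transform_gen(cfg, is_train=True)            # ResizeShortestEdge (+ RandomFlip if HFLIP_TRAIN)
dummy = np.zeros((480, 640, 3), dtype=np.uint8)               # stand-in for utils.read_image(...)
image, transforms = T.apply_transform_gens(tfm_gens, dummy)   # resized image and the TransformList applied
print(image.shape, transforms)
```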
/det/layers/__pycache__/bezier_align.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TengFeiHan0/Object-Detection.pytorch/5c2000ba08e903255a146f9e003c87a2fb1cb5aa/det/layers/__pycache__/bezier_align.cpython-36.pyc -------------------------------------------------------------------------------- /det/layers/__pycache__/conv_with_kaiming_uniform.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TengFeiHan0/Object-Detection.pytorch/5c2000ba08e903255a146f9e003c87a2fb1cb5aa/det/layers/__pycache__/conv_with_kaiming_uniform.cpython-36.pyc -------------------------------------------------------------------------------- /det/layers/__pycache__/def_roi_align.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TengFeiHan0/Object-Detection.pytorch/5c2000ba08e903255a146f9e003c87a2fb1cb5aa/det/layers/__pycache__/def_roi_align.cpython-36.pyc -------------------------------------------------------------------------------- /det/layers/__pycache__/deform_conv.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TengFeiHan0/Object-Detection.pytorch/5c2000ba08e903255a146f9e003c87a2fb1cb5aa/det/layers/__pycache__/deform_conv.cpython-36.pyc -------------------------------------------------------------------------------- /det/layers/__pycache__/gcn.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TengFeiHan0/Object-Detection.pytorch/5c2000ba08e903255a146f9e003c87a2fb1cb5aa/det/layers/__pycache__/gcn.cpython-36.pyc -------------------------------------------------------------------------------- /det/layers/__pycache__/iou_loss.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TengFeiHan0/Object-Detection.pytorch/5c2000ba08e903255a146f9e003c87a2fb1cb5aa/det/layers/__pycache__/iou_loss.cpython-36.pyc -------------------------------------------------------------------------------- /det/layers/__pycache__/ml_nms.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TengFeiHan0/Object-Detection.pytorch/5c2000ba08e903255a146f9e003c87a2fb1cb5aa/det/layers/__pycache__/ml_nms.cpython-36.pyc -------------------------------------------------------------------------------- /det/layers/__pycache__/naive_group_norm.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TengFeiHan0/Object-Detection.pytorch/5c2000ba08e903255a146f9e003c87a2fb1cb5aa/det/layers/__pycache__/naive_group_norm.cpython-36.pyc -------------------------------------------------------------------------------- /det/layers/__pycache__/soft_nms.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TengFeiHan0/Object-Detection.pytorch/5c2000ba08e903255a146f9e003c87a2fb1cb5aa/det/layers/__pycache__/soft_nms.cpython-36.pyc -------------------------------------------------------------------------------- /det/layers/balanced_l1_loss.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import 
torch.nn as nn 4 | 5 | import functools 6 | 7 | import torch.nn.functional as F 8 | 9 | 10 | def reduce_loss(loss, reduction): 11 | """Reduce loss as specified. 12 | Args: 13 | loss (Tensor): Elementwise loss tensor. 14 | reduction (str): Options are "none", "mean" and "sum". 15 | Return: 16 | Tensor: Reduced loss tensor. 17 | """ 18 | reduction_enum = F._Reduction.get_enum(reduction) 19 | # none: 0, elementwise_mean:1, sum: 2 20 | if reduction_enum == 0: 21 | return loss 22 | elif reduction_enum == 1: 23 | return loss.mean() 24 | elif reduction_enum == 2: 25 | return loss.sum() 26 | 27 | 28 | def weight_reduce_loss(loss, weight=None, reduction='mean', avg_factor=None): 29 | """Apply element-wise weight and reduce loss. 30 | Args: 31 | loss (Tensor): Element-wise loss. 32 | weight (Tensor): Element-wise weights. 33 | reduction (str): Same as built-in losses of PyTorch. 34 | avg_factor (float): Avarage factor when computing the mean of losses. 35 | Returns: 36 | Tensor: Processed loss values. 37 | """ 38 | # if weight is specified, apply element-wise weight 39 | if weight is not None: 40 | loss = loss * weight 41 | 42 | # if avg_factor is not specified, just reduce the loss 43 | if avg_factor is None: 44 | loss = reduce_loss(loss, reduction) 45 | else: 46 | # if reduction is mean, then average the loss by avg_factor 47 | if reduction == 'mean': 48 | loss = loss.sum() / avg_factor 49 | # if reduction is 'none', then do nothing, otherwise raise an error 50 | elif reduction != 'none': 51 | raise ValueError('avg_factor can not be used with reduction="sum"') 52 | return loss 53 | 54 | 55 | def weighted_loss(loss_func): 56 | """Create a weighted version of a given loss function. 57 | To use this decorator, the loss function must have the signature like 58 | `loss_func(pred, target, **kwargs)`. The function only needs to compute 59 | element-wise loss without any reduction. This decorator will add weight 60 | and reduction arguments to the function. The decorated function will have 61 | the signature like `loss_func(pred, target, weight=None, reduction='mean', 62 | avg_factor=None, **kwargs)`. 63 | :Example: 64 | >>> @weighted_loss 65 | >>> def l1_loss(pred, target): 66 | >>> return (pred - target).abs() 67 | >>> pred = torch.Tensor([0, 2, 3]) 68 | >>> target = torch.Tensor([1, 1, 1]) 69 | >>> weight = torch.Tensor([1, 0, 1]) 70 | >>> l1_loss(pred, target) 71 | tensor(1.3333) 72 | >>> l1_loss(pred, target, weight) 73 | tensor(1.) 
74 | >>> l1_loss(pred, target, reduction='none') 75 | tensor([1., 1., 2.]) 76 | >>> l1_loss(pred, target, weight, avg_factor=2) 77 | tensor(1.5000) 78 | """ 79 | 80 | @functools.wraps(loss_func) 81 | def wrapper(pred, 82 | target, 83 | weight=None, 84 | reduction='mean', 85 | avg_factor=None, 86 | **kwargs): 87 | # get element-wise loss 88 | loss = loss_func(pred, target, **kwargs) 89 | loss = weight_reduce_loss(loss, weight, reduction, avg_factor) 90 | return loss 91 | 92 | return wrapper 93 | 94 | 95 | 96 | @weighted_loss 97 | def balanced_l1_loss(pred, 98 | target, 99 | beta=1.0, 100 | alpha=0.5, 101 | gamma=1.5, 102 | reduction='mean'): 103 | assert beta > 0 104 | assert pred.size() == target.size() and target.numel() > 0 105 | 106 | diff = torch.abs(pred - target) 107 | b = np.e**(gamma / alpha) - 1 108 | loss = torch.where( 109 | diff < beta, alpha / b * 110 | (b * diff + 1) * torch.log(b * diff / beta + 1) - alpha * diff, 111 | gamma * diff + gamma / b - alpha * beta) 112 | 113 | return loss 114 | 115 | @weighted_loss 116 | def bounded_iou_loss(pred, target, beta=0.2, eps=1e-3): 117 | """Improving Object Localization with Fitness NMS and Bounded IoU Loss, 118 | https://arxiv.org/abs/1711.00164. 119 | 120 | Args: 121 | pred (tensor): Predicted bboxes. 122 | target (tensor): Target bboxes. 123 | beta (float): beta parameter in smoothl1. 124 | eps (float): eps to avoid NaN. 125 | """ 126 | pred_ctrx = (pred[:, 0] + pred[:, 2]) * 0.5 127 | pred_ctry = (pred[:, 1] + pred[:, 3]) * 0.5 128 | pred_w = pred[:, 2] - pred[:, 0] 129 | pred_h = pred[:, 3] - pred[:, 1] 130 | with torch.no_grad(): 131 | target_ctrx = (target[:, 0] + target[:, 2]) * 0.5 132 | target_ctry = (target[:, 1] + target[:, 3]) * 0.5 133 | target_w = target[:, 2] - target[:, 0] 134 | target_h = target[:, 3] - target[:, 1] 135 | 136 | dx = target_ctrx - pred_ctrx 137 | dy = target_ctry - pred_ctry 138 | 139 | loss_dx = 1 - torch.max( 140 | (target_w - 2 * dx.abs()) / 141 | (target_w + 2 * dx.abs() + eps), torch.zeros_like(dx)) 142 | loss_dy = 1 - torch.max( 143 | (target_h - 2 * dy.abs()) / 144 | (target_h + 2 * dy.abs() + eps), torch.zeros_like(dy)) 145 | loss_dw = 1 - torch.min(target_w / (pred_w + eps), pred_w / 146 | (target_w + eps)) 147 | loss_dh = 1 - torch.min(target_h / (pred_h + eps), pred_h / 148 | (target_h + eps)) 149 | loss_comb = torch.stack([loss_dx, loss_dy, loss_dw, loss_dh], 150 | dim=-1).view(loss_dx.size(0), -1) 151 | 152 | loss = torch.where(loss_comb < beta, 0.5 * loss_comb * loss_comb / beta, 153 | loss_comb - 0.5 * beta) 154 | return loss 155 | 156 | class BalancedL1Loss(nn.Module): 157 | """Balanced L1 Loss 158 | arXiv: https://arxiv.org/pdf/1904.02701.pdf (CVPR 2019) 159 | """ 160 | 161 | def __init__(self, 162 | alpha=0.5, 163 | gamma=1.5, 164 | beta=1.0, 165 | reduction='mean', 166 | loss_weight=1.0): 167 | super(BalancedL1Loss, self).__init__() 168 | self.alpha = alpha 169 | self.gamma = gamma 170 | self.beta = beta 171 | self.reduction = reduction 172 | self.loss_weight = loss_weight 173 | 174 | def forward(self, 175 | pred, 176 | target, 177 | weight=None, 178 | avg_factor=None, 179 | reduction_override=None, 180 | **kwargs): 181 | assert reduction_override in (None, 'none', 'mean', 'sum') 182 | reduction = ( 183 | reduction_override if reduction_override else self.reduction) 184 | loss_bbox = self.loss_weight * balanced_l1_loss( 185 | pred, 186 | target, 187 | weight, 188 | alpha=self.alpha, 189 | gamma=self.gamma, 190 | beta=self.beta, 191 | reduction=reduction, 192 | avg_factor=avg_factor, 
193 | **kwargs) 194 | return loss_bbox 195 | 196 | 197 | class BoundedIoULoss(nn.Module): 198 | 199 | def __init__(self, beta=0.2, eps=1e-3, reduction='mean', loss_weight=1.0): 200 | super(BoundedIoULoss, self).__init__() 201 | self.beta = beta 202 | self.eps = eps 203 | self.reduction = reduction 204 | self.loss_weight = loss_weight 205 | 206 | def forward(self, 207 | pred, 208 | target, 209 | weight=None, 210 | avg_factor=None, 211 | reduction_override=None, 212 | **kwargs): 213 | if weight is not None and not torch.any(weight > 0): 214 | return (pred * weight).sum() # 0 215 | assert reduction_override in (None, 'none', 'mean', 'sum') 216 | reduction = ( 217 | reduction_override if reduction_override else self.reduction) 218 | loss = self.loss_weight * bounded_iou_loss( 219 | pred, 220 | target, 221 | weight, 222 | beta=self.beta, 223 | eps=self.eps, 224 | reduction=reduction, 225 | avg_factor=avg_factor, 226 | **kwargs) 227 | return loss 228 | -------------------------------------------------------------------------------- /det/layers/bezier_align.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | from torch import nn 3 | from torch.autograd import Function 4 | from torch.autograd.function import once_differentiable 5 | from torch.nn.modules.utils import _pair 6 | 7 | from det import _C 8 | 9 | 10 | class _BezierAlign(Function): 11 | @staticmethod 12 | def forward(ctx, input, roi, output_size, spatial_scale, sampling_ratio, aligned): 13 | ctx.save_for_backward(roi) 14 | ctx.output_size = _pair(output_size) 15 | ctx.spatial_scale = spatial_scale 16 | ctx.sampling_ratio = sampling_ratio 17 | ctx.input_shape = input.size() 18 | ctx.aligned = aligned 19 | output = _C.bezier_align_forward( 20 | input, roi, spatial_scale, output_size[0], output_size[1], sampling_ratio, aligned 21 | ) 22 | return output 23 | 24 | @staticmethod 25 | @once_differentiable 26 | def backward(ctx, grad_output): 27 | rois, = ctx.saved_tensors 28 | output_size = ctx.output_size 29 | spatial_scale = ctx.spatial_scale 30 | sampling_ratio = ctx.sampling_ratio 31 | bs, ch, h, w = ctx.input_shape 32 | grad_input = _C.bezier_align_backward( 33 | grad_output, 34 | rois, 35 | spatial_scale, 36 | output_size[0], 37 | output_size[1], 38 | bs, 39 | ch, 40 | h, 41 | w, 42 | sampling_ratio, 43 | ctx.aligned, 44 | ) 45 | return grad_input, None, None, None, None, None 46 | 47 | 48 | bezier_align = _BezierAlign.apply 49 | 50 | 51 | class BezierAlign(nn.Module): 52 | def __init__(self, output_size, spatial_scale, sampling_ratio, aligned=True): 53 | """ 54 | Args: 55 | output_size (tuple): h, w 56 | spatial_scale (float): scale the input boxes by this number 57 | sampling_ratio (int): number of inputs samples to take for each output 58 | sample. 0 to take samples densely. 59 | aligned (bool): if False, use the legacy implementation in 60 | Detectron. If True, align the results more perfectly. 61 | 62 | Note: 63 | The meaning of aligned=True: 64 | 65 | With `aligned=True`, 66 | we first appropriately scale the ROI and then shift it by -0.5 67 | prior to calling bezier_align. This produces the correct neighbors; see 68 | det/tests/test_bezier_align.py for verification. 69 | 70 | The difference does not make a difference to the model's performance if 71 | ROIAlign is used together with conv layers. 
72 | """ 73 | super(BezierAlign, self).__init__() 74 | self.output_size = output_size 75 | self.spatial_scale = spatial_scale 76 | self.sampling_ratio = sampling_ratio 77 | self.aligned = aligned 78 | 79 | def forward(self, input, rois): 80 | """ 81 | Args: 82 | input: NCHW images 83 | rois: Bx17 boxes. First column is the index into N. The other 16 columns are [xy]x8. 84 | """ 85 | assert rois.dim() == 2 and rois.size(1) == 17 86 | return bezier_align( 87 | input, rois, self.output_size, self.spatial_scale, self.sampling_ratio, self.aligned 88 | ) 89 | 90 | def __repr__(self): 91 | tmpstr = self.__class__.__name__ + "(" 92 | tmpstr += "output_size=" + str(self.output_size) 93 | tmpstr += ", spatial_scale=" + str(self.spatial_scale) 94 | tmpstr += ", sampling_ratio=" + str(self.sampling_ratio) 95 | tmpstr += ", aligned=" + str(self.aligned) 96 | tmpstr += ")" 97 | return tmpstr 98 | -------------------------------------------------------------------------------- /det/layers/conv_with_kaiming_uniform.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | 3 | from detectron2.layers import Conv2d 4 | from .deform_conv import DFConv2d 5 | from detectron2.layers.batch_norm import get_norm 6 | 7 | 8 | def conv_with_kaiming_uniform( 9 | norm=None, activation=None, 10 | use_deformable=False, use_sep=False): 11 | def make_conv( 12 | in_channels, out_channels, kernel_size, stride=1, dilation=1 13 | ): 14 | if use_deformable: 15 | conv_func = DFConv2d 16 | else: 17 | conv_func = Conv2d 18 | if use_sep: 19 | assert in_channels == out_channels 20 | groups = in_channels 21 | else: 22 | groups = 1 23 | conv = conv_func( 24 | in_channels, 25 | out_channels, 26 | kernel_size=kernel_size, 27 | stride=stride, 28 | padding=dilation * (kernel_size - 1) // 2, 29 | dilation=dilation, 30 | groups=groups, 31 | bias=(norm is None) 32 | ) 33 | if not use_deformable: 34 | # Caffe2 implementation uses XavierFill, which in fact 35 | # corresponds to kaiming_uniform_ in PyTorch 36 | nn.init.kaiming_uniform_(conv.weight, a=1) 37 | if norm is None: 38 | nn.init.constant_(conv.bias, 0) 39 | module = [conv,] 40 | if norm is not None and len(norm) > 0: 41 | if norm == "GN": 42 | norm_module = nn.GroupNorm(32, out_channels) 43 | else: 44 | norm_module = get_norm(norm, out_channels) 45 | module.append(norm_module) 46 | if activation is not None: 47 | module.append(nn.ReLU(inplace=True)) 48 | if len(module) > 1: 49 | return nn.Sequential(*module) 50 | return conv 51 | 52 | return make_conv 53 | -------------------------------------------------------------------------------- /det/layers/csrc/BezierAlign/BezierAlign.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved 2 | #pragma once 3 | #include 4 | 5 | namespace det { 6 | 7 | at::Tensor BezierAlign_forward_cpu( 8 | const at::Tensor& input, 9 | const at::Tensor& rois, 10 | const float spatial_scale, 11 | const int pooled_height, 12 | const int pooled_width, 13 | const int sampling_ratio, 14 | bool aligned); 15 | 16 | at::Tensor BezierAlign_backward_cpu( 17 | const at::Tensor& grad, 18 | const at::Tensor& rois, 19 | const float spatial_scale, 20 | const int pooled_height, 21 | const int pooled_width, 22 | const int batch_size, 23 | const int channels, 24 | const int height, 25 | const int width, 26 | const int sampling_ratio, 27 | bool aligned); 28 | 29 | #ifdef WITH_CUDA 30 | at::Tensor BezierAlign_forward_cuda( 31 | const at::Tensor& input, 32 | const at::Tensor& rois, 33 | const float spatial_scale, 34 | const int pooled_height, 35 | const int pooled_width, 36 | const int sampling_ratio, 37 | bool aligned); 38 | 39 | at::Tensor BezierAlign_backward_cuda( 40 | const at::Tensor& grad, 41 | const at::Tensor& rois, 42 | const float spatial_scale, 43 | const int pooled_height, 44 | const int pooled_width, 45 | const int batch_size, 46 | const int channels, 47 | const int height, 48 | const int width, 49 | const int sampling_ratio, 50 | bool aligned); 51 | #endif 52 | 53 | // Interface for Python 54 | inline at::Tensor BezierAlign_forward( 55 | const at::Tensor& input, 56 | const at::Tensor& rois, 57 | const float spatial_scale, 58 | const int pooled_height, 59 | const int pooled_width, 60 | const int sampling_ratio, 61 | bool aligned) { 62 | if (input.type().is_cuda()) { 63 | #ifdef WITH_CUDA 64 | return BezierAlign_forward_cuda( 65 | input, 66 | rois, 67 | spatial_scale, 68 | pooled_height, 69 | pooled_width, 70 | sampling_ratio, 71 | aligned); 72 | #else 73 | AT_ERROR("Not compiled with GPU support"); 74 | #endif 75 | } 76 | return BezierAlign_forward_cpu( 77 | input, 78 | rois, 79 | spatial_scale, 80 | pooled_height, 81 | pooled_width, 82 | sampling_ratio, 83 | aligned); 84 | } 85 | 86 | inline at::Tensor BezierAlign_backward( 87 | const at::Tensor& grad, 88 | const at::Tensor& rois, 89 | const float spatial_scale, 90 | const int pooled_height, 91 | const int pooled_width, 92 | const int batch_size, 93 | const int channels, 94 | const int height, 95 | const int width, 96 | const int sampling_ratio, 97 | bool aligned) { 98 | if (grad.type().is_cuda()) { 99 | #ifdef WITH_CUDA 100 | return BezierAlign_backward_cuda( 101 | grad, 102 | rois, 103 | spatial_scale, 104 | pooled_height, 105 | pooled_width, 106 | batch_size, 107 | channels, 108 | height, 109 | width, 110 | sampling_ratio, 111 | aligned); 112 | #else 113 | AT_ERROR("Not compiled with GPU support"); 114 | #endif 115 | } 116 | return BezierAlign_backward_cpu( 117 | grad, 118 | rois, 119 | spatial_scale, 120 | pooled_height, 121 | pooled_width, 122 | batch_size, 123 | channels, 124 | height, 125 | width, 126 | sampling_ratio, 127 | aligned); 128 | } 129 | 130 | } // namespace detectron2 131 | -------------------------------------------------------------------------------- /det/layers/csrc/DefROIAlign/DefROIAlign.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | 4 | namespace det { 5 | 6 | #ifdef WITH_CUDA 7 | at::Tensor DefROIAlign_forward_cuda( 8 | const at::Tensor& input, 9 | const at::Tensor& rois, 10 | const at::Tensor& offsets, // def added 11 | const float spatial_scale, 12 | const int pooled_height, 13 | const int pooled_width, 14 | const int 
sampling_ratio, 15 | const float trans_std, // def added 16 | bool aligned); 17 | 18 | at::Tensor DefROIAlign_backward_cuda( 19 | const at::Tensor& input, // def added 20 | const at::Tensor& grad, 21 | const at::Tensor& rois, 22 | const at::Tensor& offsets, // def added 23 | const at::Tensor& grad_offsets, // def added 24 | const float spatial_scale, 25 | const int pooled_height, 26 | const int pooled_width, 27 | const int batch_size, 28 | const int channels, 29 | const int height, 30 | const int width, 31 | const int sampling_ratio, 32 | const float trans_std, // def added 33 | bool aligned); 34 | #endif 35 | 36 | // Interface for Python 37 | inline at::Tensor DefROIAlign_forward( 38 | const at::Tensor& input, 39 | const at::Tensor& rois, 40 | const at::Tensor& offsets, // def added 41 | const float spatial_scale, 42 | const int pooled_height, 43 | const int pooled_width, 44 | const int sampling_ratio, 45 | const float trans_std, // def added 46 | bool aligned) { 47 | if (input.type().is_cuda()) { 48 | #ifdef WITH_CUDA 49 | return DefROIAlign_forward_cuda( 50 | input, 51 | rois, 52 | offsets, 53 | spatial_scale, 54 | pooled_height, 55 | pooled_width, 56 | sampling_ratio, 57 | trans_std, 58 | aligned); 59 | #else 60 | AT_ERROR("Not compiled with GPU support"); 61 | #endif 62 | } 63 | AT_ERROR("CPU version not supported"); 64 | } 65 | 66 | inline at::Tensor DefROIAlign_backward( 67 | const at::Tensor& input, // def added 68 | const at::Tensor& grad, 69 | const at::Tensor& rois, 70 | const at::Tensor& offsets, // def added 71 | const at::Tensor& grad_offsets, // def added 72 | const float spatial_scale, 73 | const int pooled_height, 74 | const int pooled_width, 75 | const int batch_size, 76 | const int channels, 77 | const int height, 78 | const int width, 79 | const int sampling_ratio, 80 | const float trans_std, // def added 81 | bool aligned) { 82 | if (grad.type().is_cuda()) { 83 | #ifdef WITH_CUDA 84 | return DefROIAlign_backward_cuda( 85 | input, // def added 86 | grad, 87 | rois, 88 | offsets, // def added 89 | grad_offsets, // def added 90 | spatial_scale, 91 | pooled_height, 92 | pooled_width, 93 | batch_size, 94 | channels, 95 | height, 96 | width, 97 | sampling_ratio, 98 | trans_std, // def added 99 | aligned); 100 | #else 101 | AT_ERROR("Not compiled with GPU support"); 102 | #endif 103 | } 104 | AT_ERROR("CPU version not supported"); 105 | } 106 | 107 | } // namespace det 108 | -------------------------------------------------------------------------------- /det/layers/csrc/cuda_version.cu: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | namespace det { 4 | int get_cudart_version() { 5 | return CUDART_VERSION; 6 | } 7 | } // namespace det 8 | -------------------------------------------------------------------------------- /det/layers/csrc/ml_nms/ml_nms.cu: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #include 8 | #include 9 | 10 | int const threadsPerBlock = sizeof(unsigned long long) * 8; 11 | 12 | __device__ inline float devIoU(float const * const a, float const * const b) { 13 | if (a[5] != b[5]) { 14 | return 0.0; 15 | } 16 | float left = max(a[0], b[0]), right = min(a[2], b[2]); 17 | float top = max(a[1], b[1]), bottom = min(a[3], b[3]); 18 | float width = max(right - left + 1, 0.f), height = max(bottom - top + 1, 0.f); 19 | float interS = width * height; 20 | float Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1); 21 | float Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1); 22 | return interS / (Sa + Sb - interS); 23 | } 24 | 25 | __global__ void ml_nms_kernel(const int n_boxes, const float nms_overlap_thresh, 26 | const float *dev_boxes, unsigned long long *dev_mask) { 27 | const int row_start = blockIdx.y; 28 | const int col_start = blockIdx.x; 29 | 30 | // if (row_start > col_start) return; 31 | 32 | const int row_size = 33 | min(n_boxes - row_start * threadsPerBlock, threadsPerBlock); 34 | const int col_size = 35 | min(n_boxes - col_start * threadsPerBlock, threadsPerBlock); 36 | 37 | __shared__ float block_boxes[threadsPerBlock * 6]; 38 | if (threadIdx.x < col_size) { 39 | block_boxes[threadIdx.x * 6 + 0] = 40 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 6 + 0]; 41 | block_boxes[threadIdx.x * 6 + 1] = 42 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 6 + 1]; 43 | block_boxes[threadIdx.x * 6 + 2] = 44 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 6 + 2]; 45 | block_boxes[threadIdx.x * 6 + 3] = 46 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 6 + 3]; 47 | block_boxes[threadIdx.x * 6 + 4] = 48 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 6 + 4]; 49 | block_boxes[threadIdx.x * 6 + 5] = 50 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 6 + 5]; 51 | } 52 | __syncthreads(); 53 | 54 | if (threadIdx.x < row_size) { 55 | const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x; 56 | const float *cur_box = dev_boxes + cur_box_idx * 6; 57 | int i = 0; 58 | unsigned long long t = 0; 59 | int start = 0; 60 | if (row_start == col_start) { 61 | start = threadIdx.x + 1; 62 | } 63 | for (i = start; i < col_size; i++) { 64 | if (devIoU(cur_box, block_boxes + i * 6) > nms_overlap_thresh) { 65 | t |= 1ULL << i; 66 | } 67 | } 68 | const int col_blocks = THCCeilDiv(n_boxes, threadsPerBlock); 69 | dev_mask[cur_box_idx * col_blocks + col_start] = t; 70 | } 71 | } 72 | 73 | namespace det { 74 | 75 | // boxes is a N x 6 tensor 76 | at::Tensor ml_nms_cuda(const at::Tensor boxes, const float nms_overlap_thresh) { 77 | using scalar_t = float; 78 | AT_ASSERTM(boxes.type().is_cuda(), "boxes must be a CUDA tensor"); 79 | auto scores = boxes.select(1, 4); 80 | auto order_t = std::get<1>(scores.sort(0, /* descending=*/true)); 81 | auto boxes_sorted = boxes.index_select(0, order_t); 82 | 83 | int boxes_num = boxes.size(0); 84 | 85 | const int col_blocks = THCCeilDiv(boxes_num, threadsPerBlock); 86 | 87 | scalar_t* boxes_dev = boxes_sorted.data(); 88 | 89 | THCState *state = at::globalContext().lazyInitCUDA(); // TODO replace with getTHCState 90 | 91 | unsigned long long* mask_dev = NULL; 92 | //THCudaCheck(THCudaMalloc(state, (void**) &mask_dev, 93 | // boxes_num * col_blocks * sizeof(unsigned long long))); 94 | 95 | mask_dev = (unsigned long long*) THCudaMalloc(state, boxes_num * col_blocks * sizeof(unsigned long long)); 96 | 97 | dim3 blocks(THCCeilDiv(boxes_num, threadsPerBlock), 
98 | THCCeilDiv(boxes_num, threadsPerBlock)); 99 | dim3 threads(threadsPerBlock); 100 | ml_nms_kernel<<>>(boxes_num, 101 | nms_overlap_thresh, 102 | boxes_dev, 103 | mask_dev); 104 | 105 | std::vector mask_host(boxes_num * col_blocks); 106 | THCudaCheck(cudaMemcpy(&mask_host[0], 107 | mask_dev, 108 | sizeof(unsigned long long) * boxes_num * col_blocks, 109 | cudaMemcpyDeviceToHost)); 110 | 111 | std::vector remv(col_blocks); 112 | memset(&remv[0], 0, sizeof(unsigned long long) * col_blocks); 113 | 114 | at::Tensor keep = at::empty({boxes_num}, boxes.options().dtype(at::kLong).device(at::kCPU)); 115 | int64_t* keep_out = keep.data(); 116 | 117 | int num_to_keep = 0; 118 | for (int i = 0; i < boxes_num; i++) { 119 | int nblock = i / threadsPerBlock; 120 | int inblock = i % threadsPerBlock; 121 | 122 | if (!(remv[nblock] & (1ULL << inblock))) { 123 | keep_out[num_to_keep++] = i; 124 | unsigned long long *p = &mask_host[0] + i * col_blocks; 125 | for (int j = nblock; j < col_blocks; j++) { 126 | remv[j] |= p[j]; 127 | } 128 | } 129 | } 130 | 131 | THCudaFree(state, mask_dev); 132 | // TODO improve this part 133 | return std::get<0>(order_t.index({ 134 | keep.narrow(/*dim=*/0, /*start=*/0, /*length=*/num_to_keep).to( 135 | order_t.device(), keep.scalar_type()) 136 | }).sort(0, false)); 137 | } 138 | 139 | } // namespace det -------------------------------------------------------------------------------- /det/layers/csrc/ml_nms/ml_nms.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | 4 | namespace det { 5 | 6 | 7 | #ifdef WITH_CUDA 8 | at::Tensor ml_nms_cuda( 9 | const at::Tensor dets, 10 | const float threshold); 11 | #endif 12 | 13 | at::Tensor ml_nms(const at::Tensor& dets, 14 | const at::Tensor& scores, 15 | const at::Tensor& labels, 16 | const float threshold) { 17 | 18 | if (dets.type().is_cuda()) { 19 | #ifdef WITH_CUDA 20 | // TODO raise error if not compiled with CUDA 21 | if (dets.numel() == 0) 22 | return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU)); 23 | auto b = at::cat({dets, scores.unsqueeze(1), labels.unsqueeze(1)}, 1); 24 | return ml_nms_cuda(b, threshold); 25 | #else 26 | AT_ERROR("Not compiled with GPU support"); 27 | #endif 28 | } 29 | AT_ERROR("CPU version not implemented"); 30 | } 31 | 32 | } // namespace det 33 | -------------------------------------------------------------------------------- /det/layers/csrc/vision.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | 3 | #include "ml_nms/ml_nms.h" 4 | #include "DefROIAlign/DefROIAlign.h" 5 | #include "BezierAlign/BezierAlign.h" 6 | 7 | namespace det { 8 | 9 | #ifdef WITH_CUDA 10 | extern int get_cudart_version(); 11 | #endif 12 | 13 | std::string get_cuda_version() { 14 | #ifdef WITH_CUDA 15 | std::ostringstream oss; 16 | 17 | // copied from 18 | // https://github.com/pytorch/pytorch/blob/master/aten/src/ATen/cuda/detail/CUDAHooks.cpp#L231 19 | auto printCudaStyleVersion = [&](int v) { 20 | oss << (v / 1000) << "." << (v / 10 % 100); 21 | if (v % 10 != 0) { 22 | oss << "." 
<< (v % 10); 23 | } 24 | }; 25 | printCudaStyleVersion(get_cudart_version()); 26 | return oss.str(); 27 | #else 28 | return std::string("not available"); 29 | #endif 30 | } 31 | 32 | // similar to 33 | // https://github.com/pytorch/pytorch/blob/master/aten/src/ATen/Version.cpp 34 | std::string get_compiler_version() { 35 | std::ostringstream ss; 36 | #if defined(__GNUC__) 37 | #ifndef __clang__ 38 | { ss << "GCC " << __GNUC__ << "." << __GNUC_MINOR__; } 39 | #endif 40 | #endif 41 | 42 | #if defined(__clang_major__) 43 | { 44 | ss << "clang " << __clang_major__ << "." << __clang_minor__ << "." 45 | << __clang_patchlevel__; 46 | } 47 | #endif 48 | 49 | #if defined(_MSC_VER) 50 | { ss << "MSVC " << _MSC_FULL_VER; } 51 | #endif 52 | return ss.str(); 53 | } 54 | 55 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 56 | m.def("ml_nms", &ml_nms, "Multi-Label NMS"); 57 | m.def("def_roi_align_forward", &DefROIAlign_forward, "def_roi_align_forward"); 58 | m.def("def_roi_align_backward", &DefROIAlign_backward, "def_roi_align_backward"); 59 | m.def("bezier_align_forward", &BezierAlign_forward, "bezier_align_forward"); 60 | m.def("bezier_align_backward", &BezierAlign_backward, "bezier_align_backward"); 61 | } 62 | 63 | } // namespace det 64 | -------------------------------------------------------------------------------- /det/layers/def_roi_align.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | from torch.autograd import Function 4 | from torch.autograd.function import once_differentiable 5 | from torch.nn.modules.utils import _pair 6 | 7 | from det import _C 8 | 9 | 10 | class _DefROIAlign(Function): 11 | @staticmethod 12 | def forward(ctx, input, roi, offsets, output_size, spatial_scale, sampling_ratio, trans_std, aligned): 13 | ctx.save_for_backward(input, roi, offsets) 14 | ctx.output_size = _pair(output_size) 15 | ctx.spatial_scale = spatial_scale 16 | ctx.sampling_ratio = sampling_ratio 17 | ctx.trans_std = trans_std 18 | ctx.input_shape = input.size() 19 | ctx.aligned = aligned 20 | output = _C.def_roi_align_forward( 21 | input, roi, offsets, spatial_scale, output_size[0], output_size[1], 22 | sampling_ratio, trans_std, aligned 23 | ) 24 | return output 25 | 26 | @staticmethod 27 | @once_differentiable 28 | def backward(ctx, grad_output): 29 | data, rois, offsets = ctx.saved_tensors 30 | output_size = ctx.output_size 31 | spatial_scale = ctx.spatial_scale 32 | sampling_ratio = ctx.sampling_ratio 33 | trans_std = ctx.trans_std 34 | bs, ch, h, w = ctx.input_shape 35 | grad_offsets = torch.zeros_like(offsets) 36 | 37 | grad_input = _C.def_roi_align_backward( 38 | data, 39 | grad_output, 40 | rois, 41 | offsets, 42 | grad_offsets, 43 | spatial_scale, 44 | output_size[0], 45 | output_size[1], 46 | bs, 47 | ch, 48 | h, 49 | w, 50 | sampling_ratio, 51 | trans_std, 52 | ctx.aligned, 53 | ) 54 | return grad_input, None, grad_offsets, None, None, None, None, None 55 | 56 | 57 | def_roi_align = _DefROIAlign.apply 58 | 59 | 60 | class DefROIAlign(nn.Module): 61 | def __init__(self, output_size, spatial_scale, 62 | sampling_ratio, trans_std, aligned=True): 63 | """ 64 | Args: 65 | output_size (tuple): h, w 66 | spatial_scale (float): scale the input boxes by this number 67 | sampling_ratio (int): number of inputs samples to take for each output 68 | sample. 0 to take samples densely. 
69 | trans_std (float): offset scale according to the normalized roi size 70 | aligned (bool): if False, use the legacy implementation in 71 | Detectron. If True, align the results more perfectly. 72 | """ 73 | super(DefROIAlign, self).__init__() 74 | self.output_size = output_size 75 | self.spatial_scale = spatial_scale 76 | self.sampling_ratio = sampling_ratio 77 | self.trans_std = trans_std 78 | self.aligned = aligned 79 | 80 | def forward(self, input, rois, offsets): 81 | """ 82 | Args: 83 | input: NCHW images 84 | rois: Bx5 boxes. First column is the index into N. The other 4 columns are xyxy. 85 | """ 86 | assert rois.dim() == 2 and rois.size(1) == 5 87 | return def_roi_align( 88 | input, rois, offsets, self.output_size, 89 | self.spatial_scale, self.sampling_ratio, 90 | self.trans_std, self.aligned 91 | ) 92 | 93 | def __repr__(self): 94 | tmpstr = self.__class__.__name__ + "(" 95 | tmpstr += "output_size=" + str(self.output_size) 96 | tmpstr += ", spatial_scale=" + str(self.spatial_scale) 97 | tmpstr += ", sampling_ratio=" + str(self.sampling_ratio) 98 | tmpstr += ", trans_std=" + str(self.trans_std) 99 | tmpstr += ", aligned=" + str(self.aligned) 100 | tmpstr += ")" 101 | return tmpstr 102 | -------------------------------------------------------------------------------- /det/layers/deform_conv.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | 4 | from detectron2.layers import Conv2d 5 | 6 | 7 | class _NewEmptyTensorOp(torch.autograd.Function): 8 | @staticmethod 9 | def forward(ctx, x, new_shape): 10 | ctx.shape = x.shape 11 | return x.new_empty(new_shape) 12 | 13 | @staticmethod 14 | def backward(ctx, grad): 15 | shape = ctx.shape 16 | return _NewEmptyTensorOp.apply(grad, shape), None 17 | 18 | 19 | class DFConv2d(nn.Module): 20 | """ 21 | Deformable convolutional layer with configurable 22 | deformable groups, dilations and groups. 
23 | 24 | Code is from: 25 | https://github.com/facebookresearch/maskrcnn-benchmark/blob/master/maskrcnn_benchmark/layers/misc.py 26 | 27 | 28 | """ 29 | def __init__( 30 | self, 31 | in_channels, 32 | out_channels, 33 | with_modulated_dcn=True, 34 | kernel_size=3, 35 | stride=1, 36 | groups=1, 37 | dilation=1, 38 | deformable_groups=1, 39 | bias=False, 40 | padding=None 41 | ): 42 | super(DFConv2d, self).__init__() 43 | if isinstance(kernel_size, (list, tuple)): 44 | assert isinstance(stride, (list, tuple)) 45 | assert isinstance(dilation, (list, tuple)) 46 | assert len(kernel_size) == 2 47 | assert len(stride) == 2 48 | assert len(dilation) == 2 49 | padding = ( 50 | dilation[0] * (kernel_size[0] - 1) // 2, 51 | dilation[1] * (kernel_size[1] - 1) // 2 52 | ) 53 | offset_base_channels = kernel_size[0] * kernel_size[1] 54 | else: 55 | padding = dilation * (kernel_size - 1) // 2 56 | offset_base_channels = kernel_size * kernel_size 57 | if with_modulated_dcn: 58 | from detectron2.layers.deform_conv import ModulatedDeformConv 59 | offset_channels = offset_base_channels * 3 # default: 27 60 | conv_block = ModulatedDeformConv 61 | else: 62 | from detectron2.layers.deform_conv import DeformConv 63 | offset_channels = offset_base_channels * 2 # default: 18 64 | conv_block = DeformConv 65 | self.offset = Conv2d( 66 | in_channels, 67 | deformable_groups * offset_channels, 68 | kernel_size=kernel_size, 69 | stride=stride, 70 | padding=padding, 71 | groups=1, 72 | dilation=dilation 73 | ) 74 | for l in [self.offset, ]: 75 | nn.init.kaiming_uniform_(l.weight, a=1) 76 | torch.nn.init.constant_(l.bias, 0.) 77 | self.conv = conv_block( 78 | in_channels, 79 | out_channels, 80 | kernel_size=kernel_size, 81 | stride=stride, 82 | padding=padding, 83 | dilation=dilation, 84 | groups=groups, 85 | deformable_groups=deformable_groups, 86 | bias=bias 87 | ) 88 | self.with_modulated_dcn = with_modulated_dcn 89 | self.kernel_size = kernel_size 90 | self.stride = stride 91 | self.padding = padding 92 | self.dilation = dilation 93 | self.offset_split = offset_base_channels * deformable_groups * 2 94 | 95 | def forward(self, x, return_offset=False): 96 | if x.numel() > 0: 97 | if not self.with_modulated_dcn: 98 | offset_mask = self.offset(x) 99 | x = self.conv(x, offset_mask) 100 | else: 101 | offset_mask = self.offset(x) 102 | offset = offset_mask[:, :self.offset_split, :, :] 103 | mask = offset_mask[:, self.offset_split:, :, :].sigmoid() 104 | x = self.conv(x, offset, mask) 105 | if return_offset: 106 | return x, offset_mask 107 | return x 108 | # get output shape 109 | output_shape = [ 110 | (i + 2 * p - (di * (k - 1) + 1)) // d + 1 111 | for i, p, di, k, d in zip( 112 | x.shape[-2:], 113 | self.padding, 114 | self.dilation, 115 | self.kernel_size, 116 | self.stride 117 | ) 118 | ] 119 | output_shape = [x.shape[0], self.conv.weight.shape[0]] + output_shape 120 | return _NewEmptyTensorOp.apply(x, output_shape) 121 | -------------------------------------------------------------------------------- /det/layers/gcn.py: -------------------------------------------------------------------------------- 1 | # coding:utf-8 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | 6 | class Conv2D(nn.Module): 7 | def __init__(self, in_channels, out_channels, kernel_size, padding='same', 8 | stride=1, dilation=1, groups=1): 9 | super(Conv2D, self).__init__() 10 | 11 | assert type(kernel_size) in [int, tuple], "Allowed kernel type [int or tuple], not {}".format(type(kernel_size)) 12 | assert padding == 'same', 
"Allowed padding type {}, not {}".format('same', padding) 13 | 14 | self.kernel_size = kernel_size 15 | if isinstance(kernel_size, tuple): 16 | self.h_kernel = kernel_size[0] 17 | self.w_kernel = kernel_size[1] 18 | else: 19 | self.h_kernel = kernel_size 20 | self.w_kernel = kernel_size 21 | 22 | self.padding = padding 23 | self.stride = stride 24 | self.dilation = dilation 25 | self.groups = groups 26 | self.conv = nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size, 27 | stride=self.stride, dilation=self.dilation, groups=self.groups) 28 | 29 | def forward(self, x): 30 | 31 | if self.padding == 'same': 32 | 33 | height, width = x.shape[2:] 34 | 35 | h_pad_need = max(0, (height - 1) * self.stride + self.h_kernel - height) 36 | w_pad_need = max(0, (width - 1) * self.stride + self.w_kernel - width) 37 | 38 | pad_left = w_pad_need // 2 39 | pad_right = w_pad_need - pad_left 40 | pad_top = h_pad_need // 2 41 | pad_bottom = h_pad_need - pad_top 42 | 43 | padding = (pad_left, pad_right, pad_top, pad_bottom) 44 | 45 | x = F.pad(x, padding, 'constant', 0) 46 | 47 | x = self.conv(x) 48 | 49 | return x 50 | 51 | 52 | class GCN(nn.Module): 53 | """ 54 | Large Kernel Matters -- https://arxiv.org/abs/1703.02719 55 | """ 56 | def __init__(self, in_channels, out_channels, k=3): 57 | super(GCN, self).__init__() 58 | 59 | self.conv_l1 = Conv2D(in_channels=in_channels, out_channels=out_channels, kernel_size=(k, 1), padding='same') 60 | self.conv_l2 = Conv2D(in_channels=out_channels, out_channels=out_channels, kernel_size=(1, k), padding='same') 61 | 62 | self.conv_r1 = Conv2D(in_channels=in_channels, out_channels=out_channels, kernel_size=(1, k), padding='same') 63 | self.conv_r2 = Conv2D(in_channels=out_channels, out_channels=out_channels, kernel_size=(k, 1), padding='same') 64 | 65 | def forward(self, x): 66 | x1 = self.conv_l1(x) 67 | x1 = self.conv_l2(x1) 68 | 69 | x2 = self.conv_r1(x) 70 | x2 = self.conv_r2(x2) 71 | 72 | out = x1 + x2 73 | 74 | return out 75 | -------------------------------------------------------------------------------- /det/layers/iou_loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | 4 | 5 | class IOULoss(nn.Module): 6 | """ 7 | Intersetion Over Union (IoU) loss which supports three 8 | different IoU computations: 9 | 10 | * IoU 11 | * Linear IoU 12 | * gIoU 13 | """ 14 | def __init__(self, loc_loss_type='iou'): 15 | super(IOULoss, self).__init__() 16 | self.loc_loss_type = loc_loss_type 17 | 18 | def forward(self, pred, target, weight=None): 19 | """ 20 | Args: 21 | pred: Nx4 predicted bounding boxes 22 | target: Nx4 target bounding boxes 23 | weight: N loss weight for each instance 24 | """ 25 | pred_left = pred[:, 0] 26 | pred_top = pred[:, 1] 27 | pred_right = pred[:, 2] 28 | pred_bottom = pred[:, 3] 29 | 30 | target_left = target[:, 0] 31 | target_top = target[:, 1] 32 | target_right = target[:, 2] 33 | target_bottom = target[:, 3] 34 | 35 | target_aera = (target_left + target_right) * \ 36 | (target_top + target_bottom) 37 | pred_aera = (pred_left + pred_right) * \ 38 | (pred_top + pred_bottom) 39 | 40 | w_intersect = torch.min(pred_left, target_left) + \ 41 | torch.min(pred_right, target_right) 42 | h_intersect = torch.min(pred_bottom, target_bottom) + \ 43 | torch.min(pred_top, target_top) 44 | 45 | g_w_intersect = torch.max(pred_left, target_left) + \ 46 | torch.max(pred_right, target_right) 47 | g_h_intersect = torch.max(pred_bottom, target_bottom) + \ 
48 | torch.max(pred_top, target_top) 49 | ac_uion = g_w_intersect * g_h_intersect 50 | 51 | area_intersect = w_intersect * h_intersect 52 | area_union = target_aera + pred_aera - area_intersect 53 | 54 | ious = (area_intersect + 1.0) / (area_union + 1.0) 55 | gious = ious - (ac_uion - area_union) / ac_uion 56 | if self.loc_loss_type == 'iou': 57 | losses = -torch.log(ious) 58 | elif self.loc_loss_type == 'linear_iou': 59 | losses = 1 - ious 60 | elif self.loc_loss_type == 'giou': 61 | losses = 1 - gious 62 | else: 63 | raise NotImplementedError 64 | 65 | if weight is not None: 66 | return (losses * weight).sum() 67 | else: 68 | return losses.sum() 69 | -------------------------------------------------------------------------------- /det/layers/ml_nms.py: -------------------------------------------------------------------------------- 1 | from detectron2.layers import batched_nms 2 | from .soft_nms import batched_soft_nms 3 | 4 | def ml_nms(boxlist, nms_thresh, max_proposals=-1, 5 | score_field="scores", label_field="labels"): 6 | """ 7 | Performs non-maximum suppression on a boxlist, with scores specified 8 | in a boxlist field via score_field. 9 | 10 | Args: 11 | boxlist (detectron2.structures.Boxes): 12 | nms_thresh (float): 13 | max_proposals (int): if > 0, then only the top max_proposals are kept 14 | after non-maximum suppression 15 | score_field (str): 16 | """ 17 | if nms_thresh <= 0: 18 | return boxlist 19 | boxes = boxlist.pred_boxes.tensor 20 | scores = boxlist.scores 21 | labels = boxlist.pred_classes 22 | keep = batched_nms(boxes, scores, labels, nms_thresh) 23 | if max_proposals > 0: 24 | keep = keep[: max_proposals] 25 | boxlist = boxlist[keep] 26 | return boxlist 27 | -------------------------------------------------------------------------------- /det/layers/naive_group_norm.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.nn import Module, Parameter 3 | from torch.nn import init 4 | 5 | 6 | class NaiveGroupNorm(Module): 7 | r"""NaiveGroupNorm implements Group Normalization with the high-level matrix operations in PyTorch. 8 | It is a temporary solution to export GN by ONNX before the official GN can be exported by ONNX. 9 | The usage of NaiveGroupNorm is exactly the same as the official :class:`torch.nn.GroupNorm`. 10 | Args: 11 | num_groups (int): number of groups to separate the channels into 12 | num_channels (int): number of channels expected in input 13 | eps: a value added to the denominator for numerical stability. Default: 1e-5 14 | affine: a boolean value that when set to ``True``, this module 15 | has learnable per-channel affine parameters initialized to ones (for weights) 16 | and zeros (for biases). Default: ``True``. 17 | 18 | Shape: 19 | - Input: :math:`(N, C, *)` where :math:`C=\text{num\_channels}` 20 | - Output: :math:`(N, C, *)` (same shape as input) 21 | 22 | Examples:: 23 | 24 | >>> input = torch.randn(20, 6, 10, 10) 25 | >>> # Separate 6 channels into 3 groups 26 | >>> m = NaiveGroupNorm(3, 6) 27 | >>> # Separate 6 channels into 6 groups (equivalent with InstanceNorm) 28 | >>> m = NaiveGroupNorm(6, 6) 29 | >>> # Put all 6 channels into a single group (equivalent with LayerNorm) 30 | >>> m = NaiveGroupNorm(1, 6) 31 | >>> # Activating the module 32 | >>> output = m(input) 33 | 34 | ..
_`Group Normalization`: https://arxiv.org/abs/1803.08494 35 | """ 36 | __constants__ = ['num_groups', 'num_channels', 'eps', 'affine', 'weight', 37 | 'bias'] 38 | 39 | def __init__(self, num_groups, num_channels, eps=1e-5, affine=True): 40 | super(NaiveGroupNorm, self).__init__() 41 | self.num_groups = num_groups 42 | self.num_channels = num_channels 43 | self.eps = eps 44 | self.affine = affine 45 | if self.affine: 46 | self.weight = Parameter(torch.Tensor(num_channels)) 47 | self.bias = Parameter(torch.Tensor(num_channels)) 48 | else: 49 | self.register_parameter('weight', None) 50 | self.register_parameter('bias', None) 51 | self.reset_parameters() 52 | 53 | def reset_parameters(self): 54 | if self.affine: 55 | init.ones_(self.weight) 56 | init.zeros_(self.bias) 57 | 58 | def forward(self, input): 59 | N, C, H, W = input.size() 60 | assert C % self.num_groups == 0 61 | input = input.reshape(N, self.num_groups, -1) 62 | mean = input.mean(dim=-1, keepdim=True) 63 | var = (input ** 2).mean(dim=-1, keepdim=True) - mean ** 2 64 | std = torch.sqrt(var + self.eps) 65 | 66 | input = (input - mean) / std 67 | input = input.reshape(N, C, H, W) 68 | if self.affine: 69 | input = input * self.weight.reshape(1, C, 1, 1) + self.bias.reshape(1, C, 1, 1) 70 | return input 71 | 72 | def extra_repr(self): 73 | return '{num_groups}, {num_channels}, eps={eps}, ' \ 74 | 'affine={affine}'.format(**self.__dict__) 75 | -------------------------------------------------------------------------------- /det/modeling/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | from .fcos import FCOS 3 | from .atss import ATSS 4 | from .backbone import build_fcos_resnet_fpn_backbone 5 | from .one_stage_detector import OneStageDetector, OneStageRCNN 6 | from .roi_heads import LibraRCNNROIHeads 7 | 8 | from .rpn_utils import ModuleListDial, Scale, BoxCoder, permute_and_flatten, concat_box_prediction_layers, ATSSAnchorGenerator 9 | 10 | _EXCLUDE = {"torch", "ShapeSpec"} 11 | __all__ = [k for k in globals().keys() if k not in _EXCLUDE and not k.startswith("_")] -------------------------------------------------------------------------------- /det/modeling/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TengFeiHan0/Object-Detection.pytorch/5c2000ba08e903255a146f9e003c87a2fb1cb5aa/det/modeling/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /det/modeling/__pycache__/one_stage_detector.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TengFeiHan0/Object-Detection.pytorch/5c2000ba08e903255a146f9e003c87a2fb1cb5aa/det/modeling/__pycache__/one_stage_detector.cpython-36.pyc -------------------------------------------------------------------------------- /det/modeling/__pycache__/rpn_utils.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TengFeiHan0/Object-Detection.pytorch/5c2000ba08e903255a146f9e003c87a2fb1cb5aa/det/modeling/__pycache__/rpn_utils.cpython-36.pyc -------------------------------------------------------------------------------- /det/modeling/atss/__init__.py: -------------------------------------------------------------------------------- 1 | from .atss import ATSS 
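These package __init__ modules exist mostly for their side effects: importing det.modeling is what executes the registration decorators (for example @BACKBONE_REGISTRY.register() on build_fcos_resnet_fpn_backbone in det/modeling/backbone/fpn.py below), after which a config can refer to the registered components purely by name. Below is a minimal editorial sketch of that flow; it assumes the CfgNode has already been extended with the custom keys the builders read (MODEL.FCOS.TOP_LEVELS, MODEL.RESNEST, ...), which plain detectron2 defaults do not provide.

from detectron2.modeling import build_model

import det.modeling  # noqa: F401  side-effect import: registers backbones, FCOS, ATSS, ...


def build_detector(cfg):
    # MODEL.BACKBONE.NAME is looked up in detectron2's BACKBONE_REGISTRY, so
    # "build_fcos_resnet_fpn_backbone" (registered in fpn.py below) resolves here
    # without an explicit import; MODEL.META_ARCHITECTURE is presumably registered
    # and resolved the same way through META_ARCH_REGISTRY.
    return build_model(cfg)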
-------------------------------------------------------------------------------- /det/modeling/atss/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TengFeiHan0/Object-Detection.pytorch/5c2000ba08e903255a146f9e003c87a2fb1cb5aa/det/modeling/atss/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /det/modeling/atss/__pycache__/atss.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TengFeiHan0/Object-Detection.pytorch/5c2000ba08e903255a146f9e003c87a2fb1cb5aa/det/modeling/atss/__pycache__/atss.cpython-36.pyc -------------------------------------------------------------------------------- /det/modeling/atss/__pycache__/atss_outputs.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TengFeiHan0/Object-Detection.pytorch/5c2000ba08e903255a146f9e003c87a2fb1cb5aa/det/modeling/atss/__pycache__/atss_outputs.cpython-36.pyc -------------------------------------------------------------------------------- /det/modeling/backbone/__init__.py: -------------------------------------------------------------------------------- 1 | from .fpn import build_fcos_resnet_fpn_backbone, build_resnest_fpn_backbone 2 | from .vovnet import build_vovnet_fpn_backbone, build_vovnet_backbone 3 | from .dla import build_fcos_dla_fpn_backbone 4 | from .resnet_lpf import build_resnet_lpf_backbone 5 | from .resnet import build_resnest_backbone -------------------------------------------------------------------------------- /det/modeling/backbone/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TengFeiHan0/Object-Detection.pytorch/5c2000ba08e903255a146f9e003c87a2fb1cb5aa/det/modeling/backbone/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /det/modeling/backbone/__pycache__/dla.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TengFeiHan0/Object-Detection.pytorch/5c2000ba08e903255a146f9e003c87a2fb1cb5aa/det/modeling/backbone/__pycache__/dla.cpython-36.pyc -------------------------------------------------------------------------------- /det/modeling/backbone/__pycache__/fpn.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TengFeiHan0/Object-Detection.pytorch/5c2000ba08e903255a146f9e003c87a2fb1cb5aa/det/modeling/backbone/__pycache__/fpn.cpython-36.pyc -------------------------------------------------------------------------------- /det/modeling/backbone/__pycache__/lpf.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TengFeiHan0/Object-Detection.pytorch/5c2000ba08e903255a146f9e003c87a2fb1cb5aa/det/modeling/backbone/__pycache__/lpf.cpython-36.pyc -------------------------------------------------------------------------------- /det/modeling/backbone/__pycache__/mobilenet.cpython-36.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/TengFeiHan0/Object-Detection.pytorch/5c2000ba08e903255a146f9e003c87a2fb1cb5aa/det/modeling/backbone/__pycache__/mobilenet.cpython-36.pyc -------------------------------------------------------------------------------- /det/modeling/backbone/__pycache__/resnet.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TengFeiHan0/Object-Detection.pytorch/5c2000ba08e903255a146f9e003c87a2fb1cb5aa/det/modeling/backbone/__pycache__/resnet.cpython-36.pyc -------------------------------------------------------------------------------- /det/modeling/backbone/__pycache__/resnet_interval.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TengFeiHan0/Object-Detection.pytorch/5c2000ba08e903255a146f9e003c87a2fb1cb5aa/det/modeling/backbone/__pycache__/resnet_interval.cpython-36.pyc -------------------------------------------------------------------------------- /det/modeling/backbone/__pycache__/resnet_lpf.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TengFeiHan0/Object-Detection.pytorch/5c2000ba08e903255a146f9e003c87a2fb1cb5aa/det/modeling/backbone/__pycache__/resnet_lpf.cpython-36.pyc -------------------------------------------------------------------------------- /det/modeling/backbone/__pycache__/splat.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TengFeiHan0/Object-Detection.pytorch/5c2000ba08e903255a146f9e003c87a2fb1cb5aa/det/modeling/backbone/__pycache__/splat.cpython-36.pyc -------------------------------------------------------------------------------- /det/modeling/backbone/__pycache__/vovnet.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TengFeiHan0/Object-Detection.pytorch/5c2000ba08e903255a146f9e003c87a2fb1cb5aa/det/modeling/backbone/__pycache__/vovnet.cpython-36.pyc -------------------------------------------------------------------------------- /det/modeling/backbone/fpn.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | import torch.nn.functional as F 3 | import fvcore.nn.weight_init as weight_init 4 | 5 | from detectron2.modeling.backbone import FPN, build_resnet_backbone 6 | from detectron2.layers import ShapeSpec 7 | from detectron2.modeling.backbone.build import BACKBONE_REGISTRY 8 | 9 | from .resnet_lpf import build_resnet_lpf_backbone 10 | from .resnet_interval import build_resnet_interval_backbone 11 | from .mobilenet import build_mnv2_backbone 12 | from .resnet import build_resnest_backbone 13 | 14 | class LastLevelP6P7(nn.Module): 15 | """ 16 | This module is used in RetinaNet and FCOS to generate extra layers, P6 and P7 from 17 | C5 or P5 feature. 
18 | """ 19 | 20 | def __init__(self, in_channels, out_channels, in_features="res5"): 21 | super().__init__() 22 | self.num_levels = 2 23 | self.in_feature = in_features 24 | self.p6 = nn.Conv2d(in_channels, out_channels, 3, 2, 1) 25 | self.p7 = nn.Conv2d(out_channels, out_channels, 3, 2, 1) 26 | for module in [self.p6, self.p7]: 27 | weight_init.c2_xavier_fill(module) 28 | 29 | def forward(self, x): 30 | p6 = self.p6(x) 31 | p7 = self.p7(F.relu(p6)) 32 | return [p6, p7] 33 | 34 | 35 | class LastLevelP6(nn.Module): 36 | """ 37 | This module is used in FCOS to generate extra layers 38 | """ 39 | 40 | def __init__(self, in_channels, out_channels, in_features="res5"): 41 | super().__init__() 42 | self.num_levels = 1 43 | self.in_feature = in_features 44 | self.p6 = nn.Conv2d(in_channels, out_channels, 3, 2, 1) 45 | for module in [self.p6]: 46 | weight_init.c2_xavier_fill(module) 47 | 48 | def forward(self, x): 49 | p6 = self.p6(x) 50 | return [p6] 51 | 52 | 53 | @BACKBONE_REGISTRY.register() 54 | def build_fcos_resnet_fpn_backbone(cfg, input_shape: ShapeSpec): 55 | """ 56 | Args: 57 | cfg: a detectron2 CfgNode 58 | 59 | Returns: 60 | backbone (Backbone): backbone module, must be a subclass of :class:`Backbone`. 61 | """ 62 | if cfg.MODEL.BACKBONE.ANTI_ALIAS: 63 | bottom_up = build_resnet_lpf_backbone(cfg, input_shape) 64 | elif cfg.MODEL.MOBILENET: 65 | bottom_up = build_mnv2_backbone(cfg, input_shape) 66 | elif cfg.MODEL.RESNEST: 67 | bottom_up = build_resnest_backbone(cfg, input_shape) 68 | else: 69 | bottom_up = build_resnet_backbone(cfg, input_shape) 70 | in_features = cfg.MODEL.FPN.IN_FEATURES 71 | out_channels = cfg.MODEL.FPN.OUT_CHANNELS 72 | top_levels = cfg.MODEL.FCOS.TOP_LEVELS 73 | in_channels_top = out_channels 74 | if top_levels == 2: 75 | top_block = LastLevelP6P7(in_channels_top, out_channels, "p5") 76 | if top_levels == 1: 77 | top_block = LastLevelP6(in_channels_top, out_channels, "p5") 78 | elif top_levels == 0: 79 | top_block = None 80 | backbone = FPN( 81 | bottom_up=bottom_up, 82 | in_features=in_features, 83 | out_channels=out_channels, 84 | norm=cfg.MODEL.FPN.NORM, 85 | top_block=top_block, 86 | fuse_type=cfg.MODEL.FPN.FUSE_TYPE, 87 | ) 88 | return backbone 89 | 90 | @BACKBONE_REGISTRY.register() 91 | def build_resnest_fpn_backbone(cfg, input_shape: ShapeSpec): 92 | if cfg.MODEL.RESNEST: 93 | bottom_up = build_resnest_backbone(cfg, input_shape) 94 | else: 95 | bottom_up = build_resnet_backbone(cfg, input_shape) 96 | in_features = cfg.MODEL.FPN.IN_FEATURES 97 | out_channels = cfg.MODEL.FPN.OUT_CHANNELS 98 | top_levels = cfg.MODEL.FCOS.TOP_LEVELS 99 | in_channels_top = out_channels 100 | if top_levels == 2: 101 | top_block = LastLevelP6P7(in_channels_top, out_channels, "p5") 102 | if top_levels == 1: 103 | top_block = LastLevelP6(in_channels_top, out_channels, "p5") 104 | elif top_levels == 0: 105 | top_block = None 106 | backbone = FPN( 107 | bottom_up=bottom_up, 108 | in_features=in_features, 109 | out_channels=out_channels, 110 | norm=cfg.MODEL.FPN.NORM, 111 | top_block=top_block, 112 | fuse_type=cfg.MODEL.FPN.FUSE_TYPE, 113 | ) 114 | return backbone 115 | 116 | 117 | #BiFPN 118 | #https://github.com/sxhxliang/detectron2_backbone/blob/master/detectron2_backbone/backbone/bifpn.py -------------------------------------------------------------------------------- /det/modeling/backbone/lpf.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.parallel 3 | import numpy as np 4 | import torch.nn as nn 5 | 
import torch.nn.functional as F 6 | from IPython import embed 7 | 8 | 9 | class Downsample(nn.Module): 10 | def __init__(self, pad_type='reflect', filt_size=3, stride=2, channels=None, pad_off=0): 11 | super(Downsample, self).__init__() 12 | self.filt_size = filt_size 13 | self.pad_off = pad_off 14 | self.pad_sizes = [int(1.*(filt_size-1)/2), int(np.ceil(1.*(filt_size-1)/2)), int(1.*(filt_size-1)/2), int(np.ceil(1.*(filt_size-1)/2))] 15 | self.pad_sizes = [pad_size+pad_off for pad_size in self.pad_sizes] 16 | self.stride = stride 17 | self.off = int((self.stride-1)/2.) 18 | self.channels = channels 19 | 20 | # print('Filter size [%i]'%filt_size) 21 | if(self.filt_size==1): 22 | a = np.array([1.,]) 23 | elif(self.filt_size==2): 24 | a = np.array([1., 1.]) 25 | elif(self.filt_size==3): 26 | a = np.array([1., 2., 1.]) 27 | elif(self.filt_size==4): 28 | a = np.array([1., 3., 3., 1.]) 29 | elif(self.filt_size==5): 30 | a = np.array([1., 4., 6., 4., 1.]) 31 | elif(self.filt_size==6): 32 | a = np.array([1., 5., 10., 10., 5., 1.]) 33 | elif(self.filt_size==7): 34 | a = np.array([1., 6., 15., 20., 15., 6., 1.]) 35 | 36 | filt = torch.Tensor(a[:,None]*a[None,:]) 37 | filt = filt/torch.sum(filt) 38 | self.register_buffer('filt', filt[None,None,:,:].repeat((self.channels,1,1,1))) 39 | 40 | self.pad = get_pad_layer(pad_type)(self.pad_sizes) 41 | 42 | def forward(self, inp): 43 | if(self.filt_size==1): 44 | if(self.pad_off==0): 45 | return inp[:,:,::self.stride,::self.stride] 46 | else: 47 | return self.pad(inp)[:,:,::self.stride,::self.stride] 48 | else: 49 | return F.conv2d(self.pad(inp), self.filt, stride=self.stride, groups=inp.shape[1]) 50 | 51 | def get_pad_layer(pad_type): 52 | if(pad_type in ['refl','reflect']): 53 | PadLayer = nn.ReflectionPad2d 54 | elif(pad_type in ['repl','replicate']): 55 | PadLayer = nn.ReplicationPad2d 56 | elif(pad_type=='zero'): 57 | PadLayer = nn.ZeroPad2d 58 | else: 59 | print('Pad type [%s] not recognized'%pad_type) 60 | return PadLayer 61 | 62 | 63 | class Downsample1D(nn.Module): 64 | def __init__(self, pad_type='reflect', filt_size=3, stride=2, channels=None, pad_off=0): 65 | super(Downsample1D, self).__init__() 66 | self.filt_size = filt_size 67 | self.pad_off = pad_off 68 | self.pad_sizes = [int(1. * (filt_size - 1) / 2), int(np.ceil(1. * (filt_size - 1) / 2))] 69 | self.pad_sizes = [pad_size + pad_off for pad_size in self.pad_sizes] 70 | self.stride = stride 71 | self.off = int((self.stride - 1) / 2.) 
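        # The filter taps built just below are binomial coefficients (rows of Pascal's
        # triangle), a discrete approximation of a Gaussian low-pass kernel; blurring
        # with this kernel before the strided subsampling is what makes the layer an
        # anti-aliased ("blur pool") downsampling step.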
72 | self.channels = channels 73 | 74 | # print('Filter size [%i]' % filt_size) 75 | if(self.filt_size == 1): 76 | a = np.array([1., ]) 77 | elif(self.filt_size == 2): 78 | a = np.array([1., 1.]) 79 | elif(self.filt_size == 3): 80 | a = np.array([1., 2., 1.]) 81 | elif(self.filt_size == 4): 82 | a = np.array([1., 3., 3., 1.]) 83 | elif(self.filt_size == 5): 84 | a = np.array([1., 4., 6., 4., 1.]) 85 | elif(self.filt_size == 6): 86 | a = np.array([1., 5., 10., 10., 5., 1.]) 87 | elif(self.filt_size == 7): 88 | a = np.array([1., 6., 15., 20., 15., 6., 1.]) 89 | 90 | filt = torch.Tensor(a) 91 | filt = filt / torch.sum(filt) 92 | self.register_buffer('filt', filt[None, None, :].repeat((self.channels, 1, 1))) 93 | 94 | self.pad = get_pad_layer_1d(pad_type)(self.pad_sizes) 95 | 96 | def forward(self, inp): 97 | if(self.filt_size == 1): 98 | if(self.pad_off == 0): 99 | return inp[:, :, ::self.stride] 100 | else: 101 | return self.pad(inp)[:, :, ::self.stride] 102 | else: 103 | return F.conv1d(self.pad(inp), self.filt, stride=self.stride, groups=inp.shape[1]) 104 | 105 | 106 | def get_pad_layer_1d(pad_type): 107 | if(pad_type in ['refl', 'reflect']): 108 | PadLayer = nn.ReflectionPad1d 109 | elif(pad_type in ['repl', 'replicate']): 110 | PadLayer = nn.ReplicationPad1d 111 | elif(pad_type == 'zero'): 112 | PadLayer = nn.ZeroPad1d 113 | else: 114 | print('Pad type [%s] not recognized' % pad_type) 115 | return PadLayer 116 | -------------------------------------------------------------------------------- /det/modeling/backbone/mobilenet.py: -------------------------------------------------------------------------------- 1 | # taken from https://github.com/tonylins/pytorch-mobilenet-v2/ 2 | # Published by Ji Lin, tonylins 3 | # licensed under the Apache License, Version 2.0, January 2004 4 | 5 | from torch import nn 6 | from torch.nn import BatchNorm2d 7 | #from detectron2.layers.batch_norm import NaiveSyncBatchNorm as BatchNorm2d 8 | from detectron2.layers import Conv2d 9 | from detectron2.modeling.backbone.build import BACKBONE_REGISTRY 10 | from detectron2.modeling.backbone import Backbone 11 | 12 | 13 | def conv_bn(inp, oup, stride): 14 | return nn.Sequential( 15 | Conv2d(inp, oup, 3, stride, 1, bias=False), 16 | BatchNorm2d(oup), 17 | nn.ReLU6(inplace=True) 18 | ) 19 | 20 | 21 | def conv_1x1_bn(inp, oup): 22 | return nn.Sequential( 23 | Conv2d(inp, oup, 1, 1, 0, bias=False), 24 | BatchNorm2d(oup), 25 | nn.ReLU6(inplace=True) 26 | ) 27 | 28 | 29 | class InvertedResidual(nn.Module): 30 | def __init__(self, inp, oup, stride, expand_ratio): 31 | super(InvertedResidual, self).__init__() 32 | self.stride = stride 33 | assert stride in [1, 2] 34 | 35 | hidden_dim = int(round(inp * expand_ratio)) 36 | self.use_res_connect = self.stride == 1 and inp == oup 37 | 38 | if expand_ratio == 1: 39 | self.conv = nn.Sequential( 40 | # dw 41 | Conv2d(hidden_dim, hidden_dim, 3, stride, 1, groups=hidden_dim, bias=False), 42 | BatchNorm2d(hidden_dim), 43 | nn.ReLU6(inplace=True), 44 | # pw-linear 45 | Conv2d(hidden_dim, oup, 1, 1, 0, bias=False), 46 | BatchNorm2d(oup), 47 | ) 48 | else: 49 | self.conv = nn.Sequential( 50 | # pw 51 | Conv2d(inp, hidden_dim, 1, 1, 0, bias=False), 52 | BatchNorm2d(hidden_dim), 53 | nn.ReLU6(inplace=True), 54 | # dw 55 | Conv2d(hidden_dim, hidden_dim, 3, stride, 1, groups=hidden_dim, bias=False), 56 | BatchNorm2d(hidden_dim), 57 | nn.ReLU6(inplace=True), 58 | # pw-linear 59 | Conv2d(hidden_dim, oup, 1, 1, 0, bias=False), 60 | BatchNorm2d(oup), 61 | ) 62 | 63 | def forward(self, x): 64 | 
if self.use_res_connect: 65 | return x + self.conv(x) 66 | else: 67 | return self.conv(x) 68 | 69 | 70 | class MobileNetV2(Backbone): 71 | """ 72 | Should freeze bn 73 | """ 74 | def __init__(self, cfg, n_class=1000, input_size=224, width_mult=1.): 75 | super(MobileNetV2, self).__init__() 76 | block = InvertedResidual 77 | input_channel = 32 78 | interverted_residual_setting = [ 79 | # t, c, n, s 80 | [1, 16, 1, 1], 81 | [6, 24, 2, 2], 82 | [6, 32, 3, 2], 83 | [6, 64, 4, 2], 84 | [6, 96, 3, 1], 85 | [6, 160, 3, 2], 86 | [6, 320, 1, 1], 87 | ] 88 | 89 | # building first layer 90 | assert input_size % 32 == 0 91 | input_channel = int(input_channel * width_mult) 92 | self.return_features_indices = [3, 6, 13, 17] 93 | self.return_features_num_channels = [] 94 | self.features = nn.ModuleList([conv_bn(3, input_channel, 2)]) 95 | # building inverted residual blocks 96 | for t, c, n, s in interverted_residual_setting: 97 | output_channel = int(c * width_mult) 98 | for i in range(n): 99 | if i == 0: 100 | self.features.append(block(input_channel, output_channel, s, expand_ratio=t)) 101 | else: 102 | self.features.append(block(input_channel, output_channel, 1, expand_ratio=t)) 103 | input_channel = output_channel 104 | if len(self.features) - 1 in self.return_features_indices: 105 | self.return_features_num_channels.append(output_channel) 106 | 107 | self._initialize_weights() 108 | self._freeze_backbone(cfg.MODEL.BACKBONE.FREEZE_AT) 109 | 110 | def _freeze_backbone(self, freeze_at): 111 | for layer_index in range(freeze_at): 112 | for p in self.features[layer_index].parameters(): 113 | p.requires_grad = False 114 | 115 | def forward(self, x): 116 | res = [] 117 | for i, m in enumerate(self.features): 118 | x = m(x) 119 | if i in self.return_features_indices: 120 | res.append(x) 121 | return {'res{}'.format(i + 2): r for i, r in enumerate(res)} 122 | 123 | def _initialize_weights(self): 124 | for m in self.modules(): 125 | if isinstance(m, Conv2d): 126 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 127 | m.weight.data.normal_(0, (2. / n) ** 0.5) 128 | if m.bias is not None: 129 | m.bias.data.zero_() 130 | elif isinstance(m, BatchNorm2d): 131 | m.weight.data.fill_(1) 132 | m.bias.data.zero_() 133 | elif isinstance(m, nn.Linear): 134 | n = m.weight.size(1) 135 | m.weight.data.normal_(0, 0.01) 136 | m.bias.data.zero_() 137 | 138 | @BACKBONE_REGISTRY.register() 139 | def build_mnv2_backbone(cfg, input_shape): 140 | """ 141 | Create a ResNet instance from config. 142 | 143 | Returns: 144 | ResNet: a :class:`ResNet` instance. 145 | """ 146 | out_features = cfg.MODEL.RESNETS.OUT_FEATURES 147 | 148 | out_feature_channels = {"res2": 24, "res3": 32, 149 | "res4": 96, "res5": 320} 150 | out_feature_strides = {"res2": 4, "res3": 8, "res4": 16, "res5": 32} 151 | model = MobileNetV2(cfg) 152 | model._out_features = out_features 153 | model._out_feature_channels = out_feature_channels 154 | model._out_feature_strides = out_feature_strides 155 | return model 156 | -------------------------------------------------------------------------------- /det/modeling/backbone/resnet_interval.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved 2 | from detectron2.layers import FrozenBatchNorm2d 3 | from detectron2.modeling.backbone import BACKBONE_REGISTRY 4 | from detectron2.modeling.backbone.resnet import ( 5 | BasicStem, 6 | DeformBottleneckBlock, 7 | BottleneckBlock, 8 | ResNet, 9 | ) 10 | 11 | 12 | def make_stage_intervals(block_class, num_blocks, first_stride, **kwargs): 13 | """ 14 | Create a resnet stage by creating many blocks. 15 | Args: 16 | block_class (class): a subclass of ResNetBlockBase 17 | num_blocks (int): 18 | first_stride (int): the stride of the first block. The other blocks will have stride=1. 19 | A `stride` argument will be passed to the block constructor. 20 | kwargs: other arguments passed to the block constructor. 21 | 22 | Returns: 23 | list[nn.Module]: a list of block module. 24 | """ 25 | blocks = [] 26 | conv_kwargs = {key: kwargs[key] for key in kwargs if "deform" not in key} 27 | deform_kwargs = {key: kwargs[key] for key in kwargs if key != "deform_interval"} 28 | deform_interval = kwargs.get("deform_interval", None) 29 | for i in range(num_blocks): 30 | if deform_interval and i % deform_interval == 0: 31 | blocks.append(block_class(stride=first_stride if i == 0 else 1, **deform_kwargs)) 32 | else: 33 | blocks.append(BottleneckBlock(stride=first_stride if i == 0 else 1, **conv_kwargs)) 34 | conv_kwargs["in_channels"] = conv_kwargs["out_channels"] 35 | deform_kwargs["in_channels"] = deform_kwargs["out_channels"] 36 | return blocks 37 | 38 | 39 | @BACKBONE_REGISTRY.register() 40 | def build_resnet_interval_backbone(cfg, input_shape): 41 | """ 42 | Create a ResNet instance from config. 43 | 44 | Returns: 45 | ResNet: a :class:`ResNet` instance. 46 | """ 47 | # need registration of new blocks/stems? 48 | norm = cfg.MODEL.RESNETS.NORM 49 | stem = BasicStem( 50 | in_channels=input_shape.channels, 51 | out_channels=cfg.MODEL.RESNETS.STEM_OUT_CHANNELS, 52 | norm=norm, 53 | ) 54 | freeze_at = cfg.MODEL.BACKBONE.FREEZE_AT 55 | 56 | if freeze_at >= 1: 57 | for p in stem.parameters(): 58 | p.requires_grad = False 59 | stem = FrozenBatchNorm2d.convert_frozen_batchnorm(stem) 60 | 61 | # fmt: off 62 | out_features = cfg.MODEL.RESNETS.OUT_FEATURES 63 | depth = cfg.MODEL.RESNETS.DEPTH 64 | num_groups = cfg.MODEL.RESNETS.NUM_GROUPS 65 | width_per_group = cfg.MODEL.RESNETS.WIDTH_PER_GROUP 66 | bottleneck_channels = num_groups * width_per_group 67 | in_channels = cfg.MODEL.RESNETS.STEM_OUT_CHANNELS 68 | out_channels = cfg.MODEL.RESNETS.RES2_OUT_CHANNELS 69 | stride_in_1x1 = cfg.MODEL.RESNETS.STRIDE_IN_1X1 70 | res5_dilation = cfg.MODEL.RESNETS.RES5_DILATION 71 | deform_on_per_stage = cfg.MODEL.RESNETS.DEFORM_ON_PER_STAGE 72 | deform_modulated = cfg.MODEL.RESNETS.DEFORM_MODULATED 73 | deform_num_groups = cfg.MODEL.RESNETS.DEFORM_NUM_GROUPS 74 | deform_interval = cfg.MODEL.RESNETS.DEFORM_INTERVAL 75 | # fmt: on 76 | assert res5_dilation in {1, 2}, "res5_dilation cannot be {}.".format(res5_dilation) 77 | 78 | num_blocks_per_stage = {50: [3, 4, 6, 3], 101: [3, 4, 23, 3], 152: [3, 8, 36, 3]}[depth] 79 | 80 | stages = [] 81 | 82 | # Avoid creating variables without gradients 83 | # It consumes extra memory and may cause allreduce to fail 84 | out_stage_idx = [{"res2": 2, "res3": 3, "res4": 4, "res5": 5}[f] for f in out_features] 85 | max_stage_idx = max(out_stage_idx) 86 | for idx, stage_idx in enumerate(range(2, max_stage_idx + 1)): 87 | dilation = res5_dilation if stage_idx == 5 else 1 88 | first_stride = 1 if idx == 0 or (stage_idx == 5 and dilation == 2) else 2 89 | stage_kargs = { 90 | 
"num_blocks": num_blocks_per_stage[idx], 91 | "first_stride": first_stride, 92 | "in_channels": in_channels, 93 | "bottleneck_channels": bottleneck_channels, 94 | "out_channels": out_channels, 95 | "num_groups": num_groups, 96 | "norm": norm, 97 | "stride_in_1x1": stride_in_1x1, 98 | "dilation": dilation, 99 | } 100 | if deform_on_per_stage[idx]: 101 | stage_kargs["block_class"] = DeformBottleneckBlock 102 | stage_kargs["deform_modulated"] = deform_modulated 103 | stage_kargs["deform_num_groups"] = deform_num_groups 104 | stage_kargs["deform_interval"] = deform_interval 105 | else: 106 | stage_kargs["block_class"] = BottleneckBlock 107 | blocks = make_stage_intervals(**stage_kargs) 108 | in_channels = out_channels 109 | out_channels *= 2 110 | bottleneck_channels *= 2 111 | 112 | if freeze_at >= stage_idx: 113 | for block in blocks: 114 | block.freeze() 115 | stages.append(blocks) 116 | return ResNet(stem, stages, out_features=out_features) 117 | -------------------------------------------------------------------------------- /det/modeling/backbone/splat.py: -------------------------------------------------------------------------------- 1 | """Split-Attention""" 2 | 3 | import torch 4 | from torch import nn 5 | import torch.nn.functional as F 6 | from torch.nn import Module, Linear, BatchNorm2d, ReLU 7 | from torch.nn.modules.utils import _pair 8 | 9 | from detectron2.layers import ( 10 | Conv2d, 11 | DeformConv, 12 | FrozenBatchNorm2d, 13 | ModulatedDeformConv, 14 | ShapeSpec, 15 | get_norm, 16 | ) 17 | 18 | __all__ = ['SplAtConv2d', 'SplAtConv2d_dcn'] 19 | 20 | class SplAtConv2d(Module): 21 | """Split-Attention Conv2d 22 | """ 23 | def __init__(self, in_channels, channels, kernel_size, stride=(1, 1), padding=(0, 0), 24 | dilation=(1, 1), groups=1, bias=True, 25 | radix=2, reduction_factor=4, 26 | rectify=False, rectify_avg=False, norm=None, 27 | dropblock_prob=0.0, **kwargs): 28 | super(SplAtConv2d, self).__init__() 29 | padding = _pair(padding) 30 | self.rectify = rectify and (padding[0] > 0 or padding[1] > 0) 31 | self.rectify_avg = rectify_avg 32 | inter_channels = max(in_channels*radix//reduction_factor, 32) 33 | self.radix = radix 34 | self.cardinality = groups 35 | self.channels = channels 36 | self.dropblock_prob = dropblock_prob 37 | if self.rectify: 38 | from rfconv import RFConv2d 39 | self.conv = RFConv2d(in_channels, channels*radix, kernel_size, stride, padding, dilation, 40 | groups=groups*radix, bias=bias, average_mode=rectify_avg, **kwargs) 41 | else: 42 | self.conv = Conv2d(in_channels, channels*radix, kernel_size, stride, padding, dilation, 43 | groups=groups*radix, bias=bias, **kwargs) 44 | self.use_bn = norm is not None 45 | if self.use_bn: 46 | self.bn0 = get_norm(norm, channels*radix) 47 | self.relu = ReLU(inplace=True) 48 | self.fc1 = Conv2d(channels, inter_channels, 1, groups=self.cardinality) 49 | if self.use_bn: 50 | self.bn1 = get_norm(norm, inter_channels) 51 | self.fc2 = Conv2d(inter_channels, channels*radix, 1, groups=self.cardinality) 52 | if dropblock_prob > 0.0: 53 | self.dropblock = DropBlock2D(dropblock_prob, 3) 54 | self.rsoftmax = rSoftMax(radix, groups) 55 | 56 | def forward(self, x): 57 | x = self.conv(x) 58 | if self.use_bn: 59 | x = self.bn0(x) 60 | if self.dropblock_prob > 0.0: 61 | x = self.dropblock(x) 62 | x = self.relu(x) 63 | 64 | batch, rchannel = x.shape[:2] 65 | if self.radix > 1: 66 | splited = torch.split(x, rchannel//self.radix, dim=1) 67 | gap = sum(splited) 68 | else: 69 | gap = x 70 | gap = F.adaptive_avg_pool2d(gap, 1) 71 | gap = 
self.fc1(gap) 72 | 73 | if self.use_bn: 74 | gap = self.bn1(gap) 75 | gap = self.relu(gap) 76 | 77 | atten = self.fc2(gap) 78 | atten = self.rsoftmax(atten).view(batch, -1, 1, 1) 79 | 80 | if self.radix > 1: 81 | attens = torch.split(atten, rchannel//self.radix, dim=1) 82 | out = sum([att*split for (att, split) in zip(attens, splited)]) 83 | else: 84 | out = atten * x 85 | return out.contiguous() 86 | 87 | class rSoftMax(nn.Module): 88 | def __init__(self, radix, cardinality): 89 | super().__init__() 90 | self.radix = radix 91 | self.cardinality = cardinality 92 | 93 | def forward(self, x): 94 | batch = x.size(0) 95 | if self.radix > 1: 96 | x = x.view(batch, self.cardinality, self.radix, -1).transpose(1, 2) 97 | x = F.softmax(x, dim=1) 98 | x = x.reshape(batch, -1) 99 | else: 100 | x = torch.sigmoid(x) 101 | return x 102 | 103 | 104 | class SplAtConv2d_dcn(Module): 105 | """Split-Attention Conv2d with dcn 106 | """ 107 | def __init__(self, in_channels, channels, kernel_size, stride=(1, 1), padding=(0, 0), 108 | dilation=(1, 1), groups=1, bias=True, 109 | radix=2, reduction_factor=4, 110 | rectify=False, rectify_avg=False, norm=None, 111 | dropblock_prob=0.0, 112 | deform_conv_op=None, 113 | deformable_groups=1, 114 | deform_modulated=False, 115 | **kwargs): 116 | super(SplAtConv2d_dcn, self).__init__() 117 | self.deform_modulated = deform_modulated 118 | 119 | padding = _pair(padding) 120 | self.rectify = rectify and (padding[0] > 0 or padding[1] > 0) 121 | self.rectify_avg = rectify_avg 122 | inter_channels = max(in_channels*radix//reduction_factor, 32) 123 | self.radix = radix 124 | self.cardinality = groups 125 | self.channels = channels 126 | self.dropblock_prob = dropblock_prob 127 | if self.rectify: 128 | from rfconv import RFConv2d 129 | self.conv = RFConv2d(in_channels, channels*radix, kernel_size, stride, padding, dilation, 130 | groups=groups*radix, bias=bias, average_mode=rectify_avg, **kwargs) 131 | else: 132 | self.conv = deform_conv_op(in_channels, channels*radix, kernel_size, stride, padding[0], dilation, 133 | groups=groups*radix, bias=bias, deformable_groups=deformable_groups, **kwargs) 134 | self.use_bn = norm is not None 135 | if self.use_bn: 136 | self.bn0 = get_norm(norm, channels*radix) 137 | self.relu = ReLU(inplace=True) 138 | self.fc1 = Conv2d(channels, inter_channels, 1, groups=self.cardinality) 139 | if self.use_bn: 140 | self.bn1 = get_norm(norm, inter_channels) 141 | self.fc2 = Conv2d(inter_channels, channels*radix, 1, groups=self.cardinality) 142 | if dropblock_prob > 0.0: 143 | self.dropblock = DropBlock2D(dropblock_prob, 3) 144 | self.rsoftmax = rSoftMax(radix, groups) 145 | 146 | def forward(self, x, offset_input): 147 | 148 | if self.deform_modulated: 149 | offset_x, offset_y, mask = torch.chunk(offset_input, 3, dim=1) 150 | offset = torch.cat((offset_x, offset_y), dim=1) 151 | mask = mask.sigmoid() 152 | x = self.conv(x, offset, mask) 153 | else: 154 | x = self.conv(x, offset_input) 155 | 156 | if self.use_bn: 157 | x = self.bn0(x) 158 | if self.dropblock_prob > 0.0: 159 | x = self.dropblock(x) 160 | x = self.relu(x) 161 | 162 | batch, rchannel = x.shape[:2] 163 | if self.radix > 1: 164 | splited = torch.split(x, rchannel//self.radix, dim=1) 165 | gap = sum(splited) 166 | else: 167 | gap = x 168 | gap = F.adaptive_avg_pool2d(gap, 1) 169 | gap = self.fc1(gap) 170 | 171 | if self.use_bn: 172 | gap = self.bn1(gap) 173 | gap = self.relu(gap) 174 | 175 | atten = self.fc2(gap) 176 | atten = self.rsoftmax(atten).view(batch, -1, 1, 1) 177 | 178 | if self.radix > 
1: 179 | attens = torch.split(atten, rchannel//self.radix, dim=1) 180 | out = sum([att*split for (att, split) in zip(attens, splited)]) 181 | else: 182 | out = atten * x 183 | return out.contiguous() -------------------------------------------------------------------------------- /det/modeling/fcos/__init__.py: -------------------------------------------------------------------------------- 1 | from .fcos import FCOS 2 | -------------------------------------------------------------------------------- /det/modeling/fcos/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TengFeiHan0/Object-Detection.pytorch/5c2000ba08e903255a146f9e003c87a2fb1cb5aa/det/modeling/fcos/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /det/modeling/fcos/__pycache__/fcos.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TengFeiHan0/Object-Detection.pytorch/5c2000ba08e903255a146f9e003c87a2fb1cb5aa/det/modeling/fcos/__pycache__/fcos.cpython-36.pyc -------------------------------------------------------------------------------- /det/modeling/fcos/__pycache__/fcos_outputs.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TengFeiHan0/Object-Detection.pytorch/5c2000ba08e903255a146f9e003c87a2fb1cb5aa/det/modeling/fcos/__pycache__/fcos_outputs.cpython-36.pyc -------------------------------------------------------------------------------- /det/modeling/one_stage_detector.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from torch import nn 3 | 4 | from detectron2.modeling.meta_arch.build import META_ARCH_REGISTRY 5 | from detectron2.modeling import ProposalNetwork, GeneralizedRCNN 6 | from detectron2.utils.events import get_event_storage 7 | from detectron2.utils.logger import log_first_n 8 | from detectron2.modeling.postprocessing import detector_postprocess as d2_postprocesss 9 | 10 | 11 | def detector_postprocess(results, output_height, output_width, mask_threshold=0.5): 12 | """ 13 | In addition to the post processing of detectron2, we add scalign for 14 | bezier control points. 15 | """ 16 | scale_x, scale_y = (output_width / results.image_size[1], output_height / results.image_size[0]) 17 | results = d2_postprocesss(results, output_height, output_width, mask_threshold) 18 | 19 | # scale bezier points 20 | if results.has("beziers"): 21 | beziers = results.beziers 22 | # scale and clip in place 23 | beziers[:, 0::2] *= scale_x 24 | beziers[:, 1::2] *= scale_y 25 | h, w = results.image_size 26 | beziers[:, 0].clamp_(min=0, max=w) 27 | beziers[:, 1].clamp_(min=0, max=h) 28 | beziers[:, 6].clamp_(min=0, max=w) 29 | beziers[:, 7].clamp_(min=0, max=h) 30 | beziers[:, 8].clamp_(min=0, max=w) 31 | beziers[:, 9].clamp_(min=0, max=h) 32 | beziers[:, 14].clamp_(min=0, max=w) 33 | beziers[:, 15].clamp_(min=0, max=h) 34 | 35 | return results 36 | 37 | 38 | @META_ARCH_REGISTRY.register() 39 | class OneStageDetector(ProposalNetwork): 40 | """ 41 | Same as :class:`detectron2.modeling.ProposalNetwork`. 42 | Uses "instances" as the return key instead of using "proposal". 
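In practice the only difference a caller sees is the output key. A minimal inference sketch (assumptions: `model` is an already-built OneStageDetector in eval mode and `img` is a CHW image tensor; neither is defined in this file):

        import torch
        with torch.no_grad():
            outputs = model([{"image": img, "height": 800, "width": 1333}])
        detections = outputs[0]["instances"]   # re-keyed from the "proposals" returned by ProposalNetwork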
43 | """ 44 | def forward(self, batched_inputs): 45 | if self.training: 46 | return super().forward(batched_inputs) 47 | processed_results = super().forward(batched_inputs) 48 | processed_results = [{"instances": r["proposals"]} for r in processed_results] 49 | return processed_results 50 | 51 | 52 | def build_top_module(cfg): 53 | top_type = cfg.MODEL.TOP_MODULE.NAME 54 | if top_type == "conv": 55 | inp = cfg.MODEL.FPN.OUT_CHANNELS 56 | oup = cfg.MODEL.TOP_MODULE.DIM 57 | top_module = nn.Conv2d( 58 | inp, oup, 59 | kernel_size=3, stride=1, padding=1) 60 | else: 61 | top_module = None 62 | return top_module 63 | 64 | 65 | @META_ARCH_REGISTRY.register() 66 | class OneStageRCNN(GeneralizedRCNN): 67 | """ 68 | Same as :class:`detectron2.modeling.ProposalNetwork`. 69 | Use one stage detector and a second stage for instance-wise prediction. 70 | """ 71 | def __init__(self, cfg): 72 | super().__init__(cfg) 73 | self.top_module = build_top_module(cfg) 74 | self.to(self.device) 75 | 76 | def forward(self, batched_inputs): 77 | """ 78 | Args: 79 | batched_inputs: a list, batched outputs of :class:`DatasetMapper` . 80 | Each item in the list contains the inputs for one image. 81 | For now, each item in the list is a dict that contains: 82 | 83 | * image: Tensor, image in (C, H, W) format. 84 | * instances (optional): groundtruth :class:`Instances` 85 | * proposals (optional): :class:`Instances`, precomputed proposals. 86 | 87 | Other information that's included in the original dicts, such as: 88 | 89 | * "height", "width" (int): the output resolution of the model, used in inference. 90 | See :meth:`postprocess` for details. 91 | 92 | Returns: 93 | list[dict]: 94 | Each dict is the output for one input image. 95 | The dict contains one key "instances" whose value is a :class:`Instances`. 96 | The :class:`Instances` object has the following keys: 97 | "pred_boxes", "pred_classes", "scores", "pred_masks", "pred_keypoints" 98 | """ 99 | if not self.training: 100 | return self.inference(batched_inputs) 101 | 102 | images = self.preprocess_image(batched_inputs) 103 | if "instances" in batched_inputs[0]: 104 | gt_instances = [x["instances"].to(self.device) for x in batched_inputs] 105 | elif "targets" in batched_inputs[0]: 106 | log_first_n( 107 | logging.WARN, "'targets' in the model inputs is now renamed to 'instances'!", n=10 108 | ) 109 | gt_instances = [x["targets"].to(self.device) for x in batched_inputs] 110 | else: 111 | gt_instances = None 112 | 113 | features = self.backbone(images.tensor) 114 | 115 | if self.proposal_generator: 116 | proposals, proposal_losses = self.proposal_generator( 117 | images, features, gt_instances, self.top_module) 118 | else: 119 | assert "proposals" in batched_inputs[0] 120 | proposals = [x["proposals"].to(self.device) for x in batched_inputs] 121 | proposal_losses = {} 122 | 123 | _, detector_losses = self.roi_heads(images, features, proposals, gt_instances) 124 | if self.vis_period > 0: 125 | storage = get_event_storage() 126 | if storage.iter % self.vis_period == 0: 127 | self.visualize_training(batched_inputs, proposals) 128 | 129 | losses = {} 130 | losses.update(detector_losses) 131 | losses.update(proposal_losses) 132 | return losses 133 | 134 | def inference(self, batched_inputs, detected_instances=None, do_postprocess=True): 135 | """ 136 | Run inference on the given inputs. 
137 | 138 | Args: 139 | batched_inputs (list[dict]): same as in :meth:`forward` 140 | detected_instances (None or list[Instances]): if not None, it 141 | contains an `Instances` object per image. The `Instances` 142 | object contains "pred_boxes" and "pred_classes" which are 143 | known boxes in the image. 144 | The inference will then skip the detection of bounding boxes, 145 | and only predict other per-ROI outputs. 146 | do_postprocess (bool): whether to apply post-processing on the outputs. 147 | 148 | Returns: 149 | same as in :meth:`forward`. 150 | """ 151 | assert not self.training 152 | 153 | images = self.preprocess_image(batched_inputs) 154 | features = self.backbone(images.tensor) 155 | 156 | if detected_instances is None: 157 | if self.proposal_generator: 158 | proposals, _ = self.proposal_generator( 159 | images, features, None, self.top_module) 160 | else: 161 | assert "proposals" in batched_inputs[0] 162 | proposals = [x["proposals"].to(self.device) for x in batched_inputs] 163 | 164 | results, _ = self.roi_heads(images, features, proposals, None) 165 | else: 166 | detected_instances = [x.to(self.device) for x in detected_instances] 167 | results = self.roi_heads.forward_with_given_boxes(features, detected_instances) 168 | 169 | if do_postprocess: 170 | return OneStageRCNN._postprocess(results, batched_inputs, images.image_sizes) 171 | else: 172 | return results 173 | 174 | @staticmethod 175 | def _postprocess(instances, batched_inputs, image_sizes): 176 | """ 177 | Rescale the output instances to the target size. 178 | """ 179 | # note: private function; subject to changes 180 | processed_results = [] 181 | for results_per_image, input_per_image, image_size in zip( 182 | instances, batched_inputs, image_sizes 183 | ): 184 | height = input_per_image.get("height", image_size[0]) 185 | width = input_per_image.get("width", image_size[1]) 186 | r = detector_postprocess(results_per_image, height, width) 187 | processed_results.append({"instances": r}) 188 | return processed_results -------------------------------------------------------------------------------- /det/modeling/poolers.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import torch 3 | from torch import nn 4 | from detectron2.layers import cat 5 | 6 | from detectron2.modeling.poolers import ( 7 | ROIPooler, convert_boxes_to_pooler_format, assign_boxes_to_levels 8 | ) 9 | 10 | from det.layers import BezierAlign 11 | from det.structures import Beziers 12 | 13 | __all__ = ["TopPooler"] 14 | 15 | 16 | def _box_max_size(boxes): 17 | box = boxes.tensor 18 | max_size = torch.max(box[:, 2] - box[:, 0], box[:, 3] - box[:, 1]) 19 | return max_size 20 | 21 | 22 | def _bezier_height(beziers): 23 | beziers = beziers.tensor 24 | # compute the distance between the first and last control point 25 | p1 = beziers[:, :2] 26 | p2 = beziers[:, 14:] 27 | height = ((p1 - p2) ** 2).sum(dim=1).sqrt() 28 | return height 29 | 30 | 31 | def assign_boxes_to_levels_by_metric( 32 | box_lists, min_level, max_level, canonical_box_size, 33 | canonical_level, metric_fn=_box_max_size): 34 | """ 35 | Map each box in `box_lists` to a feature map level index and return the assignment 36 | vector. 37 | 38 | Args: 39 | box_lists (list[detectron2.structures.Boxes]): A list of N Boxes or N RotatedBoxes, 40 | where N is the number of images in the batch. 41 | min_level (int): Smallest feature map level index. The input is considered index 0, 42 | the output of stage 1 is index 1, and so. 
43 | max_level (int): Largest feature map level index. 44 | canonical_box_size (int): A canonical box size in pixels (shorter side). 45 | canonical_level (int): The feature map level index on which a canonically-sized box 46 | should be placed. 47 | 48 | Returns: 49 | A tensor of length M, where M is the total number of boxes aggregated over all 50 | N batch images. The memory layout corresponds to the concatenation of boxes 51 | from all images. Each element is the feature map index, as an offset from 52 | `self.min_level`, for the corresponding box (so value i means the box is at 53 | `self.min_level + i`). 54 | """ 55 | eps = sys.float_info.epsilon 56 | box_sizes = cat([metric_fn(boxes) for boxes in box_lists]) 57 | # Eqn.(1) in FPN paper 58 | level_assignments = torch.floor( 59 | canonical_level + torch.log2(box_sizes / canonical_box_size + eps) 60 | ) 61 | level_assignments = torch.clamp(level_assignments, min=min_level, max=max_level) 62 | return level_assignments.to(torch.int64) - min_level 63 | 64 | 65 | def assign_boxes_to_levels_max( 66 | box_lists, min_level, max_level, canonical_box_size, 67 | canonical_level): 68 | return assign_boxes_to_levels_by_metric( 69 | box_lists, min_level, max_level, canonical_box_size, 70 | canonical_level, metric_fn=_box_max_size 71 | ) 72 | 73 | 74 | def assign_boxes_to_levels_bezier( 75 | box_lists, min_level, max_level, canonical_box_size, 76 | canonical_level): 77 | return assign_boxes_to_levels_by_metric( 78 | box_lists, min_level, max_level, canonical_box_size, 79 | canonical_level, metric_fn=_bezier_height 80 | ) 81 | 82 | 83 | class TopPooler(ROIPooler): 84 | """ 85 | ROIPooler with option to assign level by max length. Used by top modules. 86 | """ 87 | def __init__(self, 88 | output_size, 89 | scales, 90 | sampling_ratio, 91 | pooler_type, 92 | canonical_box_size=224, 93 | canonical_level=4, 94 | assign_crit="area",): 95 | # to reuse the parent initialization, handle unsupported pooler types 96 | parent_pooler_type = "ROIAlign" if pooler_type == "BezierAlign" else pooler_type 97 | super().__init__(output_size, scales, sampling_ratio, parent_pooler_type, 98 | canonical_box_size=canonical_box_size, 99 | canonical_level=canonical_level) 100 | if parent_pooler_type != pooler_type: 101 | # reinit the level_poolers here 102 | self.level_poolers = nn.ModuleList( 103 | BezierAlign( 104 | output_size, spatial_scale=scale, 105 | sampling_ratio=sampling_ratio) for scale in scales 106 | ) 107 | self.assign_crit = assign_crit 108 | 109 | def forward(self, x, box_lists): 110 | """ 111 | see 112 | """ 113 | num_level_assignments = len(self.level_poolers) 114 | 115 | assert isinstance(x, list) and isinstance( 116 | box_lists, list 117 | ), "Arguments to pooler must be lists" 118 | assert ( 119 | len(x) == num_level_assignments 120 | ), "unequal value, num_level_assignments={}, but x is list of {} Tensors".format( 121 | num_level_assignments, len(x) 122 | ) 123 | 124 | assert len(box_lists) == x[0].size( 125 | 0 126 | ), "unequal value, x[0] batch dim 0 is {}, but box_list has length {}".format( 127 | x[0].size(0), len(box_lists) 128 | ) 129 | 130 | if isinstance(box_lists[0], torch.Tensor): 131 | # TODO: use Beziers for data_mapper 132 | box_lists = [Beziers(x) for x in box_lists] 133 | pooler_fmt_boxes = convert_boxes_to_pooler_format(box_lists) 134 | 135 | if num_level_assignments == 1: 136 | return self.level_poolers[0](x[0], pooler_fmt_boxes) 137 | 138 | if self.assign_crit == "max": 139 | assign_method = assign_boxes_to_levels_max 140 | elif 
self.assign_crit == "bezier": 141 | assign_method = assign_boxes_to_levels_bezier 142 | else: 143 | assign_method = assign_boxes_to_levels 144 | 145 | level_assignments = assign_method( 146 | box_lists, self.min_level, self.max_level, 147 | self.canonical_box_size, self.canonical_level) 148 | 149 | num_boxes = len(pooler_fmt_boxes) 150 | num_channels = x[0].shape[1] 151 | output_size = self.output_size 152 | 153 | dtype, device = x[0].dtype, x[0].device 154 | output = torch.zeros( 155 | (num_boxes, num_channels, output_size[0], output_size[1]), dtype=dtype, device=device 156 | ) 157 | 158 | for level, (x_level, pooler) in enumerate(zip(x, self.level_poolers)): 159 | inds = torch.nonzero(level_assignments == level).squeeze(1) 160 | pooler_fmt_boxes_level = pooler_fmt_boxes[inds] 161 | output[inds] = pooler(x_level, pooler_fmt_boxes_level) 162 | 163 | return output 164 | -------------------------------------------------------------------------------- /det/modeling/roi_heads/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | from .libra_rcnn import LibraRCNNROIHeads 3 | -------------------------------------------------------------------------------- /det/modeling/roi_heads/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TengFeiHan0/Object-Detection.pytorch/5c2000ba08e903255a146f9e003c87a2fb1cb5aa/det/modeling/roi_heads/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /det/modeling/roi_heads/__pycache__/libra_rcnn.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TengFeiHan0/Object-Detection.pytorch/5c2000ba08e903255a146f9e003c87a2fb1cb5aa/det/modeling/roi_heads/__pycache__/libra_rcnn.cpython-36.pyc -------------------------------------------------------------------------------- /det/modeling/roi_heads/libra_rcnn.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved 2 | import logging 3 | import numpy as np 4 | import torch 5 | from torch import nn 6 | from torch.nn import functional as F 7 | 8 | from detectron2.layers import ShapeSpec 9 | from detectron2.structures import Boxes, Instances, pairwise_iou 10 | from detectron2.modeling.roi_heads.fast_rcnn import FastRCNNOutputs 11 | from detectron2.layers import batched_nms, cat 12 | from det.layers import BalancedL1Loss 13 | from detectron2.modeling.roi_heads import (StandardROIHeads, ROI_HEADS_REGISTRY) 14 | from detectron2.modeling.sampling import subsample_labels 15 | 16 | 17 | logger = logging.getLogger(__name__) 18 | 19 | 20 | 21 | class LibraCNNOutputs(FastRCNNOutputs): 22 | def __init__( 23 | self, box2box_transform, pred_class_logits, pred_proposal_deltas, proposals, baclanced_l1_beta 24 | ): 25 | self.box2box_transform = box2box_transform 26 | self.num_preds_per_image = [len(p) for p in proposals] 27 | self.pred_class_logits = pred_class_logits 28 | self.pred_proposal_deltas = pred_proposal_deltas 29 | self.baclanced_l1_beta = baclanced_l1_beta 30 | 31 | box_type = type(proposals[0].proposal_boxes) 32 | # cat(..., dim=0) concatenates over all images in the batch 33 | self.proposals = box_type.cat([p.proposal_boxes for p in proposals]) 34 | assert not self.proposals.tensor.requires_grad, "Proposals should not require gradients!" 35 | self.image_shapes = [x.image_size for x in proposals] 36 | 37 | self.ba_l1_loss = BalancedL1Loss(alpha=0.5, 38 | gamma=1.5, 39 | beta=1.0, 40 | reduction='mean', 41 | loss_weight=1.0) 42 | 43 | # The following fields should exist only when training. 44 | if proposals[0].has("gt_boxes"): 45 | self.gt_boxes = box_type.cat([p.gt_boxes for p in proposals]) 46 | assert proposals[0].has("gt_classes") 47 | self.gt_classes = cat([p.gt_classes for p in proposals], dim=0) 48 | 49 | def baclanced_l1_loss(self): 50 | """ 51 | Compute the baclanced L1 loss for box regression. 52 | 53 | Returns: 54 | scalar Tensor 55 | """ 56 | gt_proposal_deltas = self.box2box_transform.get_deltas( 57 | self.proposals.tensor, self.gt_boxes.tensor 58 | ) 59 | box_dim = gt_proposal_deltas.size(1) # 4 or 5 60 | cls_agnostic_bbox_reg = self.pred_proposal_deltas.size(1) == box_dim 61 | device = self.pred_proposal_deltas.device 62 | 63 | bg_class_ind = self.pred_class_logits.shape[1] - 1 64 | 65 | fg_inds = torch.nonzero((self.gt_classes >= 0) & (self.gt_classes < bg_class_ind)).squeeze( 66 | 1 67 | ) 68 | if cls_agnostic_bbox_reg: 69 | # pred_proposal_deltas only corresponds to foreground class for agnostic 70 | gt_class_cols = torch.arange(box_dim, device=device) 71 | else: 72 | fg_gt_classes = self.gt_classes[fg_inds] 73 | # pred_proposal_deltas for class k are located in columns [b * k : b * k + b], 74 | # where b is the dimension of box representation (4 or 5) 75 | # Note that compared to Detectron1, 76 | # we do not perform bounding box regression for background classes. 77 | gt_class_cols = box_dim * fg_gt_classes[:, None] + torch.arange(box_dim, device=device) 78 | 79 | 80 | loss_box_reg = self.ba_l1_loss( 81 | self.pred_proposal_deltas[fg_inds[:, None], gt_class_cols], 82 | gt_proposal_deltas[fg_inds], 83 | self.baclanced_l1_beta, 84 | reduction="mean", 85 | ) 86 | 87 | loss_box_reg = loss_box_reg / self.gt_classes.numel() 88 | return loss_box_reg 89 | 90 | def losses(self): 91 | """ 92 | Compute the default losses for box head in Fast(er) R-CNN, 93 | with softmax cross entropy loss and smooth L1 loss. 
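Here the box-regression term is actually the balanced L1 loss of Libra R-CNN rather than plain smooth L1 (see `losses()` below, which returns it under "loss_box_reg_ba"). The `BalancedL1Loss` imported from `det.layers` is not shown in this file; as a reference, a common element-wise formulation (a sketch following the Libra R-CNN paper and the mmdetection implementation, not necessarily this repository's exact code) is:

        import math
        import torch

        def balanced_l1(pred, target, alpha=0.5, gamma=1.5, beta=1.0):
            # piecewise loss that promotes the gradient contribution of inliers (|diff| < beta)
            diff = torch.abs(pred - target)
            b = math.exp(gamma / alpha) - 1          # chosen so the two branches meet at diff == beta
            return torch.where(
                diff < beta,
                alpha / b * (b * diff + 1) * torch.log(b * diff / beta + 1) - alpha * diff,
                gamma * diff + gamma / b - alpha * beta,
            )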
94 | 95 | Returns: 96 | A dict of losses (scalar tensors) containing keys "loss_cls" and "loss_box_reg". 97 | """ 98 | return { 99 | "loss_cls": self.softmax_cross_entropy_loss(), 100 | "loss_box_reg_ba": self.baclanced_l1_loss(), 101 | } 102 | 103 | @ROI_HEADS_REGISTRY.register() 104 | class LibraRCNNROIHeads(StandardROIHeads): 105 | 106 | def _sample_proposals(self, matched_idxs, matched_labels, gt_classes): 107 | """ 108 | modified from roi_heads 109 | """ 110 | has_gt = gt_classes.numel() > 0 111 | # Get the corresponding GT for each proposal 112 | if has_gt: 113 | gt_classes = gt_classes[matched_idxs] 114 | # Label unmatched proposals (0 label from matcher) as background (label=num_classes) 115 | gt_classes[matched_labels == 0] = self.num_classes 116 | # Label ignore proposals (-1 label) 117 | gt_classes[matched_labels == -1] = -1 118 | else: 119 | gt_classes = torch.zeros_like(matched_idxs) + self.num_classes 120 | 121 | sampled_fg_idxs, sampled_bg_idxs = subsample_labels( 122 | gt_classes, self.batch_size_per_image, self.positive_sample_fraction, self.num_classes 123 | ) 124 | 125 | sampled_idxs = torch.cat([sampled_fg_idxs, sampled_bg_idxs], dim=0) 126 | return sampled_idxs, gt_classes[sampled_idxs] 127 | 128 | 129 | def __forward_box(self, features, proposals): 130 | box_features = self.box_pooler(features, [x.proposal_boxes for x in proposals]) 131 | box_features = self.box_head(box_features) 132 | pred_class_logits, pred_proposal_deltas = self.box_predictor(box_features) 133 | del box_features 134 | 135 | outputs = LibraRCNNOutputs( 136 | self.box2box_transform, 137 | pred_class_logits, 138 | pred_proposal_deltas, 139 | proposals, 140 | self.smooth_l1_beta, 141 | ) 142 | if self.training: 143 | if self.train_on_pred_boxes: 144 | pred_boxes = outputs.predict_boxes_for_gt_classes() 145 | for proposals_per_image, pred_boxes_per_image in zip(proposals, pred_boxes): 146 | proposals_per_image.proposal_boxes = Boxes(pred_boxes_per_image) 147 | return outputs.losses() 148 | else: 149 | pred_instances, _ = outputs.inference( 150 | self.test_score_thresh, self.test_nms_thresh, self.test_detections_per_img 151 | ) 152 | return pred_instances 153 | 154 | 155 | 156 | 157 | 158 | -------------------------------------------------------------------------------- /det/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TengFeiHan0/Object-Detection.pytorch/5c2000ba08e903255a146f9e003c87a2fb1cb5aa/det/utils/__init__.py -------------------------------------------------------------------------------- /det/utils/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TengFeiHan0/Object-Detection.pytorch/5c2000ba08e903255a146f9e003c87a2fb1cb5aa/det/utils/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /det/utils/__pycache__/comm.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TengFeiHan0/Object-Detection.pytorch/5c2000ba08e903255a146f9e003c87a2fb1cb5aa/det/utils/__pycache__/comm.cpython-36.pyc -------------------------------------------------------------------------------- /det/utils/comm.py: -------------------------------------------------------------------------------- 1 | import torch.distributed as dist 2 | from detectron2.utils.comm import get_world_size 3 | 4 | 5 | def 
reduce_sum(tensor): 6 | world_size = get_world_size() 7 | if world_size < 2: 8 | return tensor 9 | tensor = tensor.clone() 10 | dist.all_reduce(tensor, op=dist.ReduceOp.SUM) 11 | return tensor 12 | -------------------------------------------------------------------------------- /det/utils/measures.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | # Adapted from https://github.com/ShichenLiu/CondenseNet/blob/master/utils.py 3 | from __future__ import absolute_import 4 | from __future__ import unicode_literals 5 | from __future__ import print_function 6 | from __future__ import division 7 | 8 | import operator 9 | 10 | from functools import reduce 11 | 12 | 13 | def get_num_gen(gen): 14 | return sum(1 for x in gen) 15 | 16 | 17 | def is_pruned(layer): 18 | try: 19 | layer.mask 20 | return True 21 | except AttributeError: 22 | return False 23 | 24 | 25 | def is_leaf(model): 26 | return get_num_gen(model.children()) == 0 27 | 28 | 29 | def get_layer_info(layer): 30 | layer_str = str(layer) 31 | type_name = layer_str[:layer_str.find('(')].strip() 32 | return type_name 33 | 34 | 35 | def get_layer_param(model): 36 | return sum([reduce(operator.mul, i.size(), 1) for i in model.parameters()]) 37 | 38 | 39 | ### The input batch size should be 1 to call this function 40 | def measure_layer(layer, *args): 41 | global count_ops, count_params 42 | 43 | for x in args: 44 | delta_ops = 0 45 | delta_params = 0 46 | multi_add = 1 47 | type_name = get_layer_info(layer) 48 | 49 | ### ops_conv 50 | if type_name in ['Conv2d']: 51 | out_h = int((x.size()[2] + 2 * layer.padding[0] / layer.dilation[0] - layer.kernel_size[0]) / 52 | layer.stride[0] + 1) 53 | out_w = int((x.size()[3] + 2 * layer.padding[1] / layer.dilation[1] - layer.kernel_size[1]) / 54 | layer.stride[1] + 1) 55 | delta_ops = layer.in_channels * layer.out_channels * layer.kernel_size[0] * layer.kernel_size[1] * out_h * out_w / layer.groups * multi_add 56 | delta_params = get_layer_param(layer) 57 | 58 | elif type_name in ['ConvTranspose2d']: 59 | _, _, in_h, in_w = x.size() 60 | out_h = int((in_h-1)*layer.stride[0] - 2 * layer.padding[0] + layer.kernel_size[0] + layer.output_padding[0]) 61 | out_w = int((in_w-1)*layer.stride[1] - 2 * layer.padding[1] + layer.kernel_size[1] + layer.output_padding[1]) 62 | delta_ops = layer.in_channels * layer.out_channels * layer.kernel_size[0] * \ 63 | layer.kernel_size[1] * out_h * out_w / layer.groups * multi_add 64 | delta_params = get_layer_param(layer) 65 | 66 | ### ops_learned_conv 67 | elif type_name in ['LearnedGroupConv']: 68 | measure_layer(layer.relu, x) 69 | measure_layer(layer.norm, x) 70 | conv = layer.conv 71 | out_h = int((x.size()[2] + 2 * conv.padding[0] - conv.kernel_size[0]) / 72 | conv.stride[0] + 1) 73 | out_w = int((x.size()[3] + 2 * conv.padding[1] - conv.kernel_size[1]) / 74 | conv.stride[1] + 1) 75 | delta_ops = conv.in_channels * conv.out_channels * conv.kernel_size[0] * conv.kernel_size[1] * out_h * out_w / layer.condense_factor * multi_add 76 | delta_params = get_layer_param(conv) / layer.condense_factor 77 | 78 | ### ops_nonlinearity 79 | elif type_name in ['ReLU', 'ReLU6']: 80 | delta_ops = x.numel() 81 | delta_params = get_layer_param(layer) 82 | 83 | ### ops_pooling 84 | elif type_name in ['AvgPool2d', 'MaxPool2d']: 85 | in_w = x.size()[2] 86 | kernel_ops = layer.kernel_size * layer.kernel_size 87 | out_w = int((in_w + 2 * layer.padding - layer.kernel_size) / layer.stride + 1) 88 | out_h = int((in_w + 2 * layer.padding - 
layer.kernel_size) / layer.stride + 1) 89 | delta_ops = x.size()[0] * x.size()[1] * out_w * out_h * kernel_ops 90 | delta_params = get_layer_param(layer) 91 | 92 | elif type_name in ['LastLevelMaxPool']: 93 | pass 94 | 95 | elif type_name in ['AdaptiveAvgPool2d']: 96 | delta_ops = x.size()[0] * x.size()[1] * x.size()[2] * x.size()[3] 97 | delta_params = get_layer_param(layer) 98 | 99 | elif type_name in ['ZeroPad2d', 'RetinaNetPostProcessor']: 100 | pass 101 | #delta_ops = x.size()[0] * x.size()[1] * x.size()[2] * x.size()[3] 102 | #delta_params = get_layer_param(layer) 103 | 104 | ### ops_linear 105 | elif type_name in ['Linear']: 106 | weight_ops = layer.weight.numel() * multi_add 107 | bias_ops = layer.bias.numel() 108 | delta_ops = x.size()[0] * (weight_ops + bias_ops) 109 | delta_params = get_layer_param(layer) 110 | 111 | ### ops_nothing 112 | elif type_name in ['BatchNorm2d', 'Dropout2d', 'DropChannel', 'Dropout', 'FrozenBatchNorm2d', 'GroupNorm']: 113 | delta_params = get_layer_param(layer) 114 | 115 | elif type_name in ['SumTwo']: 116 | delta_ops = x.numel() 117 | 118 | elif type_name in ['AggregateCell']: 119 | if not layer.pre_transform: 120 | delta_ops = 2 * x.numel() # twice for each input 121 | else: 122 | measure_layer(layer.branch_1, x) 123 | measure_layer(layer.branch_2, x) 124 | delta_params = get_layer_param(layer) 125 | 126 | elif type_name in ['Identity', 'Zero']: 127 | pass 128 | 129 | elif type_name in ['Scale']: 130 | delta_params = get_layer_param(layer) 131 | delta_ops = x.numel() 132 | 133 | elif type_name in ['FCOSPostProcessor', 'RPNPostProcessor', 'KeypointPostProcessor', 134 | 'ROIAlign', 'PostProcessor', 'KeypointRCNNPredictor', 135 | 'NaiveSyncBatchNorm', 'Upsample', 'Sequential']: 136 | pass 137 | 138 | elif type_name in ['DeformConv']: 139 | # don't count bilinear 140 | offset_conv = list(layer.parameters())[0] 141 | delta_ops = reduce(operator.mul, offset_conv.size(), x.size()[2] * x.size()[3]) 142 | out_h = int((x.size()[2] + 2 * layer.padding[0] / layer.dilation[0] 143 | - layer.kernel_size[0]) / layer.stride[0] + 1) 144 | out_w = int((x.size()[3] + 2 * layer.padding[1] / layer.dilation[1] 145 | - layer.kernel_size[1]) / layer.stride[1] + 1) 146 | delta_ops += layer.in_channels * layer.out_channels * layer.kernel_size[0] * layer.kernel_size[1] * out_h * out_w / layer.groups * multi_add 147 | delta_params = get_layer_param(layer) 148 | 149 | ### unknown layer type 150 | else: 151 | raise TypeError('unknown layer type: %s' % type_name) 152 | 153 | count_ops += delta_ops 154 | count_params += delta_params 155 | return 156 | 157 | 158 | def measure_model(model, x): 159 | global count_ops, count_params 160 | count_ops = 0 161 | count_params = 0 162 | 163 | def should_measure(x): 164 | return is_leaf(x) or is_pruned(x) 165 | 166 | def modify_forward(model): 167 | for child in model.children(): 168 | if should_measure(child): 169 | def new_forward(m): 170 | def lambda_forward(*args): 171 | measure_layer(m, *args) 172 | return m.old_forward(*args) 173 | return lambda_forward 174 | child.old_forward = child.forward 175 | child.forward = new_forward(child) 176 | else: 177 | modify_forward(child) 178 | 179 | def restore_forward(model): 180 | for child in model.children(): 181 | # leaf node 182 | if is_leaf(child) and hasattr(child, 'old_forward'): 183 | child.forward = child.old_forward 184 | child.old_forward = None 185 | else: 186 | restore_forward(child) 187 | 188 | modify_forward(model) 189 | out = model.forward(x) 190 | restore_forward(model) 191 | 192 | 
return out, count_ops, count_params 193 | -------------------------------------------------------------------------------- /det/utils/visualizer.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from detectron2.utils.visualizer import Visualizer 4 | 5 | 6 | class TextVisualizer(Visualizer): 7 | def draw_instance_predictions(self, predictions): 8 | beziers = predictions.beziers.numpy() 9 | scores = predictions.scores.tolist() 10 | recs = predictions.recs 11 | 12 | self.overlay_instances(beziers, recs, scores) 13 | 14 | return self.output 15 | 16 | def _bezier_to_poly(self, bezier): 17 | # bezier to polygon 18 | u = np.linspace(0, 1, 20) 19 | bezier = bezier.reshape(2, 4, 2).transpose(0, 2, 1).reshape(4, 4) 20 | points = np.outer((1 - u) ** 3, bezier[:, 0]) \ 21 | + np.outer(3 * u * ((1 - u) ** 2), bezier[:, 1]) \ 22 | + np.outer(3 * (u ** 2) * (1 - u), bezier[:, 2]) \ 23 | + np.outer(u ** 3, bezier[:, 3]) 24 | points = np.concatenate((points[:, :2], points[:, 2:]), axis=0) 25 | 26 | return points 27 | 28 | def _decode_recognition(self, rec): 29 | CTLABELS = [' ','!','"','#','$','%','&','\'','(',')','*','+',',','-','.','/','0','1','2','3','4','5','6','7','8','9',':',';','<','=','>','?','@','A','B','C','D','E','F','G','H','I','J','K','L','M','N','O','P','Q','R','S','T','U','V','W','X','Y','Z','[','\\',']','^','_','`','a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v','w','x','y','z','{','|','}','~'] 30 | 31 | s = '' 32 | for c in rec: 33 | c = int(c) 34 | if c < 95: 35 | s += CTLABELS[c] 36 | elif c == 95: 37 | s += u'口' 38 | return s 39 | 40 | def _ctc_decode_recognition(self, rec): 41 | CTLABELS = [' ','!','"','#','$','%','&','\'','(',')','*','+',',','-','.','/','0','1','2','3','4','5','6','7','8','9',':',';','<','=','>','?','@','A','B','C','D','E','F','G','H','I','J','K','L','M','N','O','P','Q','R','S','T','U','V','W','X','Y','Z','[','\\',']','^','_','`','a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v','w','x','y','z','{','|','}','~'] 42 | 43 | # ctc decoding 44 | last_char = False 45 | s = '' 46 | for c in rec: 47 | c = int(c) 48 | if c < 95: 49 | if last_char != c: 50 | s += CTLABELS[c] 51 | last_char = c 52 | elif c == 95: 53 | s += u'口' 54 | else: 55 | last_char = False 56 | return s 57 | 58 | def overlay_instances(self, beziers, recs, scores, alpha=0.5): 59 | color = (0.1, 0.2, 0.5) 60 | 61 | for bezier, rec, score in zip(beziers, recs, scores): 62 | polygon = self._bezier_to_poly(bezier) 63 | self.draw_polygon(polygon, color, alpha=alpha) 64 | 65 | # draw text in the top left corner 66 | text = self._decode_recognition(rec) 67 | text = "{:.3f}: {}".format(score, text) 68 | lighter_color = self._change_color_brightness(color, brightness_factor=0.7) 69 | text_pos = polygon[0] 70 | horiz_align = "left" 71 | font_size = self._default_font_size 72 | 73 | self.draw_text( 74 | text, 75 | text_pos, 76 | color=lighter_color, 77 | horizontal_alignment=horiz_align, 78 | font_size=font_size, 79 | ) -------------------------------------------------------------------------------- /docs/nms/README.md: -------------------------------------------------------------------------------- 1 | #NMS解析 2 | 3 | https://zhuanlan.zhihu.com/p/80902998 -------------------------------------------------------------------------------- /docs/nms/__init__.py: -------------------------------------------------------------------------------- 1 | from .nms_wrapper import nms, 
soft_nms 2 | 3 | __all__ = ['nms', 'soft_nms'] 4 | -------------------------------------------------------------------------------- /docs/nms/demo_nms.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | static void sort( int n, const float* x, int* indices){ 7 | int i, j; 8 | for(i=0; i x[indices[i]]){ 11 | std::swap(indices[i], indices[j]); 12 | } 13 | 14 | } 15 | } 16 | 17 | } 18 | 19 | int nonMaximumSuppression( 20 | int numBoxes, const CvPoint *points, 21 | const CvPoint *oppositePoints, const float *score, 22 | float overlapThreshold, int *numBoxOut, CvPoint **pointsOut, 23 | CvPoint **oppositePointsOut, float *scoreOut){ 24 | 25 | 26 | int i, j, index; 27 | float *box_area = (float *)malloc(numBoxes*sizeof(float)); 28 | int *indices = (int *)malloc(numBoxes*sizeof(int)); 29 | int *is_suppressed = (int *)malloc(numBoxes*sizeof(int)); 30 | 31 | for(i=0; i 0 && overlapHeight >0){ 58 | float overlapPart = (overlapWidth * overlapHeight)/ box_area[indices[i]] ; 59 | if(overlapPart > overlapThreshold){ 60 | is_suppressed[indices[i]] =1; 61 | } 62 | } 63 | } 64 | } 65 | } 66 | } 67 | 68 | *numBoxOut = 0; 69 | for(int i = 0; i < numBoxes; i++){ 70 | if(!is_suppressed[i]) (*numBoxOut)++; 71 | } 72 | 73 | *pointsOut = (CvPoint*)malloc((*numBoxOut) * sizeof(CvPoint)); 74 | *oppositePointsOut = (CvPoint*)malloc((*numBoxOut) *sizeof(CvPoint)); 75 | *scoreOut = (float*)malloc((*numBoxOut) * sizeof(float)); 76 | 77 | index =0; 78 | 79 | for(int i = 0; i < numBoxes; i++){ 80 | if(!is_suppressed[indices[i]]){ 81 | (*pointsOut)[index].x = points[indices[i]].x; 82 | (*pointsOut)[index].y = points[indices[i]].y; 83 | (*oppositePointsOut)[index].x = oppositePoints[indices[i]].x; 84 | (*oppositePointsOut)[index].y = oppositePoints[indices[i]].y; 85 | (*scoreOut)[index] = score[indices[i]]; 86 | index++; 87 | } 88 | } 89 | 90 | free(indices); 91 | free(box_area); 92 | free(is_suppressed); 93 | return 1; 94 | } -------------------------------------------------------------------------------- /docs/nms/demo_nms.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | def compute_iou(box1, box2, wh= False): 4 | if wh == False: 5 | xmin1, ymin1, xmax1, ymax1 = box1 6 | xmin2, ymin2, xmax2, ymax2 = box2 7 | else: 8 | xmin1, ymin1 = int((box1[0]- box1[2])/2.0), int((box1[1]- box1[3])/2.0) 9 | xmax1, ymax1 = int((box1[0]+ box1[2])/2.0), int((box1[1]+box1[3])/2.0) 10 | xmin2, ymin2 = int((box2[0]- box2[2])/2.0), int((box2[1]+box2[3])/2.0) 11 | xmax2, ymax2 = int((box2[0]+box2[2])/2.0), int((box2[1]+box2[3])/2.0) 12 | 13 | xx1 = np.max([xmin1, xmin2])0 14 | yy1 = np.max([ymin1, ymin2]) 15 | xx2 = np.max([xmax1, xmax2]) 16 | yy2 = np.max([ymax1, ymax2]) 17 | 18 | area1 = (xmax1 - xmin1)*(ymax1 - ymin1) 19 | area2 = (xmax2 - xmin2)*(ymax2 - ymin2) 20 | 21 | inter_area = (np.max([0, xx2-xx1]))* np.max([0, yy2-yy1]) 22 | iou = inter_area / (area1 + area2 - inter_area +1e-6) 23 | return iou 24 | 25 | def py_cpu_nms(dets, thresh): 26 | #dets某个类的框,x1、y1、x2、y2、以及置信度score 27 | #eg:dets为[[x1,y1,x2,y2,score],[x1,y1,y2,score]……]] 28 | x1 = dets[:, 0] 29 | y1 = dets[:, 1] 30 | x2 = dets[:, 2] 31 | y2 = dets[:, 3] 32 | scores = dets[:, 4] 33 | #面積 34 | areas = (x2- x1 +1)*(y2- y1 +1) 35 | order = scores.argsort()[::-1]#按照置信度降序排序 36 | keep = [] 37 | 38 | while order.size() >0: 39 | i = order[0]#保留得分最高的 40 | keep.append(i) 41 | #得到相交区域,左上及右下 42 | xx1 = np.maximum(x1[i], 
-------------------------------------------------------------------------------- /docs/nms/nms.cpp: --------------------------------------------------------------------------------
1 | // NOTE: this file duplicates demo_nms.cpp; as there, the original #include
2 | // targets were lost when the file was exported and the headers are a best guess.
3 | #include <cstdio>
4 | #include <cstdlib>
5 | #include <algorithm>
6 | #include <opencv2/core/types_c.h>
7 | 
8 | // sort indices so that x[indices[0]] >= x[indices[1]] >= ...
9 | static void sort(int n, const float* x, int* indices){
10 |     int i, j;
11 |     for(i = 0; i < n; i++){
12 |         for(j = i + 1; j < n; j++){
13 |             if(x[indices[j]] > x[indices[i]]){
14 |                 std::swap(indices[i], indices[j]);
15 |             }
16 |         }
17 |     }
18 | }
19 | 
20 | int nonMaximumSuppression(
21 |     int numBoxes, const CvPoint *points,
22 |     const CvPoint *oppositePoints, const float *score,
23 |     float overlapThreshold, int *numBoxOut, CvPoint **pointsOut,
24 |     CvPoint **oppositePointsOut, float *scoreOut){
25 | 
26 |     int i, j, index;
27 |     float *box_area = (float *)malloc(numBoxes*sizeof(float));
28 |     int *indices = (int *)malloc(numBoxes*sizeof(int));
29 |     int *is_suppressed = (int *)malloc(numBoxes*sizeof(int));
30 | 
31 |     // NOTE: the initialization loop and the overlap test below were also lost in
32 |     // the export; they are reconstructed here from the standard implementation.
33 |     for(i = 0; i < numBoxes; i++){
34 |         indices[i] = i;
35 |         is_suppressed[i] = 0;
36 |         box_area[i] = (float)((oppositePoints[i].x - points[i].x + 1) *
37 |                               (oppositePoints[i].y - points[i].y + 1));
38 |     }
39 | 
40 |     sort(numBoxes, score, indices);
41 | 
42 |     for(i = 0; i < numBoxes; i++){
43 |         if(!is_suppressed[indices[i]]){
44 |             for(j = i + 1; j < numBoxes; j++){
45 |                 if(!is_suppressed[indices[j]]){
46 |                     int x1max = std::max(points[indices[i]].x, points[indices[j]].x);
47 |                     int x2min = std::min(oppositePoints[indices[i]].x, oppositePoints[indices[j]].x);
48 |                     int y1max = std::max(points[indices[i]].y, points[indices[j]].y);
49 |                     int y2min = std::min(oppositePoints[indices[i]].y, oppositePoints[indices[j]].y);
50 |                     int overlapWidth = x2min - x1max + 1;
51 |                     int overlapHeight = y2min - y1max + 1;
52 |                     if(overlapWidth > 0 && overlapHeight > 0){
53 |                         // suppress the lower-scored box j
54 |                         float overlapPart = (overlapWidth * overlapHeight) / box_area[indices[j]];
55 |                         if(overlapPart > overlapThreshold){
56 |                             is_suppressed[indices[j]] = 1;
57 |                         }
58 |                     }
59 |                 }
60 |             }
61 |         }
62 |     }
63 | 
64 |     *numBoxOut = 0;
65 |     for(int i = 0; i < numBoxes; i++){
66 |         if(!is_suppressed[i]) (*numBoxOut)++;
67 |     }
68 | 
69 |     *pointsOut = (CvPoint*)malloc((*numBoxOut) * sizeof(CvPoint));
70 |     *oppositePointsOut = (CvPoint*)malloc((*numBoxOut) * sizeof(CvPoint));
71 |     *scoreOut = (float*)malloc((*numBoxOut) * sizeof(float));
72 | 
73 |     index = 0;
74 |     for(int i = 0; i < numBoxes; i++){
75 |         if(!is_suppressed[indices[i]]){
76 |             (*pointsOut)[index].x = points[indices[i]].x;
77 |             (*pointsOut)[index].y = points[indices[i]].y;
78 |             (*oppositePointsOut)[index].x = oppositePoints[indices[i]].x;
79 |             (*oppositePointsOut)[index].y = oppositePoints[indices[i]].y;
80 |             (*scoreOut)[index] = score[indices[i]];
81 |             index++;
82 |         }
83 |     }
84 | 
85 |     free(indices);
86 |     free(box_area);
87 |     free(is_suppressed);
88 |     return 1;
89 | }
-------------------------------------------------------------------------------- /docs/nms/nms.py: --------------------------------------------------------------------------------
1 | import numpy as np
2 | import torch
3 | 
4 | 
5 | def compute_iou(box1, box2, wh=False):
6 |     # boxes are [x1, y1, x2, y2] when wh=False, [cx, cy, w, h] when wh=True
7 |     if wh == False:
8 |         xmin1, ymin1, xmax1, ymax1 = box1
9 |         xmin2, ymin2, xmax2, ymax2 = box2
10 |     else:
11 |         xmin1, ymin1 = int((box1[0] - box1[2]) / 2.0), int((box1[1] - box1[3]) / 2.0)
12 |         xmax1, ymax1 = int((box1[0] + box1[2]) / 2.0), int((box1[1] + box1[3]) / 2.0)
13 |         xmin2, ymin2 = int((box2[0] - box2[2]) / 2.0), int((box2[1] - box2[3]) / 2.0)
14 |         xmax2, ymax2 = int((box2[0] + box2[2]) / 2.0), int((box2[1] + box2[3]) / 2.0)
15 | 
16 |     # intersection rectangle: max of the top-left corners, min of the bottom-right corners
17 |     xx1 = np.max([xmin1, xmin2])
18 |     yy1 = np.max([ymin1, ymin2])
19 |     xx2 = np.min([xmax1, xmax2])
20 |     yy2 = np.min([ymax1, ymax2])
21 | 
22 |     area1 = (xmax1 - xmin1) * (ymax1 - ymin1)
23 |     area2 = (xmax2 - xmin2) * (ymax2 - ymin2)
24 | 
25 |     inter_area = np.max([0, xx2 - xx1]) * np.max([0, yy2 - yy1])
26 |     iou = inter_area / (area1 + area2 - inter_area + 1e-6)
27 |     return iou
28 | 
29 | 
30 | def py_cpu_nms(dets, thresh):
31 |     # dets holds the boxes of one class: [[x1, y1, x2, y2, score], ...]
32 |     x1 = dets[:, 0]
33 |     y1 = dets[:, 1]
34 |     x2 = dets[:, 2]
35 |     y2 = dets[:, 3]
36 |     scores = dets[:, 4]
37 |     # box areas
38 |     areas = (x2 - x1 + 1) * (y2 - y1 + 1)
39 |     order = scores.argsort()[::-1]  # sort by confidence, descending
40 |     keep = []
41 | 
42 |     while order.size > 0:
43 |         i = order[0]  # keep the highest-scoring remaining box
44 |         keep.append(i)
45 |         # intersection rectangle with every remaining box (top-left and bottom-right)
46 |         xx1 = np.maximum(x1[i], x1[order[1:]])
47 |         yy1 = np.maximum(y1[i], y1[order[1:]])
48 |         xx2 = np.minimum(x2[i], x2[order[1:]])
49 |         yy2 = np.minimum(y2[i], y2[order[1:]])
50 |         # clip to non-negative width and height
51 |         w = np.maximum(0.0, xx2 - xx1 + 1)
52 |         h = np.maximum(0.0, yy2 - yy1 + 1)
53 | 
54 |         inter = w * h
55 |         # IoU = overlap / (area1 + area2 - overlap)
56 |         ovr = inter / (areas[i] + areas[order[1:]] - inter)
57 |         # keep only the boxes whose IoU with box i is below the threshold
58 |         inds = np.where(ovr <= thresh)[0]
59 |         order = order[inds + 1]  # ovr is one element shorter than order, so shift the indices by one
60 | 
61 |     return keep
62 | 
63 | 
64 | def iou(box1, box2):
65 |     N = box1.size(0)
66 |     M = box2.size(0)
67 | 
68 |     lt = torch.max(  # top-left corners of the pairwise intersections
69 |         box1[:, :2].unsqueeze(1).expand(N, M, 2),  # [N,2] -> [N,1,2] -> [N,M,2]
70 |         box2[:, :2].unsqueeze(0).expand(N, M, 2),  # [M,2] -> [1,M,2] -> [N,M,2]
71 |     )
72 | 
73 |     rb = torch.min(  # bottom-right corners of the pairwise intersections
74 |         box1[:, 2:].unsqueeze(1).expand(N, M, 2),
75 |         box2[:, 2:].unsqueeze(0).expand(N, M, 2),
76 |     )
77 |     wh = rb - lt
78 |     wh[wh < 0] = 0
79 | 
80 |     inter = wh[:, :, 0] * wh[:, :, 1]
81 |     area1 = (box1[:, 2] - box1[:, 0]) * (box1[:, 3] - box1[:, 1])
82 |     area2 = (box2[:, 2] - box2[:, 0]) * (box2[:, 3] - box2[:, 1])
83 |     area1 = area1.unsqueeze(1).expand(N, M)
84 |     area2 = area2.unsqueeze(0).expand(N, M)
85 | 
86 |     iou = inter / (area1 + area2 - inter)
87 | 
88 |     return iou
89 | 
90 | 
91 | def nms(bboxes, scores, threshold=0.5):
92 |     x1 = bboxes[:, 0]
93 |     y1 = bboxes[:, 1]
94 |     x2 = bboxes[:, 2]
95 |     y2 = bboxes[:, 3]
96 |     areas = (x2 - x1) * (y2 - y1)
97 |     _, order = scores.sort(0, descending=True)
98 |     keep = []
99 |     while order.numel() > 0:
100 |         if order.numel() == 1:
101 |             i = order.item()
102 |             keep.append(i)
103 |             break
104 |         else:
105 |             i = order[0].item()
106 |             keep.append(i)
107 |             xx1 = x1[order[1:]].clamp(min=x1[i])
108 |             yy1 = y1[order[1:]].clamp(min=y1[i])
109 |             xx2 = x2[order[1:]].clamp(max=x2[i])
110 |             yy2 = y2[order[1:]].clamp(max=y2[i])
111 | 
112 |             inter = (xx2 - xx1).clamp(min=0) * (yy2 - yy1).clamp(min=0)
113 | 
114 |             iou = inter / (areas[i] + areas[order[1:]] - inter)
115 |             idx = (iou < threshold).nonzero().squeeze()
116 | 
117 |             order = order[idx + 1]
118 |     return torch.LongTensor(keep)
119 | 
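If torchvision is available, the tensor-based nms in nms.py can be sanity-checked against the built-in operator; a sketch with made-up inputs (index order may differ on score ties), assuming it is run from docs/nms/:

```python
import torch
import torchvision
from nms import nms  # hypothetical import of the function defined above

boxes = torch.tensor([[10., 10., 50., 50.],
                      [12., 12., 52., 48.],
                      [100., 100., 150., 160.]])
scores = torch.tensor([0.95, 0.80, 0.70])

keep_ours = nms(boxes, scores, threshold=0.5)
keep_ref = torchvision.ops.nms(boxes, scores, iou_threshold=0.5)
print(keep_ours.tolist(), keep_ref.tolist())  # both keep boxes 0 and 2 for this input
```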
-------------------------------------------------------------------------------- /docs/nms/nms_wrapper.py: --------------------------------------------------------------------------------
1 | import numpy as np
2 | import torch
3 | 
4 | from . import nms_cpu, nms_cuda
5 | from .soft_nms_cpu import soft_nms_cpu
6 | 
7 | 
8 | def nms(dets, iou_thr, device_id=None):
9 |     """Dispatch to either CPU or GPU NMS implementations.
10 | 
11 |     The input can be either a torch tensor or numpy array. GPU NMS will be used
12 |     if the input is a gpu tensor or device_id is specified, otherwise CPU NMS
13 |     will be used. The returned type will always be the same as inputs.
14 | 
15 |     Arguments:
16 |         dets (torch.Tensor or np.ndarray): bboxes with scores.
17 |         iou_thr (float): IoU threshold for NMS.
18 |         device_id (int, optional): when `dets` is a numpy array, if `device_id`
19 |             is None, then cpu nms is used, otherwise gpu_nms will be used.
20 | 
21 |     Returns:
22 |         tuple: kept bboxes and indices, which is always the same data type as
23 |             the input.
24 | 
25 |     Example:
26 |         >>> dets = np.array([[49.1, 32.4, 51.0, 35.9, 0.9],
27 |         >>>                  [49.3, 32.9, 51.0, 35.3, 0.9],
28 |         >>>                  [49.2, 31.8, 51.0, 35.4, 0.5],
29 |         >>>                  [35.1, 11.5, 39.1, 15.7, 0.5],
30 |         >>>                  [35.6, 11.8, 39.3, 14.2, 0.5],
31 |         >>>                  [35.3, 11.5, 39.9, 14.5, 0.4],
32 |         >>>                  [35.2, 11.7, 39.7, 15.7, 0.3]], dtype=np.float32)
33 |         >>> iou_thr = 0.7
34 |         >>> suppressed, inds = nms(dets, iou_thr)
35 |         >>> assert len(inds) == len(suppressed) == 3
36 |     """
37 |     # convert dets (tensor or numpy array) to tensor
38 |     if isinstance(dets, torch.Tensor):
39 |         is_numpy = False
40 |         dets_th = dets
41 |     elif isinstance(dets, np.ndarray):
42 |         is_numpy = True
43 |         device = 'cpu' if device_id is None else 'cuda:{}'.format(device_id)
44 |         dets_th = torch.from_numpy(dets).to(device)
45 |     else:
46 |         raise TypeError(
47 |             'dets must be either a Tensor or numpy array, but got {}'.format(
48 |                 type(dets)))
49 | 
50 |     # execute cpu or cuda nms
51 |     if dets_th.shape[0] == 0:
52 |         inds = dets_th.new_zeros(0, dtype=torch.long)
53 |     else:
54 |         if dets_th.is_cuda:
55 |             inds = nms_cuda.nms(dets_th, iou_thr)
56 |         else:
57 |             inds = nms_cpu.nms(dets_th, iou_thr)
58 | 
59 |     if is_numpy:
60 |         inds = inds.cpu().numpy()
61 |     return dets[inds, :], inds
62 | 
63 | 
64 | def soft_nms(dets, iou_thr, method='linear', sigma=0.5, min_score=1e-3):
65 |     """
66 |     Example:
67 |         >>> dets = np.array([[4., 3., 5., 3., 0.9],
68 |         >>>                  [4., 3., 5., 4., 0.9],
69 |         >>>                  [3., 1., 3., 1., 0.5],
70 |         >>>                  [3., 1., 3., 1., 0.5],
71 |         >>>                  [3., 1., 3., 1., 0.4],
72 |         >>>                  [3., 1., 3., 1., 0.0]], dtype=np.float32)
73 |         >>> iou_thr = 0.7
74 |         >>> suppressed, inds = soft_nms(dets, iou_thr, sigma=0.5)
75 |         >>> assert len(inds) == len(suppressed) == 3
76 |     """
77 |     if isinstance(dets, torch.Tensor):
78 |         is_tensor = True
79 |         dets_np = dets.detach().cpu().numpy()
80 |     elif isinstance(dets, np.ndarray):
81 |         is_tensor = False
82 |         dets_np = dets
83 |     else:
84 |         raise TypeError(
85 |             'dets must be either a Tensor or numpy array, but got {}'.format(
86 |                 type(dets)))
87 | 
88 |     method_codes = {'linear': 1, 'gaussian': 2}
89 |     if method not in method_codes:
90 |         raise ValueError('Invalid method for SoftNMS: {}'.format(method))
91 |     new_dets, inds = soft_nms_cpu(
92 |         dets_np,
93 |         iou_thr,
94 |         method=method_codes[method],
95 |         sigma=sigma,
96 |         min_score=min_score)
97 | 
98 |     if is_tensor:
99 |         return dets.new_tensor(new_dets), dets.new_tensor(
100 |             inds, dtype=torch.long)
101 |     else:
102 |         return new_dets.astype(np.float32), inds.astype(np.int64)
103 | 
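The method_codes mapping above selects how soft_nms_cpu (defined further below) decays the scores of boxes that overlap an already-kept box; the decay rules can be written out directly as a small reference sketch with made-up IoU values:

```python
import numpy as np

def soft_nms_weight(iou, method="linear", iou_thr=0.3, sigma=0.5):
    if method == "linear":      # method code 1: rescale the score by (1 - IoU) past the threshold
        return 1.0 - iou if iou > iou_thr else 1.0
    if method == "gaussian":    # method code 2: smooth exp(-IoU^2 / sigma) decay, applied everywhere
        return float(np.exp(-(iou * iou) / sigma))
    return 0.0 if iou > iou_thr else 1.0  # anything else: hard NMS (score zeroed, box dropped)

for iou in (0.2, 0.5, 0.8):
    print(iou, soft_nms_weight(iou, "linear"), round(soft_nms_weight(iou, "gaussian"), 3))
```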
-------------------------------------------------------------------------------- /docs/nms/src/nms_cpu.cpp: --------------------------------------------------------------------------------
1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
2 | // NOTE: the original #include target was lost when this file was exported;
3 | // <torch/extension.h> is assumed here.
4 | #include <torch/extension.h>
5 | 
6 | template <typename scalar_t>
7 | at::Tensor nms_cpu_kernel(const at::Tensor& dets, const float threshold) {
8 |   // make sure this is a CPU tensor
9 |   AT_ASSERTM(!dets.type().is_cuda(), "dets must be a CPU tensor");
10 | 
11 |   if (dets.numel() == 0) {
12 |     return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU));
13 |   }
14 |   // tensor.select(1, index) is equivalent to tensor[:, index]
15 |   auto x1_t = dets.select(1, 0).contiguous();
16 |   auto y1_t = dets.select(1, 1).contiguous();
17 |   auto x2_t = dets.select(1, 2).contiguous();
18 |   auto y2_t = dets.select(1, 3).contiguous();
19 |   auto scores = dets.select(1, 4).contiguous();
20 |   // box areas
21 |   at::Tensor areas_t = (x2_t - x1_t + 1) * (y2_t - y1_t + 1);
22 |   // sort scores in descending order
23 |   auto order_t = std::get<1>(scores.sort(0, /* descending=*/true));
24 | 
25 |   auto ndets = dets.size(0);
26 |   at::Tensor suppressed_t =
27 |       at::zeros({ndets}, dets.options().dtype(at::kByte).device(at::kCPU));
28 | 
29 |   // per-box flag recording whether the box was already suppressed by an
30 |   // earlier (higher-scored) box
31 |   auto suppressed = suppressed_t.data<uint8_t>();
32 |   auto order = order_t.data<int64_t>();
33 |   auto x1 = x1_t.data<scalar_t>();
34 |   auto y1 = y1_t.data<scalar_t>();
35 |   auto x2 = x2_t.data<scalar_t>();
36 |   auto y2 = y2_t.data<scalar_t>();
37 |   auto areas = areas_t.data<scalar_t>();
38 | 
39 |   for (int64_t _i = 0; _i < ndets; _i++) {
40 |     auto i = order[_i];
41 |     if (suppressed[i] == 1) continue;
42 |     auto ix1 = x1[i];
43 |     auto iy1 = y1[i];
44 |     auto ix2 = x2[i];
45 |     auto iy2 = y2[i];
46 |     auto iarea = areas[i];
47 | 
48 |     for (int64_t _j = _i + 1; _j < ndets; _j++) {
49 |       auto j = order[_j];
50 |       if (suppressed[j] == 1) continue;
51 |       auto xx1 = std::max(ix1, x1[j]);
52 |       auto yy1 = std::max(iy1, y1[j]);
53 |       auto xx2 = std::min(ix2, x2[j]);
54 |       auto yy2 = std::min(iy2, y2[j]);
55 | 
56 |       auto w = std::max(static_cast<scalar_t>(0), xx2 - xx1 + 1);
57 |       auto h = std::max(static_cast<scalar_t>(0), yy2 - yy1 + 1);
58 |       auto inter = w * h;
59 |       auto ovr = inter / (iarea + areas[j] - inter);  // IoU
60 |       if (ovr >= threshold) suppressed[j] = 1;
61 |     }
62 |   }
63 |   return at::nonzero(suppressed_t == 0).squeeze(1);
64 | }
65 | 
66 | at::Tensor nms(const at::Tensor& dets, const float threshold) {
67 |   at::Tensor result;
68 |   AT_DISPATCH_FLOATING_TYPES(dets.scalar_type(), "nms", [&] {
69 |     result = nms_cpu_kernel<scalar_t>(dets, threshold);
70 |   });
71 |   return result;
72 | }
73 | 
74 | // expose to Python
75 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
76 |   m.def("nms", &nms, "non-maximum suppression");
77 | }
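In this repo the compiled operators ship as det._C via setup.py (further below), but the standalone CPU kernel above can also be JIT-compiled for quick experiments; a sketch assuming a working C++ toolchain and the PyTorch version targeted by setup.py:

```python
import torch
from torch.utils.cpp_extension import load

# JIT-compile docs/nms/src/nms_cpu.cpp into an importable module (the name is arbitrary)
nms_cpu = load(name="nms_cpu_ext", sources=["docs/nms/src/nms_cpu.cpp"])

dets = torch.tensor([[10., 10., 50., 50., 0.95],
                     [12., 12., 52., 48., 0.80],
                     [100., 100., 150., 160., 0.70]])
keep = nms_cpu.nms(dets, 0.5)  # indices of the boxes that survive NMS
print(keep)
```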
-------------------------------------------------------------------------------- /docs/nms/src/nms_cuda.cpp: --------------------------------------------------------------------------------
1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
2 | // NOTE: the original #include target was lost when this file was exported;
3 | // <torch/extension.h> is assumed here.
4 | #include <torch/extension.h>
5 | 
6 | #define CHECK_CUDA(x) AT_CHECK(x.type().is_cuda(), #x, " must be a CUDAtensor ")
7 | 
8 | at::Tensor nms_cuda(const at::Tensor boxes, float nms_overlap_thresh);
9 | 
10 | at::Tensor nms(const at::Tensor& dets, const float threshold) {
11 |   CHECK_CUDA(dets);
12 |   if (dets.numel() == 0)
13 |     return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU));
14 |   return nms_cuda(dets, threshold);
15 | }
16 | 
17 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
18 |   m.def("nms", &nms, "non-maximum suppression");
19 | }
-------------------------------------------------------------------------------- /docs/nms/src/nms_kernel.cu: --------------------------------------------------------------------------------
1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
2 | // NOTE: the original #include targets were lost when this file was exported;
3 | // the headers below are the usual ones for this kernel and are a best guess.
4 | #include <ATen/ATen.h>
5 | #include <ATen/cuda/CUDAContext.h>
6 | #include <ATen/DeviceGuard.h>
7 | 
8 | #include <THC/THC.h>
9 | #include <THC/THCDeviceUtils.cuh>
10 | 
11 | #include <vector>
12 | #include <iostream>
13 | 
14 | //! see https://zhuanlan.zhihu.com/p/80902998
15 | 
16 | // 64 boxes per block: one bit of an unsigned long long per box
17 | int const threadsPerBlock = sizeof(unsigned long long) * 8;
18 | 
19 | // IoU of two boxes, computed on the GPU
20 | __device__ inline float devIoU(float const * const a, float const * const b) {
21 |   float left = max(a[0], b[0]), right = min(a[2], b[2]);
22 |   float top = max(a[1], b[1]), bottom = min(a[3], b[3]);
23 |   float width = max(right - left + 1, 0.f), height = max(bottom - top + 1, 0.f);
24 |   float interS = width * height;
25 |   float Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1);
26 |   float Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1);
27 |   return interS / (Sa + Sb - interS);
28 | }
29 | 
30 | __global__ void nms_kernel(const int n_boxes, const float nms_overlap_thresh,
31 |                            const float *dev_boxes, unsigned long long *dev_mask) {
32 |   // row/column coordinates of the current block in the box-pair grid
33 |   const int row_start = blockIdx.y;
34 |   const int col_start = blockIdx.x;
35 | 
36 |   // if (row_start > col_start) return;
37 | 
38 |   // Number of valid boxes in this block row/column; the last block may be only
39 |   // partially filled, e.g. with 105 boxes and blocks of 25 the last block holds
40 |   // 5 boxes (row_size = 5) while all other blocks hold 25.
41 |   const int row_size =
42 |         min(n_boxes - row_start * threadsPerBlock, threadsPerBlock);
43 |   const int col_size =
44 |         min(n_boxes - col_start * threadsPerBlock, threadsPerBlock);
45 | 
46 |   // Shared memory speeds up the reads: the threads of a block first copy the
47 |   // column boxes they need (coordinates and score) into block_boxes, and all
48 |   // later reads hit shared memory instead of dev_boxes.
49 |   __shared__ float block_boxes[threadsPerBlock * 5];
50 | 
51 |   if (threadIdx.x < col_size) {
52 |     block_boxes[threadIdx.x * 5 + 0] =
53 |         dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0];
54 |     block_boxes[threadIdx.x * 5 + 1] =
55 |         dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1];
56 |     block_boxes[threadIdx.x * 5 + 2] =
57 |         dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2];
58 |     block_boxes[threadIdx.x * 5 + 3] =
59 |         dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 3];
60 |     block_boxes[threadIdx.x * 5 + 4] =
61 |         dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4];
62 |   }
63 | 
64 |   // wait until every thread has finished filling shared memory before using it
65 |   __syncthreads();
66 | 
67 |   // Only row_size threads do useful work here (this guard handles the partially
68 |   // filled last block). Thread i compares its row box against every column box.
69 |   if (threadIdx.x < row_size) {
70 |     const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x;
71 |     const float *cur_box = dev_boxes + cur_box_idx * 5;
72 |     int i = 0;
73 |     unsigned long long t = 0;
74 |     int start = 0;
75 |     if (row_start == col_start) {
76 |       start = threadIdx.x + 1;  // diagonal block: do not compare a box with itself
77 |     }
78 |     // main loop: IoU against every column box; set the mask bit when it exceeds
79 |     // the threshold
80 |     for (i = start; i < col_size; i++) {
81 |       if (devIoU(cur_box, block_boxes + i * 5) > nms_overlap_thresh) {
82 |         t |= 1ULL << i;  // bit i marks "column box i is suppressed by this box"
83 |       }
84 |     }
85 |     const int col_blocks = THCCeilDiv(n_boxes, threadsPerBlock);
86 |     dev_mask[cur_box_idx * col_blocks + col_start] = t;
87 |   }
88 | }
89 | 
90 | // boxes is a N x 5 tensor
91 | at::Tensor nms_cuda(const at::Tensor boxes, float nms_overlap_thresh) {
92 | 
93 |   // Ensure CUDA uses the input tensor device.
94 |   at::DeviceGuard guard(boxes.device());
95 | 
96 |   using scalar_t = float;
97 |   AT_ASSERTM(boxes.type().is_cuda(), "boxes must be a CUDA tensor");
98 |   auto scores = boxes.select(1, 4);  // tensor.select(1, index) is equivalent to tensor[:, index]
99 |   auto order_t = std::get<1>(scores.sort(0, /* descending=*/true));
100 |   auto boxes_sorted = boxes.index_select(0, order_t);
101 | 
102 |   int boxes_num = boxes.size(0);
103 | 
104 |   const int col_blocks = THCCeilDiv(boxes_num, threadsPerBlock);
105 | 
106 |   scalar_t* boxes_dev = boxes_sorted.data<scalar_t>();
107 | 
108 |   THCState *state = at::globalContext().lazyInitCUDA(); // TODO replace with getTHCState
109 | 
110 |   unsigned long long* mask_dev = NULL;
111 |   //THCudaCheck(THCudaMalloc(state, (void**) &mask_dev,
112 |   //                         boxes_num * col_blocks * sizeof(unsigned long long)));
113 | 
114 |   mask_dev = (unsigned long long*) THCudaMalloc(state, boxes_num * col_blocks * sizeof(unsigned long long));
115 | 
116 |   // grid and block sizes for the kernel launch
117 |   dim3 blocks(THCCeilDiv(boxes_num, threadsPerBlock),
118 |               THCCeilDiv(boxes_num, threadsPerBlock));
119 |   dim3 threads(threadsPerBlock);
120 |   // launch the kernel; mask_dev records, for every pair of boxes, whether their
121 |   // IoU exceeds the threshold
122 |   nms_kernel<<<blocks, threads>>>(boxes_num,
123 |                                   nms_overlap_thresh,
124 |                                   boxes_dev,
125 |                                   mask_dev);
126 | 
127 |   std::vector<unsigned long long> mask_host(boxes_num * col_blocks);
128 |   THCudaCheck(cudaMemcpyAsync(
129 |                           &mask_host[0],
130 |                           mask_dev,
131 |                           sizeof(unsigned long long) * boxes_num * col_blocks,
132 |                           cudaMemcpyDeviceToHost,
133 |                           at::cuda::getCurrentCUDAStream()
134 |                           ));
135 | 
136 |   // initially every box is still a candidate: no removal bit is set
137 |   std::vector<unsigned long long> remv(col_blocks);
138 |   memset(&remv[0], 0, sizeof(unsigned long long) * col_blocks);
139 | 
140 |   at::Tensor keep = at::empty({boxes_num}, boxes.options().dtype(at::kLong).device(at::kCPU));
141 |   int64_t* keep_out = keep.data<int64_t>();
142 | 
143 |   int num_to_keep = 0;
144 |   for (int i = 0; i < boxes_num; i++) {
145 |     int nblock = i / threadsPerBlock;   // which 64-box block this box falls into
146 |     int inblock = i % threadsPerBlock;  // bit position of this box inside that block
147 | 
148 |     // If the box has not been suppressed by an earlier (higher-scored) box, keep
149 |     // it and mark every box whose IoU with it exceeds the threshold as removed.
150 |     if (!(remv[nblock] & (1ULL << inblock))) {
151 |       keep_out[num_to_keep++] = i;  // add to the kept set
152 |       unsigned long long *p = &mask_host[0] + i * col_blocks;
153 |       for (int j = nblock; j < col_blocks; j++) {
154 |         remv[j] |= p[j];  // remove all boxes suppressed by box i
155 |       }
156 |     }
157 |   }
158 | 
159 |   THCudaFree(state, mask_dev);
160 |   // TODO improve this part
161 |   return std::get<0>(order_t.index({
162 |                        keep.narrow(/*dim=*/0, /*start=*/0, /*length=*/num_to_keep).to(
163 |                          order_t.device(), keep.scalar_type())
164 |                      }).sort(0, false));
165 | }
166 | 
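The host-side loop at the end of nms_cuda is the part that turns the pairwise bitmask into kept indices; the same reduction can be written in a few lines of plain Python (an illustrative sketch, with mask standing in for the dev_mask rows copied back from the GPU):

```python
def reduce_mask(num_boxes, col_blocks, mask):
    # mask[i * col_blocks + j] has bit k set when box i suppresses box (64 * j + k)
    remv = [0] * col_blocks          # accumulated "already suppressed" bits
    keep = []
    for i in range(num_boxes):       # boxes are visited in descending score order
        block, bit = divmod(i, 64)
        if not (remv[block] >> bit) & 1:   # box i has not been suppressed yet
            keep.append(i)
            for j in range(col_blocks):    # suppress everything box i overlaps
                remv[j] |= mask[i * col_blocks + j]
    return keep
```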
-------------------------------------------------------------------------------- /docs/nms/src/soft_nms_cpu.pyx: --------------------------------------------------------------------------------
1 | # ----------------------------------------------------------
2 | # Soft-NMS: Improving Object Detection With One Line of Code
3 | # Copyright (c) University of Maryland, College Park
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Navaneeth
Bodla and Bharat Singh 6 | # Modified by Kai Chen 7 | # ---------------------------------------------------------- 8 | 9 | # cython: language_level=3, boundscheck=False 10 | 11 | import numpy as np 12 | cimport numpy as np 13 | 14 | 15 | cdef inline np.float32_t max(np.float32_t a, np.float32_t b): 16 | return a if a >= b else b 17 | 18 | cdef inline np.float32_t min(np.float32_t a, np.float32_t b): 19 | return a if a <= b else b 20 | 21 | 22 | def soft_nms_cpu( 23 | np.ndarray[float, ndim=2] boxes_in, 24 | float iou_thr, 25 | unsigned int method=1, 26 | float sigma=0.5, 27 | float min_score=0.001, 28 | ): 29 | boxes = boxes_in.copy() 30 | cdef int N = boxes.shape[0] 31 | cdef float iw, ih, box_area 32 | cdef float ua 33 | cdef int pos = 0 34 | cdef float maxscore = 0 35 | cdef int maxpos = 0 36 | cdef float x1, x2, y1, y2, tx1, tx2, ty1, ty2, ts, area, weight, ov 37 | inds = np.arange(N) 38 | 39 | for i in range(N): 40 | maxscore = boxes[i, 4] 41 | maxpos = i 42 | 43 | tx1 = boxes[i, 0] 44 | ty1 = boxes[i, 1] 45 | tx2 = boxes[i, 2] 46 | ty2 = boxes[i, 3] 47 | ts = boxes[i, 4] 48 | ti = inds[i] 49 | 50 | pos = i + 1 51 | # get max box 52 | while pos < N: 53 | if maxscore < boxes[pos, 4]: 54 | maxscore = boxes[pos, 4] 55 | maxpos = pos 56 | pos = pos + 1 57 | 58 | # add max box as a detection 59 | boxes[i, 0] = boxes[maxpos, 0] 60 | boxes[i, 1] = boxes[maxpos, 1] 61 | boxes[i, 2] = boxes[maxpos, 2] 62 | boxes[i, 3] = boxes[maxpos, 3] 63 | boxes[i, 4] = boxes[maxpos, 4] 64 | inds[i] = inds[maxpos] 65 | 66 | # swap ith box with position of max box 67 | boxes[maxpos, 0] = tx1 68 | boxes[maxpos, 1] = ty1 69 | boxes[maxpos, 2] = tx2 70 | boxes[maxpos, 3] = ty2 71 | boxes[maxpos, 4] = ts 72 | inds[maxpos] = ti 73 | 74 | tx1 = boxes[i, 0] 75 | ty1 = boxes[i, 1] 76 | tx2 = boxes[i, 2] 77 | ty2 = boxes[i, 3] 78 | ts = boxes[i, 4] 79 | 80 | pos = i + 1 81 | # NMS iterations, note that N changes if detection boxes fall below 82 | # threshold 83 | while pos < N: 84 | x1 = boxes[pos, 0] 85 | y1 = boxes[pos, 1] 86 | x2 = boxes[pos, 2] 87 | y2 = boxes[pos, 3] 88 | s = boxes[pos, 4] 89 | 90 | area = (x2 - x1 + 1) * (y2 - y1 + 1) 91 | iw = (min(tx2, x2) - max(tx1, x1) + 1) 92 | if iw > 0: 93 | ih = (min(ty2, y2) - max(ty1, y1) + 1) 94 | if ih > 0: 95 | ua = float((tx2 - tx1 + 1) * (ty2 - ty1 + 1) + area - iw * ih) 96 | ov = iw * ih / ua # iou between max box and detection box 97 | 98 | if method == 1: # linear 99 | if ov > iou_thr: 100 | weight = 1 - ov 101 | else: 102 | weight = 1 103 | elif method == 2: # gaussian 104 | weight = np.exp(-(ov * ov) / sigma) 105 | else: # original NMS 106 | if ov > iou_thr: 107 | weight = 0 108 | else: 109 | weight = 1 110 | 111 | boxes[pos, 4] = weight * boxes[pos, 4] 112 | 113 | # if box score falls below threshold, discard the box by 114 | # swapping with last box update N 115 | if boxes[pos, 4] < min_score: 116 | boxes[pos, 0] = boxes[N-1, 0] 117 | boxes[pos, 1] = boxes[N-1, 1] 118 | boxes[pos, 2] = boxes[N-1, 2] 119 | boxes[pos, 3] = boxes[N-1, 3] 120 | boxes[pos, 4] = boxes[N-1, 4] 121 | inds[pos] = inds[N - 1] 122 | N = N - 1 123 | pos = pos - 1 124 | 125 | pos = pos + 1 126 | 127 | return boxes[:N], inds[:N] 128 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved 3 | 4 | import glob 5 | import os 6 | from setuptools import find_packages, setup 7 | import torch 8 | from torch.utils.cpp_extension import CUDA_HOME, CppExtension, CUDAExtension 9 | 10 | torch_ver = [int(x) for x in torch.__version__.split(".")[:2]] 11 | assert torch_ver >= [1, 3], "Requires PyTorch >= 1.3" 12 | 13 | 14 | def get_extensions(): 15 | this_dir = os.path.dirname(os.path.abspath(__file__)) 16 | extensions_dir = os.path.join(this_dir, "det", "layers", "csrc") 17 | 18 | main_source = os.path.join(extensions_dir, "vision.cpp") 19 | sources = glob.glob(os.path.join(extensions_dir, "**", "*.cpp")) 20 | source_cuda = glob.glob(os.path.join(extensions_dir, "**", "*.cu")) + glob.glob( 21 | os.path.join(extensions_dir, "*.cu") 22 | ) 23 | 24 | sources = [main_source] + sources 25 | 26 | extension = CppExtension 27 | 28 | extra_compile_args = {"cxx": []} 29 | define_macros = [] 30 | 31 | if (torch.cuda.is_available() and CUDA_HOME is not None) or os.getenv("FORCE_CUDA", "0") == "1": 32 | extension = CUDAExtension 33 | sources += source_cuda 34 | define_macros += [("WITH_CUDA", None)] 35 | extra_compile_args["nvcc"] = [ 36 | "-DCUDA_HAS_FP16=1", 37 | "-D__CUDA_NO_HALF_OPERATORS__", 38 | "-D__CUDA_NO_HALF_CONVERSIONS__", 39 | "-D__CUDA_NO_HALF2_OPERATORS__", 40 | ] 41 | 42 | # It's better if pytorch can do this by default .. 43 | CC = os.environ.get("CC", None) 44 | if CC is not None: 45 | extra_compile_args["nvcc"].append("-ccbin={}".format(CC)) 46 | 47 | sources = [os.path.join(extensions_dir, s) for s in sources] 48 | 49 | include_dirs = [extensions_dir] 50 | 51 | ext_modules = [ 52 | extension( 53 | "det._C", 54 | sources, 55 | include_dirs=include_dirs, 56 | define_macros=define_macros, 57 | extra_compile_args=extra_compile_args, 58 | ) 59 | ] 60 | 61 | return ext_modules 62 | 63 | 64 | setup( 65 | name="Det", 66 | version="0.1.1", 67 | author="HTF", 68 | url="", 69 | description="" 70 | "platform for instance-level detection tasks based on Detectron2.", 71 | packages=find_packages(exclude=("configs", "tests")), 72 | python_requires=">=3.6", 73 | install_requires=[ 74 | "termcolor>=1.1", 75 | "Pillow>=6.0", 76 | "yacs>=0.1.6", 77 | "tabulate", 78 | "cloudpickle", 79 | "matplotlib", 80 | "tqdm>4.29.0", 81 | "tensorboard", 82 | "python-Levenshtein", 83 | "Polygon3", 84 | "shapely", 85 | ], 86 | extras_require={"all": ["psutil"]}, 87 | ext_modules=get_extensions(), 88 | cmdclass={"build_ext": torch.utils.cpp_extension.BuildExtension}, 89 | ) -------------------------------------------------------------------------------- /tools/compute_flops.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from detectron2.engine import default_argument_parser, default_setup 3 | 4 | from det.config import get_cfg 5 | from det.utils.measures import measure_model 6 | 7 | from train_net import Trainer 8 | 9 | 10 | def setup(args): 11 | """ 12 | Create configs and perform basic setups. 
13 | """ 14 | cfg = get_cfg() 15 | cfg.merge_from_file(args.config_file) 16 | cfg.merge_from_list(args.opts) 17 | cfg.freeze() 18 | default_setup(cfg, args) 19 | return cfg 20 | 21 | 22 | def main(args): 23 | cfg = setup(args) 24 | 25 | model = Trainer.build_model(cfg) 26 | model.eval().cuda() 27 | input_size = (3, 512, 512) 28 | image = torch.zeros(*input_size) 29 | batched_input = {"image": image} 30 | ops, params = measure_model(model, [batched_input]) 31 | print('ops: {:.2f}G\tparams: {:.2f}M'.format(ops / 2**30, params / 2**20)) 32 | 33 | 34 | if __name__ == "__main__": 35 | args = default_argument_parser().parse_args() 36 | print("Command Line Args:", args) 37 | main(args) 38 | -------------------------------------------------------------------------------- /tools/visualize_data.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 3 | import argparse 4 | import numpy as np 5 | import os 6 | from itertools import chain 7 | import cv2 8 | import tqdm 9 | from PIL import Image 10 | 11 | from detectron2.data import DatasetCatalog, MetadataCatalog, build_detection_train_loader 12 | from detectron2.data import detection_utils as utils 13 | from detectron2.data.build import filter_images_with_few_keypoints 14 | from detectron2.utils.logger import setup_logger 15 | from detectron2.utils.visualizer import Visualizer 16 | 17 | from det.config import get_cfg 18 | from det.data.dataset_mapper import DatasetMapperWithBasis 19 | 20 | 21 | def setup(args): 22 | cfg = get_cfg() 23 | if args.config_file: 24 | cfg.merge_from_file(args.config_file) 25 | cfg.merge_from_list(args.opts) 26 | cfg.freeze() 27 | return cfg 28 | 29 | 30 | def parse_args(in_args=None): 31 | parser = argparse.ArgumentParser(description="Visualize ground-truth data") 32 | parser.add_argument( 33 | "--source", 34 | choices=["annotation", "dataloader"], 35 | required=True, 36 | help="visualize the annotations or the data loader (with pre-processing)", 37 | ) 38 | parser.add_argument("--config-file", metavar="FILE", help="path to config file") 39 | parser.add_argument("--output-dir", default="./", help="path to output directory") 40 | parser.add_argument("--show", action="store_true", help="show output in a window") 41 | parser.add_argument( 42 | "--opts", 43 | help="Modify config options using the command-line", 44 | default=[], 45 | nargs=argparse.REMAINDER, 46 | ) 47 | return parser.parse_args(in_args) 48 | 49 | 50 | if __name__ == "__main__": 51 | args = parse_args() 52 | logger = setup_logger() 53 | logger.info("Arguments: " + str(args)) 54 | cfg = setup(args) 55 | 56 | dirname = args.output_dir 57 | os.makedirs(dirname, exist_ok=True) 58 | metadata = MetadataCatalog.get(cfg.DATASETS.TRAIN[0]) 59 | 60 | def output(vis, fname): 61 | if args.show: 62 | print(fname) 63 | cv2.imshow("window", vis.get_image()[:, :, ::-1]) 64 | cv2.waitKey() 65 | else: 66 | filepath = os.path.join(dirname, fname) 67 | print("Saving to {} ...".format(filepath)) 68 | vis.save(filepath) 69 | 70 | scale = 2.0 if args.show else 1.0 71 | if args.source == "dataloader": 72 | mapper = DatasetMapperWithBasis(cfg, True) 73 | train_data_loader = build_detection_train_loader(cfg, mapper) 74 | for batch in train_data_loader: 75 | for per_image in batch: 76 | # Pytorch tensor is in (C, H, W) format 77 | img = per_image["image"].permute(1, 2, 0) 78 | if cfg.INPUT.FORMAT == "BGR": 79 | img = img[:, :, [2, 1, 0]] 80 | else: 81 | img = 
np.asarray(Image.fromarray(img, mode=cfg.INPUT.FORMAT).convert("RGB")) 82 | 83 | visualizer = Visualizer(img, metadata=metadata, scale=scale) 84 | target_fields = per_image["instances"].get_fields() 85 | labels = [metadata.thing_classes[i] for i in target_fields["gt_classes"]] 86 | vis = visualizer.overlay_instances( 87 | labels=labels, 88 | boxes=target_fields.get("gt_boxes", None), 89 | masks=target_fields.get("gt_masks", None), 90 | keypoints=target_fields.get("gt_keypoints", None), 91 | ) 92 | output(vis, str(per_image["image_id"]) + ".jpg") 93 | else: 94 | dicts = list(chain.from_iterable([DatasetCatalog.get(k) for k in cfg.DATASETS.TRAIN])) 95 | if cfg.MODEL.KEYPOINT_ON: 96 | dicts = filter_images_with_few_keypoints(dicts, 1) 97 | for dic in tqdm.tqdm(dicts): 98 | img = utils.read_image(dic["file_name"], "RGB") 99 | visualizer = Visualizer(img, metadata=metadata, scale=scale) 100 | vis = visualizer.draw_dataset_dict(dic) 101 | output(vis, os.path.basename(dic["file_name"])) --------------------------------------------------------------------------------
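Both tools follow the usual detectron2 launch pattern; for example, `python tools/visualize_data.py --source dataloader --config-file configs/FCOS-Detection/R_50_1x.yaml --output-dir vis/` dumps the mapped training samples, and `python tools/compute_flops.py --config-file configs/FCOS-Detection/R_50_1x.yaml` prints the measured ops and parameter counts for a 3x512x512 input (the config path and output directory here are just example values).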