├── .gitignore
├── utils
    ├── __init__.py
    ├── registry.py
    ├── serialize.py
    ├── file_io.py
    ├── memory.py
    ├── colormap.py
    ├── env.py
    └── analysis.py
├── export
    ├── __init__.py
    ├── torchscript.py
    └── caffe2_patch.py
├── model_zoo
    ├── configs
    │   ├── COCO-Detection
    │   │   ├── retinanet_R_50_FPN_1x.yaml
    │   │   ├── faster_rcnn_R_50_C4_1x.yaml
    │   │   ├── faster_rcnn_R_50_FPN_1x.yaml
    │   │   ├── faster_rcnn_R_50_DC5_1x.yaml
    │   │   ├── retinanet_R_50_FPN_3x.yaml
    │   │   ├── retinanet_R_101_FPN_3x.yaml
    │   │   ├── faster_rcnn_R_50_C4_3x.yaml
    │   │   ├── faster_rcnn_R_101_C4_3x.yaml
    │   │   ├── faster_rcnn_R_101_FPN_3x.yaml
    │   │   ├── faster_rcnn_R_50_FPN_3x.yaml
    │   │   ├── faster_rcnn_R_50_DC5_3x.yaml
    │   │   ├── faster_rcnn_R_101_DC5_3x.yaml
    │   │   ├── rpn_R_50_FPN_1x.yaml
    │   │   ├── rpn_R_50_C4_1x.yaml
    │   │   ├── faster_rcnn_X_101_32x8d_FPN_3x.yaml
    │   │   └── fast_rcnn_R_50_FPN_1x.yaml
    │   ├── COCO-Keypoints
    │   │   ├── keypoint_rcnn_R_50_FPN_1x.yaml
    │   │   ├── keypoint_rcnn_R_50_FPN_3x.yaml
    │   │   ├── keypoint_rcnn_R_101_FPN_3x.yaml
    │   │   ├── keypoint_rcnn_X_101_32x8d_FPN_3x.yaml
    │   │   └── Base-Keypoint-RCNN-FPN.yaml
    │   ├── COCO-PanopticSegmentation
    │   │   ├── panoptic_fpn_R_50_1x.yaml
    │   │   ├── panoptic_fpn_R_50_3x.yaml
    │   │   ├── panoptic_fpn_R_101_3x.yaml
    │   │   └── Base-Panoptic-FPN.yaml
    │   ├── COCO-InstanceSegmentation
    │   │   ├── mask_rcnn_R_50_C4_1x.yaml
    │   │   ├── mask_rcnn_R_50_FPN_1x.yaml
    │   │   ├── mask_rcnn_R_50_DC5_1x.yaml
    │   │   ├── mask_rcnn_R_50_C4_3x.yaml
    │   │   ├── mask_rcnn_R_50_FPN_3x.yaml
    │   │   ├── mask_rcnn_R_101_C4_3x.yaml
    │   │   ├── mask_rcnn_R_101_FPN_3x.yaml
    │   │   ├── mask_rcnn_R_101_DC5_3x.yaml
    │   │   ├── mask_rcnn_R_50_DC5_3x.yaml
    │   │   ├── mask_rcnn_R_50_FPN_1x_giou.yaml
    │   │   └── mask_rcnn_X_101_32x8d_FPN_3x.yaml
    │   ├── quick_schedules
    │   │   ├── mask_rcnn_R_50_FPN_pred_boxes_training_acc_test.yaml
    │   │   ├── rpn_R_50_FPN_inference_acc_test.yaml
    │   │   ├── cascade_mask_rcnn_R_50_FPN_instant_test.yaml
    │   │   ├── fast_rcnn_R_50_FPN_inference_acc_test.yaml
    │   │   ├── retinanet_R_50_FPN_inference_acc_test.yaml
    │   │   ├── cascade_mask_rcnn_R_50_FPN_inference_acc_test.yaml
    │   │   ├── mask_rcnn_R_50_C4_inference_acc_test.yaml
    │   │   ├── mask_rcnn_R_50_DC5_inference_acc_test.yaml
    │   │   ├── keypoint_rcnn_R_50_FPN_inference_acc_test.yaml
    │   │   ├── rpn_R_50_FPN_instant_test.yaml
    │   │   ├── mask_rcnn_R_50_C4_instant_test.yaml
    │   │   ├── mask_rcnn_R_50_FPN_instant_test.yaml
    │   │   ├── retinanet_R_50_FPN_instant_test.yaml
    │   │   ├── keypoint_rcnn_R_50_FPN_instant_test.yaml
    │   │   ├── semantic_R_50_FPN_inference_acc_test.yaml
    │   │   ├── panoptic_fpn_R_50_inference_acc_test.yaml
    │   │   ├── mask_rcnn_R_50_C4_GCV_instant_test.yaml
    │   │   ├── mask_rcnn_R_50_FPN_inference_acc_test.yaml
    │   │   ├── panoptic_fpn_R_50_instant_test.yaml
    │   │   ├── semantic_R_50_FPN_instant_test.yaml
    │   │   ├── mask_rcnn_R_50_FPN_training_acc_test.yaml
    │   │   ├── fast_rcnn_R_50_FPN_instant_test.yaml
    │   │   ├── semantic_R_50_FPN_training_acc_test.yaml
    │   │   ├── mask_rcnn_R_50_C4_training_acc_test.yaml
    │   │   ├── panoptic_fpn_R_50_training_acc_test.yaml
    │   │   ├── keypoint_rcnn_R_50_FPN_training_acc_test.yaml
    │   │   └── keypoint_rcnn_R_50_FPN_normalized_training_acc_test.yaml
    │   ├── Misc
    │   │   ├── mask_rcnn_R_50_FPN_1x_dconv_c3-c5.yaml
    │   │   ├── mask_rcnn_R_50_FPN_1x_cls_agnostic.yaml
    │   │   ├── cascade_mask_rcnn_R_50_FPN_1x.yaml
    │   │   ├── mask_rcnn_R_50_FPN_3x_dconv_c3-c5.yaml
    │   │   ├── semantic_R_50_FPN_1x.yaml
    │   │   ├── cascade_mask_rcnn_R_50_FPN_3x.yaml
    │   │   ├── mask_rcnn_R_50_FPN_3x_gn.yaml
    │   │   ├── mask_rcnn_R_50_FPN_3x_syncbn.yaml
    │   │   ├── scratch_mask_rcnn_R_50_FPN_3x_gn.yaml
    │   │   ├── scratch_mask_rcnn_R_50_FPN_9x_gn.yaml
    │   │   ├── scratch_mask_rcnn_R_50_FPN_9x_syncbn.yaml
    │   │   ├── panoptic_fpn_R_101_dconv_cascade_gn_3x.yaml
    │   │   ├── cascade_mask_rcnn_X_152_32x8d_FPN_IN5k_gn_dconv.yaml
    │   │   └── cascade_rcnn_X_152_32x8d_FPN_IN5k_gn_dconv.yaml
    │   ├── Base-RCNN-C4.yaml
    │   ├── PascalVOC-Detection
    │   │   ├── faster_rcnn_R_50_C4.yaml
    │   │   └── faster_rcnn_R_50_FPN.yaml
    │   ├── Detectron1-Comparisons
    │   │   ├── faster_rcnn_R_50_FPN_noaug_1x.yaml
    │   │   ├── mask_rcnn_R_50_FPN_noaug_1x.yaml
    │   │   └── keypoint_rcnn_R_50_FPN_1x.yaml
    │   ├── LVISv0.5-InstanceSegmentation
    │   │   ├── mask_rcnn_R_50_FPN_1x.yaml
    │   │   ├── mask_rcnn_R_101_FPN_1x.yaml
    │   │   └── mask_rcnn_X_101_32x8d_FPN_1x.yaml
    │   ├── LVISv1-InstanceSegmentation
    │   │   ├── mask_rcnn_R_50_FPN_1x.yaml
    │   │   ├── mask_rcnn_R_101_FPN_1x.yaml
    │   │   └── mask_rcnn_X_101_32x8d_FPN_1x.yaml
    │   ├── Base-RCNN-DilatedC5.yaml
    │   ├── Base-RetinaNet.yaml
    │   ├── Cityscapes
    │   │   └── mask_rcnn_R_50_FPN.yaml
    │   └── Base-RCNN-FPN.yaml
    └── __init__.py
├── data
    ├── datasets
    │   ├── register_coco.py
    │   ├── __init__.py
    │   └── pascal_voc.py
    ├── transforms
    │   └── __init__.py
    ├── samplers
    │   ├── __init__.py
    │   └── grouped_batch_sampler.py
    └── __init__.py
├── modeling
    ├── proposal_generator
    │   ├── __init__.py
    │   └── build.py
    ├── backbone
    │   ├── __init__.py
    │   ├── build.py
    │   └── backbone.py
    ├── meta_arch
    │   ├── __init__.py
    │   └── build.py
    ├── roi_heads
    │   ├── __init__.py
    │   └── box_head.py
    ├── __init__.py
    ├── sampling.py
    ├── postprocessing.py
    └── matcher.py
├── projects
    ├── deeplab
    │   ├── __init__.py
    │   ├── build_solver.py
    │   ├── config.py
    │   ├── loss.py
    │   ├── lr_scheduler.py
    │   └── resnet.py
    ├── point_rend
    │   ├── __init__.py
    │   ├── config.py
    │   ├── color_augmentation.py
    │   ├── coarse_mask_head.py
    │   └── semantic_seg.py
    ├── panoptic_deeplab
    │   ├── __init__.py
    │   ├── config.py
    │   └── dataset_mapper.py
    └── __init__.py
├── solver
    ├── __init__.py
    └── lr_scheduler.py
├── __init__.py
├── config
    └── __init__.py
├── checkpoint
    ├── __init__.py
    ├── detection_checkpoint.py
    └── catalog.py
├── engine
    ├── __init__.py
    └── launch.py
├── structures
    ├── __init__.py
    └── image_list.py
├── evaluation
    ├── __init__.py
    ├── testing.py
    └── fast_eval_api.py
└── layers
    ├── __init__.py
    ├── rotated_boxes.py
    ├── shape_spec.py
    ├── blocks.py
    ├── roi_align_rotated.py
    ├── wrappers.py
    ├── roi_align.py
    └── aspp.py


/.gitignore:
--------------------------------------------------------------------------------
1 | *.so
2 | *.pyc
3 | 


--------------------------------------------------------------------------------
/utils/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | 


--------------------------------------------------------------------------------
/export/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | 
3 | from .api import *
4 | 
5 | __all__ = [k for k in globals().keys() if not k.startswith("_")]
6 | 


--------------------------------------------------------------------------------
/model_zoo/configs/COCO-Detection/retinanet_R_50_FPN_1x.yaml:
--------------------------------------------------------------------------------
1 | _BASE_: "../Base-RetinaNet.yaml"
2 | MODEL:
3 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
4 |   RESNETS:
5 |     DEPTH: 50
6 | 


--------------------------------------------------------------------------------
/data/datasets/register_coco.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | from .coco import register_coco_instances  # noqa
3 | from .coco_panoptic import register_coco_panoptic_separated  # noqa
4 | 


--------------------------------------------------------------------------------
/model_zoo/configs/COCO-Keypoints/keypoint_rcnn_R_50_FPN_1x.yaml:
--------------------------------------------------------------------------------
1 | _BASE_: "Base-Keypoint-RCNN-FPN.yaml"
2 | MODEL:
3 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
4 |   RESNETS:
5 |     DEPTH: 50
6 | 


--------------------------------------------------------------------------------
/model_zoo/configs/COCO-PanopticSegmentation/panoptic_fpn_R_50_1x.yaml:
--------------------------------------------------------------------------------
1 | _BASE_: "Base-Panoptic-FPN.yaml"
2 | MODEL:
3 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
4 |   RESNETS:
5 |     DEPTH: 50
6 | 


--------------------------------------------------------------------------------
/utils/registry.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | 
3 | # Keep this module for backward compatibility.
4 | from fvcore.common.registry import Registry  # noqa
5 | 
6 | __all__ = ["Registry"]
7 | 


--------------------------------------------------------------------------------
/model_zoo/configs/COCO-Detection/faster_rcnn_R_50_C4_1x.yaml:
--------------------------------------------------------------------------------
1 | _BASE_: "../Base-RCNN-C4.yaml"
2 | MODEL:
3 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
4 |   MASK_ON: False
5 |   RESNETS:
6 |     DEPTH: 50
7 | 


--------------------------------------------------------------------------------
/model_zoo/configs/COCO-Detection/faster_rcnn_R_50_FPN_1x.yaml:
--------------------------------------------------------------------------------
1 | _BASE_: "../Base-RCNN-FPN.yaml"
2 | MODEL:
3 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
4 |   MASK_ON: False
5 |   RESNETS:
6 |     DEPTH: 50
7 | 


--------------------------------------------------------------------------------
/model_zoo/configs/COCO-Detection/faster_rcnn_R_50_DC5_1x.yaml:
--------------------------------------------------------------------------------
1 | _BASE_: "../Base-RCNN-DilatedC5.yaml"
2 | MODEL:
3 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
4 |   MASK_ON: False
5 |   RESNETS:
6 |     DEPTH: 50
7 | 


--------------------------------------------------------------------------------
/model_zoo/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_C4_1x.yaml:
--------------------------------------------------------------------------------
1 | _BASE_: "../Base-RCNN-C4.yaml"
2 | MODEL:
3 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
4 |   MASK_ON: True
5 |   RESNETS:
6 |     DEPTH: 50
7 | 


--------------------------------------------------------------------------------
/model_zoo/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml:
--------------------------------------------------------------------------------
1 | _BASE_: "../Base-RCNN-FPN.yaml"
2 | MODEL:
3 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
4 |   MASK_ON: True
5 |   RESNETS:
6 |     DEPTH: 50
7 | 


--------------------------------------------------------------------------------
/model_zoo/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_DC5_1x.yaml:
--------------------------------------------------------------------------------
1 | _BASE_: "../Base-RCNN-DilatedC5.yaml"
2 | MODEL:
3 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
4 |   MASK_ON: True
5 |   RESNETS:
6 |     DEPTH: 50
7 | 


--------------------------------------------------------------------------------
/model_zoo/configs/COCO-Detection/retinanet_R_50_FPN_3x.yaml:
--------------------------------------------------------------------------------
1 | _BASE_: "../Base-RetinaNet.yaml"
2 | MODEL:
3 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
4 |   RESNETS:
5 |     DEPTH: 50
6 | SOLVER:
7 |   STEPS: (210000, 250000)
8 |   MAX_ITER: 270000
9 | 


--------------------------------------------------------------------------------
/model_zoo/configs/COCO-Detection/retinanet_R_101_FPN_3x.yaml:
--------------------------------------------------------------------------------
1 | _BASE_: "../Base-RetinaNet.yaml"
2 | MODEL:
3 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl"
4 |   RESNETS:
5 |     DEPTH: 101
6 | SOLVER:
7 |   STEPS: (210000, 250000)
8 |   MAX_ITER: 270000
9 | 


--------------------------------------------------------------------------------
/modeling/proposal_generator/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | from .build import PROPOSAL_GENERATOR_REGISTRY, build_proposal_generator
3 | from .rpn import RPN_HEAD_REGISTRY, build_rpn_head, RPN
4 | 
5 | __all__ = list(globals().keys())
6 | 


--------------------------------------------------------------------------------
/model_zoo/configs/COCO-Keypoints/keypoint_rcnn_R_50_FPN_3x.yaml:
--------------------------------------------------------------------------------
1 | _BASE_: "Base-Keypoint-RCNN-FPN.yaml"
2 | MODEL:
3 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
4 |   RESNETS:
5 |     DEPTH: 50
6 | SOLVER:
7 |   STEPS: (210000, 250000)
8 |   MAX_ITER: 270000
9 | 


--------------------------------------------------------------------------------
/model_zoo/configs/COCO-PanopticSegmentation/panoptic_fpn_R_50_3x.yaml:
--------------------------------------------------------------------------------
1 | _BASE_: "Base-Panoptic-FPN.yaml"
2 | MODEL:
3 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
4 |   RESNETS:
5 |     DEPTH: 50
6 | SOLVER:
7 |   STEPS: (210000, 250000)
8 |   MAX_ITER: 270000
9 | 


--------------------------------------------------------------------------------
/model_zoo/configs/COCO-Keypoints/keypoint_rcnn_R_101_FPN_3x.yaml:
--------------------------------------------------------------------------------
1 | _BASE_: "Base-Keypoint-RCNN-FPN.yaml"
2 | MODEL:
3 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl"
4 |   RESNETS:
5 |     DEPTH: 101
6 | SOLVER:
7 |   STEPS: (210000, 250000)
8 |   MAX_ITER: 270000
9 | 


--------------------------------------------------------------------------------
/model_zoo/configs/COCO-PanopticSegmentation/panoptic_fpn_R_101_3x.yaml:
--------------------------------------------------------------------------------
1 | _BASE_: "Base-Panoptic-FPN.yaml"
2 | MODEL:
3 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl"
4 |   RESNETS:
5 |     DEPTH: 101
6 | SOLVER:
7 |   STEPS: (210000, 250000)
8 |   MAX_ITER: 270000
9 | 


--------------------------------------------------------------------------------
/projects/deeplab/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | from .build_solver import build_lr_scheduler
3 | from .config import add_deeplab_config
4 | from .resnet import build_resnet_deeplab_backbone
5 | from .semantic_seg import DeepLabV3Head, DeepLabV3PlusHead
6 | 


--------------------------------------------------------------------------------
/model_zoo/configs/COCO-Detection/faster_rcnn_R_50_C4_3x.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-C4.yaml"
 2 | MODEL:
 3 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
 4 |   MASK_ON: False
 5 |   RESNETS:
 6 |     DEPTH: 50
 7 | SOLVER:
 8 |   STEPS: (210000, 250000)
 9 |   MAX_ITER: 270000
10 | 


--------------------------------------------------------------------------------
/model_zoo/configs/quick_schedules/mask_rcnn_R_50_FPN_pred_boxes_training_acc_test.yaml:
--------------------------------------------------------------------------------
1 | _BASE_: "./mask_rcnn_R_50_FPN_training_acc_test.yaml"
2 | MODEL:
3 |   ROI_BOX_HEAD:
4 |     TRAIN_ON_PRED_BOXES: True
5 | TEST:
6 |   EXPECTED_RESULTS: [["bbox", "AP", 42.6, 1.0], ["segm", "AP", 35.8, 0.8]]
7 | 


--------------------------------------------------------------------------------
/model_zoo/configs/COCO-Detection/faster_rcnn_R_101_C4_3x.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-C4.yaml"
 2 | MODEL:
 3 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl"
 4 |   MASK_ON: False
 5 |   RESNETS:
 6 |     DEPTH: 101
 7 | SOLVER:
 8 |   STEPS: (210000, 250000)
 9 |   MAX_ITER: 270000
10 | 


--------------------------------------------------------------------------------
/model_zoo/configs/COCO-Detection/faster_rcnn_R_101_FPN_3x.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-FPN.yaml"
 2 | MODEL:
 3 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl"
 4 |   MASK_ON: False
 5 |   RESNETS:
 6 |     DEPTH: 101
 7 | SOLVER:
 8 |   STEPS: (210000, 250000)
 9 |   MAX_ITER: 270000
10 | 


--------------------------------------------------------------------------------
/model_zoo/configs/COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-FPN.yaml"
 2 | MODEL:
 3 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
 4 |   MASK_ON: False
 5 |   RESNETS:
 6 |     DEPTH: 50
 7 | SOLVER:
 8 |   STEPS: (210000, 250000)
 9 |   MAX_ITER: 270000
10 | 


--------------------------------------------------------------------------------
/solver/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | from .build import build_lr_scheduler, build_optimizer, get_default_optimizer_params
3 | from .lr_scheduler import WarmupCosineLR, WarmupMultiStepLR
4 | 
5 | __all__ = [k for k in globals().keys() if not k.startswith("_")]
6 | 


--------------------------------------------------------------------------------
/model_zoo/configs/COCO-Detection/faster_rcnn_R_50_DC5_3x.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-DilatedC5.yaml"
 2 | MODEL:
 3 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
 4 |   MASK_ON: False
 5 |   RESNETS:
 6 |     DEPTH: 50
 7 | SOLVER:
 8 |   STEPS: (210000, 250000)
 9 |   MAX_ITER: 270000
10 | 


--------------------------------------------------------------------------------
/model_zoo/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_C4_3x.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-C4.yaml"
 2 | MODEL:
 3 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
 4 |   MASK_ON: True
 5 |   RESNETS:
 6 |     DEPTH: 50
 7 | SOLVER:
 8 |   STEPS: (210000, 250000)
 9 |   MAX_ITER: 270000
10 | 


--------------------------------------------------------------------------------
/model_zoo/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-FPN.yaml"
 2 | MODEL:
 3 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
 4 |   MASK_ON: True
 5 |   RESNETS:
 6 |     DEPTH: 50
 7 | SOLVER:
 8 |   STEPS: (210000, 250000)
 9 |   MAX_ITER: 270000
10 | 


--------------------------------------------------------------------------------
/model_zoo/configs/COCO-Detection/faster_rcnn_R_101_DC5_3x.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-DilatedC5.yaml"
 2 | MODEL:
 3 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl"
 4 |   MASK_ON: False
 5 |   RESNETS:
 6 |     DEPTH: 101
 7 | SOLVER:
 8 |   STEPS: (210000, 250000)
 9 |   MAX_ITER: 270000
10 | 


--------------------------------------------------------------------------------
/model_zoo/configs/COCO-InstanceSegmentation/mask_rcnn_R_101_C4_3x.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-C4.yaml"
 2 | MODEL:
 3 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl"
 4 |   MASK_ON: True
 5 |   RESNETS:
 6 |     DEPTH: 101
 7 | SOLVER:
 8 |   STEPS: (210000, 250000)
 9 |   MAX_ITER: 270000
10 | 


--------------------------------------------------------------------------------
/model_zoo/configs/COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-FPN.yaml"
 2 | MODEL:
 3 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl"
 4 |   MASK_ON: True
 5 |   RESNETS:
 6 |     DEPTH: 101
 7 | SOLVER:
 8 |   STEPS: (210000, 250000)
 9 |   MAX_ITER: 270000
10 | 


--------------------------------------------------------------------------------
/model_zoo/configs/COCO-Detection/rpn_R_50_FPN_1x.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-FPN.yaml"
 2 | MODEL:
 3 |   META_ARCHITECTURE: "ProposalNetwork"
 4 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
 5 |   MASK_ON: False
 6 |   RESNETS:
 7 |     DEPTH: 50
 8 |   RPN:
 9 |     POST_NMS_TOPK_TEST: 2000
10 | 


--------------------------------------------------------------------------------
/model_zoo/configs/COCO-InstanceSegmentation/mask_rcnn_R_101_DC5_3x.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-DilatedC5.yaml"
 2 | MODEL:
 3 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl"
 4 |   MASK_ON: True
 5 |   RESNETS:
 6 |     DEPTH: 101
 7 | SOLVER:
 8 |   STEPS: (210000, 250000)
 9 |   MAX_ITER: 270000
10 | 


--------------------------------------------------------------------------------
/model_zoo/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_DC5_3x.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-DilatedC5.yaml"
 2 | MODEL:
 3 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
 4 |   MASK_ON: True
 5 |   RESNETS:
 6 |     DEPTH: 50
 7 | SOLVER:
 8 |   STEPS: (210000, 250000)
 9 |   MAX_ITER: 270000
10 | 


--------------------------------------------------------------------------------
/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates.
 2 | 
 3 | from .utils.env import setup_environment
 4 | 
 5 | setup_environment()
 6 | 
 7 | 
 8 | # This line will be programatically read/write by setup.py.
 9 | # Leave them at the bottom of this file and don't touch them.
10 | __version__ = "0.3"
11 | 


--------------------------------------------------------------------------------
/model_zoo/configs/Misc/mask_rcnn_R_50_FPN_1x_dconv_c3-c5.yaml:
--------------------------------------------------------------------------------
1 | _BASE_: "../Base-RCNN-FPN.yaml"
2 | MODEL:
3 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
4 |   MASK_ON: True
5 |   RESNETS:
6 |     DEPTH: 50
7 |     DEFORM_ON_PER_STAGE: [False, True, True, True] # on Res3,Res4,Res5
8 |     DEFORM_MODULATED: False
9 | 


--------------------------------------------------------------------------------
/projects/point_rend/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | from .config import add_pointrend_config
3 | from .coarse_mask_head import CoarseMaskHead
4 | from .roi_heads import PointRendROIHeads
5 | from .semantic_seg import PointRendSemSegHead
6 | from .color_augmentation import ColorAugSSDTransform
7 | 


--------------------------------------------------------------------------------
/model_zoo/configs/COCO-Detection/rpn_R_50_C4_1x.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-C4.yaml"
 2 | MODEL:
 3 |   META_ARCHITECTURE: "ProposalNetwork"
 4 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
 5 |   MASK_ON: False
 6 |   RESNETS:
 7 |     DEPTH: 50
 8 |   RPN:
 9 |     PRE_NMS_TOPK_TEST: 12000
10 |     POST_NMS_TOPK_TEST: 2000
11 | 


--------------------------------------------------------------------------------
/model_zoo/configs/Misc/mask_rcnn_R_50_FPN_1x_cls_agnostic.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-FPN.yaml"
 2 | MODEL:
 3 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
 4 |   MASK_ON: True
 5 |   RESNETS:
 6 |     DEPTH: 50
 7 |   ROI_BOX_HEAD:
 8 |     CLS_AGNOSTIC_BBOX_REG: True
 9 |   ROI_MASK_HEAD:
10 |     CLS_AGNOSTIC_MASK: True
11 | 


--------------------------------------------------------------------------------
/model_zoo/configs/quick_schedules/rpn_R_50_FPN_inference_acc_test.yaml:
--------------------------------------------------------------------------------
1 | _BASE_: "../COCO-Detection/rpn_R_50_FPN_1x.yaml"
2 | MODEL:
3 |   WEIGHTS: "detectron2://COCO-Detection/rpn_R_50_FPN_1x/137258492/model_final_02ce48.pkl"
4 | DATASETS:
5 |   TEST: ("coco_2017_val_100",)
6 | TEST:
7 |   EXPECTED_RESULTS: [["box_proposals", "AR@1000", 58.16, 0.02]]
8 | 


--------------------------------------------------------------------------------
/model_zoo/configs/quick_schedules/cascade_mask_rcnn_R_50_FPN_instant_test.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Misc/cascade_mask_rcnn_R_50_FPN_3x.yaml"
 2 | DATASETS:
 3 |   TRAIN: ("coco_2017_val_100",)
 4 |   TEST: ("coco_2017_val_100",)
 5 | SOLVER:
 6 |   BASE_LR: 0.005
 7 |   STEPS: (30,)
 8 |   MAX_ITER: 40
 9 |   IMS_PER_BATCH: 4
10 | DATALOADER:
11 |   NUM_WORKERS: 2
12 | 


--------------------------------------------------------------------------------
/model_zoo/configs/quick_schedules/fast_rcnn_R_50_FPN_inference_acc_test.yaml:
--------------------------------------------------------------------------------
1 | _BASE_: "../COCO-Detection/fast_rcnn_R_50_FPN_1x.yaml"
2 | MODEL:
3 |   WEIGHTS: "detectron2://COCO-Detection/fast_rcnn_R_50_FPN_1x/137635226/model_final_e5f7ce.pkl"
4 | DATASETS:
5 |   TEST: ("coco_2017_val_100",)
6 | TEST:
7 |   EXPECTED_RESULTS: [["bbox", "AP", 45.70, 0.02]]
8 | 


--------------------------------------------------------------------------------
/model_zoo/configs/quick_schedules/retinanet_R_50_FPN_inference_acc_test.yaml:
--------------------------------------------------------------------------------
1 | _BASE_: "../COCO-Detection/retinanet_R_50_FPN_3x.yaml"
2 | MODEL:
3 |   WEIGHTS: "detectron2://COCO-Detection/retinanet_R_50_FPN_3x/190397829/model_final_5bd44e.pkl"
4 | DATASETS:
5 |   TEST: ("coco_2017_val_100",)
6 | TEST:
7 |   EXPECTED_RESULTS: [["bbox", "AP", 44.45, 0.02]]
8 | 


--------------------------------------------------------------------------------
/model_zoo/configs/Misc/cascade_mask_rcnn_R_50_FPN_1x.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-FPN.yaml"
 2 | MODEL:
 3 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
 4 |   MASK_ON: True
 5 |   RESNETS:
 6 |     DEPTH: 50
 7 |   ROI_HEADS:
 8 |     NAME: CascadeROIHeads
 9 |   ROI_BOX_HEAD:
10 |     CLS_AGNOSTIC_BBOX_REG: True
11 |   RPN:
12 |     POST_NMS_TOPK_TRAIN: 2000
13 | 


--------------------------------------------------------------------------------
/data/transforms/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | from fvcore.transforms.transform import Transform, TransformList  # order them first
3 | from fvcore.transforms.transform import *
4 | from .transform import *
5 | from .augmentation import *
6 | from .augmentation_impl import *
7 | 
8 | __all__ = [k for k in globals().keys() if not k.startswith("_")]
9 | 


--------------------------------------------------------------------------------
/model_zoo/configs/quick_schedules/cascade_mask_rcnn_R_50_FPN_inference_acc_test.yaml:
--------------------------------------------------------------------------------
1 | _BASE_: "../Misc/cascade_mask_rcnn_R_50_FPN_3x.yaml"
2 | MODEL:
3 |   WEIGHTS: "detectron2://Misc/cascade_mask_rcnn_R_50_FPN_3x/144998488/model_final_480dd8.pkl"
4 | DATASETS:
5 |   TEST: ("coco_2017_val_100",)
6 | TEST:
7 |   EXPECTED_RESULTS: [["bbox", "AP", 50.18, 0.02], ["segm", "AP",  43.87, 0.02]]
8 | 


--------------------------------------------------------------------------------
/data/samplers/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates.
 2 | from .distributed_sampler import InferenceSampler, RepeatFactorTrainingSampler, TrainingSampler
 3 | from .grouped_batch_sampler import GroupedBatchSampler
 4 | 
 5 | __all__ = [
 6 |     "GroupedBatchSampler",
 7 |     "TrainingSampler",
 8 |     "InferenceSampler",
 9 |     "RepeatFactorTrainingSampler",
10 | ]
11 | 


--------------------------------------------------------------------------------
/model_zoo/configs/COCO-PanopticSegmentation/Base-Panoptic-FPN.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-FPN.yaml"
 2 | MODEL:
 3 |   META_ARCHITECTURE: "PanopticFPN"
 4 |   MASK_ON: True
 5 |   SEM_SEG_HEAD:
 6 |     LOSS_WEIGHT: 0.5
 7 | DATASETS:
 8 |   TRAIN: ("coco_2017_train_panoptic_separated",)
 9 |   TEST: ("coco_2017_val_panoptic_separated",)
10 | DATALOADER:
11 |   FILTER_EMPTY_ANNOTATIONS: False
12 | 


--------------------------------------------------------------------------------
/model_zoo/configs/Misc/mask_rcnn_R_50_FPN_3x_dconv_c3-c5.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-FPN.yaml"
 2 | MODEL:
 3 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
 4 |   MASK_ON: True
 5 |   RESNETS:
 6 |     DEPTH: 50
 7 |     DEFORM_ON_PER_STAGE: [False, True, True, True] # on Res3,Res4,Res5
 8 |     DEFORM_MODULATED: False
 9 | SOLVER:
10 |   STEPS: (210000, 250000)
11 |   MAX_ITER: 270000
12 | 


--------------------------------------------------------------------------------
/model_zoo/configs/quick_schedules/mask_rcnn_R_50_C4_inference_acc_test.yaml:
--------------------------------------------------------------------------------
1 | _BASE_: "../COCO-InstanceSegmentation/mask_rcnn_R_50_C4_3x.yaml"
2 | MODEL:
3 |   WEIGHTS: "detectron2://COCO-InstanceSegmentation/mask_rcnn_R_50_C4_3x/137849525/model_final_4ce675.pkl"
4 | DATASETS:
5 |   TEST: ("coco_2017_val_100",)
6 | TEST:
7 |   EXPECTED_RESULTS: [["bbox", "AP", 47.37, 0.02], ["segm", "AP", 40.99, 0.02]]
8 | 


--------------------------------------------------------------------------------
/model_zoo/configs/quick_schedules/mask_rcnn_R_50_DC5_inference_acc_test.yaml:
--------------------------------------------------------------------------------
1 | _BASE_: "../COCO-InstanceSegmentation/mask_rcnn_R_50_DC5_3x.yaml"
2 | MODEL:
3 |   WEIGHTS: "detectron2://COCO-InstanceSegmentation/mask_rcnn_R_50_DC5_3x/137849551/model_final_84107b.pkl"
4 | DATASETS:
5 |   TEST: ("coco_2017_val_100",)
6 | TEST:
7 |   EXPECTED_RESULTS: [["bbox", "AP", 47.44, 0.02], ["segm", "AP", 42.94, 0.02]]
8 | 


--------------------------------------------------------------------------------
/config/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates.
 2 | from .compat import downgrade_config, upgrade_config
 3 | from .config import CfgNode, get_cfg, global_cfg, set_global_cfg, configurable
 4 | 
 5 | __all__ = [
 6 |     "CfgNode",
 7 |     "get_cfg",
 8 |     "global_cfg",
 9 |     "set_global_cfg",
10 |     "downgrade_config",
11 |     "upgrade_config",
12 |     "configurable",
13 | ]
14 | 


--------------------------------------------------------------------------------
/model_zoo/configs/quick_schedules/keypoint_rcnn_R_50_FPN_inference_acc_test.yaml:
--------------------------------------------------------------------------------
1 | _BASE_: "../COCO-Keypoints/keypoint_rcnn_R_50_FPN_3x.yaml"
2 | MODEL:
3 |   WEIGHTS: "detectron2://COCO-Keypoints/keypoint_rcnn_R_50_FPN_3x/137849621/model_final_a6e10b.pkl"
4 | DATASETS:
5 |   TEST: ("keypoints_coco_2017_val_100",)
6 | TEST:
7 |   EXPECTED_RESULTS: [["bbox", "AP", 52.47, 0.02], ["keypoints", "AP", 67.36, 0.02]]
8 | 


--------------------------------------------------------------------------------
/checkpoint/__init__.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | # Copyright (c) Facebook, Inc. and its affiliates.
 3 | # File:
 4 | 
 5 | 
 6 | from . import catalog as _UNUSED  # register the handler
 7 | from .detection_checkpoint import DetectionCheckpointer
 8 | from fvcore.common.checkpoint import Checkpointer, PeriodicCheckpointer
 9 | 
10 | __all__ = ["Checkpointer", "PeriodicCheckpointer", "DetectionCheckpointer"]
11 | 


--------------------------------------------------------------------------------
/engine/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates.
 2 | 
 3 | from .launch import *
 4 | from .train_loop import *
 5 | 
 6 | __all__ = [k for k in globals().keys() if not k.startswith("_")]
 7 | 
 8 | 
 9 | # prefer to let hooks and defaults live in separate namespaces (therefore not in __all__)
10 | # but still make them available here
11 | from .hooks import *
12 | from .defaults import *
13 | 


--------------------------------------------------------------------------------
/model_zoo/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x_giou.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-FPN.yaml"
 2 | MODEL:
 3 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
 4 |   MASK_ON: True
 5 |   RESNETS:
 6 |     DEPTH: 50
 7 |   RPN:
 8 |     BBOX_REG_LOSS_TYPE: "giou"
 9 |     BBOX_REG_LOSS_WEIGHT: 2.0
10 |   ROI_BOX_HEAD:
11 |     BBOX_REG_LOSS_TYPE: "giou"
12 |     BBOX_REG_LOSS_WEIGHT: 10.0
13 | 


--------------------------------------------------------------------------------
/model_zoo/configs/quick_schedules/rpn_R_50_FPN_instant_test.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../COCO-Detection/rpn_R_50_FPN_1x.yaml"
 2 | MODEL:
 3 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
 4 | DATASETS:
 5 |   TRAIN: ("coco_2017_val_100",)
 6 |   TEST: ("coco_2017_val_100",)
 7 | SOLVER:
 8 |   STEPS: (30,)
 9 |   MAX_ITER: 40
10 |   BASE_LR: 0.005
11 |   IMS_PER_BATCH: 4
12 | DATALOADER:
13 |   NUM_WORKERS: 2
14 | 


--------------------------------------------------------------------------------
/projects/panoptic_deeplab/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates.
 2 | from .config import add_panoptic_deeplab_config
 3 | from .dataset_mapper import PanopticDeeplabDatasetMapper
 4 | from .panoptic_seg import (
 5 |     PanopticDeepLab,
 6 |     INS_EMBED_BRANCHES_REGISTRY,
 7 |     build_ins_embed_branch,
 8 |     PanopticDeepLabSemSegHead,
 9 |     PanopticDeepLabInsEmbedHead,
10 | )
11 | 


--------------------------------------------------------------------------------
/model_zoo/configs/Misc/semantic_R_50_FPN_1x.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-FPN.yaml"
 2 | MODEL:
 3 |   META_ARCHITECTURE: "SemanticSegmentor"
 4 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
 5 |   RESNETS:
 6 |     DEPTH: 50
 7 | DATASETS:
 8 |   TRAIN: ("coco_2017_train_panoptic_stuffonly",)
 9 |   TEST: ("coco_2017_val_panoptic_stuffonly",)
10 | INPUT:
11 |   MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
12 | 


--------------------------------------------------------------------------------
/model_zoo/configs/quick_schedules/mask_rcnn_R_50_C4_instant_test.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-C4.yaml"
 2 | MODEL:
 3 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
 4 |   MASK_ON: True
 5 | DATASETS:
 6 |   TRAIN: ("coco_2017_val_100",)
 7 |   TEST: ("coco_2017_val_100",)
 8 | SOLVER:
 9 |   BASE_LR: 0.001
10 |   STEPS: (30,)
11 |   MAX_ITER: 40
12 |   IMS_PER_BATCH: 4
13 | DATALOADER:
14 |   NUM_WORKERS: 2
15 | 


--------------------------------------------------------------------------------
/model_zoo/configs/quick_schedules/mask_rcnn_R_50_FPN_instant_test.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-FPN.yaml"
 2 | MODEL:
 3 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
 4 |   MASK_ON: True
 5 | DATASETS:
 6 |   TRAIN: ("coco_2017_val_100",)
 7 |   TEST: ("coco_2017_val_100",)
 8 | SOLVER:
 9 |   BASE_LR: 0.005
10 |   STEPS: (30,)
11 |   MAX_ITER: 40
12 |   IMS_PER_BATCH: 4
13 | DATALOADER:
14 |   NUM_WORKERS: 2
15 | 


--------------------------------------------------------------------------------
/model_zoo/configs/quick_schedules/retinanet_R_50_FPN_instant_test.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../COCO-Detection/retinanet_R_50_FPN_1x.yaml"
 2 | MODEL:
 3 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
 4 | DATASETS:
 5 |   TRAIN: ("coco_2017_val_100",)
 6 |   TEST: ("coco_2017_val_100",)
 7 | SOLVER:
 8 |   BASE_LR: 0.005
 9 |   STEPS: (30,)
10 |   MAX_ITER: 40
11 |   IMS_PER_BATCH: 4
12 | DATALOADER:
13 |   NUM_WORKERS: 2
14 | 


--------------------------------------------------------------------------------
/model_zoo/configs/COCO-Keypoints/keypoint_rcnn_X_101_32x8d_FPN_3x.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "Base-Keypoint-RCNN-FPN.yaml"
 2 | MODEL:
 3 |   WEIGHTS: "detectron2://ImageNetPretrained/FAIR/X-101-32x8d.pkl"
 4 |   PIXEL_STD: [57.375, 57.120, 58.395]
 5 |   RESNETS:
 6 |     STRIDE_IN_1X1: False  # this is a C2 model
 7 |     NUM_GROUPS: 32
 8 |     WIDTH_PER_GROUP: 8
 9 |     DEPTH: 101
10 | SOLVER:
11 |   STEPS: (210000, 250000)
12 |   MAX_ITER: 270000
13 | 


--------------------------------------------------------------------------------
/model_zoo/configs/Misc/cascade_mask_rcnn_R_50_FPN_3x.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-FPN.yaml"
 2 | MODEL:
 3 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
 4 |   MASK_ON: True
 5 |   RESNETS:
 6 |     DEPTH: 50
 7 |   ROI_HEADS:
 8 |     NAME: CascadeROIHeads
 9 |   ROI_BOX_HEAD:
10 |     CLS_AGNOSTIC_BBOX_REG: True
11 |   RPN:
12 |     POST_NMS_TOPK_TRAIN: 2000
13 | SOLVER:
14 |   STEPS: (210000, 250000)
15 |   MAX_ITER: 270000
16 | 


--------------------------------------------------------------------------------
/model_zoo/configs/COCO-Detection/faster_rcnn_X_101_32x8d_FPN_3x.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-FPN.yaml"
 2 | MODEL:
 3 |   MASK_ON: False
 4 |   WEIGHTS: "detectron2://ImageNetPretrained/FAIR/X-101-32x8d.pkl"
 5 |   PIXEL_STD: [57.375, 57.120, 58.395]
 6 |   RESNETS:
 7 |     STRIDE_IN_1X1: False  # this is a C2 model
 8 |     NUM_GROUPS: 32
 9 |     WIDTH_PER_GROUP: 8
10 |     DEPTH: 101
11 | SOLVER:
12 |   STEPS: (210000, 250000)
13 |   MAX_ITER: 270000
14 | 


--------------------------------------------------------------------------------
/model_zoo/configs/quick_schedules/keypoint_rcnn_R_50_FPN_instant_test.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-FPN.yaml"
 2 | MODEL:
 3 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
 4 |   KEYPOINT_ON: True
 5 | DATASETS:
 6 |   TRAIN: ("keypoints_coco_2017_val_100",)
 7 |   TEST: ("keypoints_coco_2017_val_100",)
 8 | SOLVER:
 9 |   BASE_LR: 0.005
10 |   STEPS: (30,)
11 |   MAX_ITER: 40
12 |   IMS_PER_BATCH: 4
13 | DATALOADER:
14 |   NUM_WORKERS: 2
15 | 


--------------------------------------------------------------------------------
/modeling/backbone/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates.
 2 | from .build import build_backbone, BACKBONE_REGISTRY  # noqa F401 isort:skip
 3 | 
 4 | from .backbone import Backbone
 5 | from .fpn import FPN
 6 | from .resnet import ResNet, ResNetBlockBase, build_resnet_backbone, make_stage
 7 | 
 8 | __all__ = [k for k in globals().keys() if not k.startswith("_")]
 9 | # TODO can expose more resnet blocks after careful consideration
10 | 


--------------------------------------------------------------------------------
/model_zoo/configs/COCO-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_3x.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-FPN.yaml"
 2 | MODEL:
 3 |   MASK_ON: True
 4 |   WEIGHTS: "detectron2://ImageNetPretrained/FAIR/X-101-32x8d.pkl"
 5 |   PIXEL_STD: [57.375, 57.120, 58.395]
 6 |   RESNETS:
 7 |     STRIDE_IN_1X1: False  # this is a C2 model
 8 |     NUM_GROUPS: 32
 9 |     WIDTH_PER_GROUP: 8
10 |     DEPTH: 101
11 | SOLVER:
12 |   STEPS: (210000, 250000)
13 |   MAX_ITER: 270000
14 | 


--------------------------------------------------------------------------------
/model_zoo/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates.
 2 | """
 3 | Model Zoo API for Detectron2: a collection of functions to create common model architectures and
 4 | optionally load pre-trained weights as released in
 5 | `MODEL_ZOO.md <https://github.com/facebookresearch/detectron2/blob/master/MODEL_ZOO.md>`_.
 6 | """
 7 | from .model_zoo import get, get_config_file, get_checkpoint_url
 8 | 
 9 | __all__ = ["get_checkpoint_url", "get", "get_config_file"]
10 | 


--------------------------------------------------------------------------------
/model_zoo/configs/quick_schedules/semantic_R_50_FPN_inference_acc_test.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-FPN.yaml"
 2 | MODEL:
 3 |   META_ARCHITECTURE: "SemanticSegmentor"
 4 |   WEIGHTS: "detectron2://semantic_R_50_FPN_1x/111802073/model_final_c18079783c55a94968edc28b7101c5f0.pkl"
 5 |   RESNETS:
 6 |     DEPTH: 50
 7 | DATASETS:
 8 |   TEST: ("coco_2017_val_100_panoptic_stuffonly",)
 9 | TEST:
10 |   EXPECTED_RESULTS: [["sem_seg", "mIoU", 39.53, 0.02], ["sem_seg", "mACC", 51.50, 0.02]]
11 | 


--------------------------------------------------------------------------------
/model_zoo/configs/quick_schedules/panoptic_fpn_R_50_inference_acc_test.yaml:
--------------------------------------------------------------------------------
1 | _BASE_: "../COCO-PanopticSegmentation/panoptic_fpn_R_50_3x.yaml"
2 | MODEL:
3 |   WEIGHTS: "detectron2://COCO-PanopticSegmentation/panoptic_fpn_R_50_3x/139514569/model_final_c10459.pkl"
4 | DATASETS:
5 |   TEST: ("coco_2017_val_100_panoptic_separated",)
6 | TEST:
7 |   EXPECTED_RESULTS: [["bbox", "AP", 46.47, 0.02], ["segm", "AP", 43.39, 0.02], ["sem_seg", "mIoU", 42.55, 0.02], ["panoptic_seg", "PQ", 38.99, 0.02]]
8 | 


--------------------------------------------------------------------------------
/model_zoo/configs/Base-RCNN-C4.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   RPN:
 4 |     PRE_NMS_TOPK_TEST: 6000
 5 |     POST_NMS_TOPK_TEST: 1000
 6 |   ROI_HEADS:
 7 |     NAME: "Res5ROIHeads"
 8 |     #NUM_CLASSES: 15
 9 | DATASETS:
10 |   TRAIN: ("coco_2017_train",)
11 |   TEST: ("coco_2017_val",)
12 | SOLVER:
13 |   IMS_PER_BATCH: 16
14 |   BASE_LR: 0.02
15 |   STEPS: (60000, 80000)
16 |   MAX_ITER: 90000
17 | INPUT:
18 |   MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
19 | VERSION: 2
20 | 


--------------------------------------------------------------------------------
/model_zoo/configs/quick_schedules/mask_rcnn_R_50_C4_GCV_instant_test.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-C4.yaml"
 2 | MODEL:
 3 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
 4 |   MASK_ON: True
 5 | DATASETS:
 6 |   TRAIN: ("coco_2017_val_100",)
 7 |   TEST: ("coco_2017_val_100",)
 8 | SOLVER:
 9 |   BASE_LR: 0.001
10 |   STEPS: (30,)
11 |   MAX_ITER: 40
12 |   IMS_PER_BATCH: 4
13 |   CLIP_GRADIENTS:
14 |     ENABLED: True
15 |     CLIP_TYPE: "value"
16 |     CLIP_VALUE: 1.0
17 | DATALOADER:
18 |   NUM_WORKERS: 2
19 | 


--------------------------------------------------------------------------------
/model_zoo/configs/Misc/mask_rcnn_R_50_FPN_3x_gn.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-FPN.yaml"
 2 | MODEL:
 3 |   WEIGHTS: "catalog://ImageNetPretrained/FAIR/R-50-GN"
 4 |   MASK_ON: True
 5 |   RESNETS:
 6 |     DEPTH: 50
 7 |     NORM: "GN"
 8 |     STRIDE_IN_1X1: False
 9 |   FPN:
10 |     NORM: "GN"
11 |   ROI_BOX_HEAD:
12 |     NAME: "FastRCNNConvFCHead"
13 |     NUM_CONV: 4
14 |     NUM_FC: 1
15 |     NORM: "GN"
16 |   ROI_MASK_HEAD:
17 |     NORM: "GN"
18 | SOLVER:
19 |   # 3x schedule
20 |   STEPS: (210000, 250000)
21 |   MAX_ITER: 270000
22 | 


--------------------------------------------------------------------------------
/modeling/meta_arch/__init__.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | # Copyright (c) Facebook, Inc. and its affiliates.
 3 | 
 4 | from .build import META_ARCH_REGISTRY, build_model  # isort:skip
 5 | 
 6 | from .panoptic_fpn import PanopticFPN
 7 | 
 8 | # import all the meta_arch, so they will be registered
 9 | from .rcnn import GeneralizedRCNN, ProposalNetwork
10 | from .retinanet import RetinaNet
11 | from .semantic_seg import SEM_SEG_HEADS_REGISTRY, SemanticSegmentor, build_sem_seg_head
12 | 
13 | 
14 | __all__ = list(globals().keys())
15 | 


--------------------------------------------------------------------------------
/data/datasets/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates.
 2 | from .coco import load_coco_json, load_sem_seg, register_coco_instances
 3 | from .coco_panoptic import register_coco_panoptic, register_coco_panoptic_separated
 4 | from .lvis import load_lvis_json, register_lvis_instances, get_lvis_instances_meta
 5 | from .pascal_voc import load_voc_instances, register_pascal_voc
 6 | from . import builtin as _builtin  # ensure the builtin datasets are registered
 7 | 
 8 | 
 9 | __all__ = [k for k in globals().keys() if not k.startswith("_")]
10 | 


--------------------------------------------------------------------------------
/model_zoo/configs/quick_schedules/mask_rcnn_R_50_FPN_inference_acc_test.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"
 2 | MODEL:
 3 |   WEIGHTS: "detectron2://COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x/137849600/model_final_f10217.pkl"
 4 | DATASETS:
 5 |   TEST: ("coco_2017_val_100",)
 6 | TEST:
 7 |   EXPECTED_RESULTS: [["bbox", "AP", 47.34, 0.02], ["segm", "AP",  42.67, 0.02], ["bbox_TTA", "AP", 49.11, 0.02], ["segm_TTA", "AP", 45.04, 0.02]]
 8 |   AUG:
 9 |     ENABLED: True
10 |     MIN_SIZES: (700, 800)  # to save some time
11 | 


--------------------------------------------------------------------------------
/structures/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates.
 2 | from .boxes import Boxes, BoxMode, pairwise_iou, pairwise_ioa
 3 | from .image_list import ImageList
 4 | 
 5 | from .instances import Instances
 6 | from .keypoints import Keypoints, heatmaps_to_keypoints
 7 | from .masks import BitMasks, PolygonMasks, rasterize_polygons_within_box, polygons_to_bitmask
 8 | from .rotated_boxes import RotatedBoxes
 9 | from .rotated_boxes import pairwise_iou as pairwise_iou_rotated
10 | 
11 | __all__ = [k for k in globals().keys() if not k.startswith("_")]
12 | 


--------------------------------------------------------------------------------
/model_zoo/configs/quick_schedules/panoptic_fpn_R_50_instant_test.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-FPN.yaml"
 2 | MODEL:
 3 |   META_ARCHITECTURE: "PanopticFPN"
 4 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
 5 |   MASK_ON: True
 6 |   RESNETS:
 7 |     DEPTH: 50
 8 |   SEM_SEG_HEAD:
 9 |     LOSS_WEIGHT: 0.5
10 | DATASETS:
11 |   TRAIN: ("coco_2017_val_100_panoptic_separated",)
12 |   TEST: ("coco_2017_val_100_panoptic_separated",)
13 | SOLVER:
14 |   BASE_LR: 0.005
15 |   STEPS: (30,)
16 |   MAX_ITER: 40
17 |   IMS_PER_BATCH: 4
18 | DATALOADER:
19 |   NUM_WORKERS: 1
20 | 


--------------------------------------------------------------------------------
/model_zoo/configs/quick_schedules/semantic_R_50_FPN_instant_test.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-FPN.yaml"
 2 | MODEL:
 3 |   META_ARCHITECTURE: "SemanticSegmentor"
 4 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
 5 |   RESNETS:
 6 |     DEPTH: 50
 7 | DATASETS:
 8 |   TRAIN: ("coco_2017_val_100_panoptic_stuffonly",)
 9 |   TEST: ("coco_2017_val_100_panoptic_stuffonly",)
10 | INPUT:
11 |   MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
12 | SOLVER:
13 |   BASE_LR: 0.005
14 |   STEPS: (30,)
15 |   MAX_ITER: 40
16 |   IMS_PER_BATCH: 4
17 | DATALOADER:
18 |   NUM_WORKERS: 2
19 | 


--------------------------------------------------------------------------------
/model_zoo/configs/PascalVOC-Detection/faster_rcnn_R_50_C4.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-C4.yaml"
 2 | MODEL:
 3 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
 4 |   MASK_ON: False
 5 |   RESNETS:
 6 |     DEPTH: 50
 7 |   ROI_HEADS:
 8 |     NUM_CLASSES: 20
 9 | INPUT:
10 |   MIN_SIZE_TRAIN: (480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800)
11 |   MIN_SIZE_TEST: 800
12 | DATASETS:
13 |   TRAIN: ('voc_2007_trainval', 'voc_2012_trainval')
14 |   TEST: ('voc_2007_test',)
15 | SOLVER:
16 |   STEPS: (12000, 16000)
17 |   MAX_ITER: 18000  # 17.4 epochs
18 |   WARMUP_ITERS: 100
19 | 


--------------------------------------------------------------------------------
/model_zoo/configs/PascalVOC-Detection/faster_rcnn_R_50_FPN.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-FPN.yaml"
 2 | MODEL:
 3 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
 4 |   MASK_ON: False
 5 |   RESNETS:
 6 |     DEPTH: 50
 7 |   ROI_HEADS:
 8 |     NUM_CLASSES: 20
 9 | INPUT:
10 |   MIN_SIZE_TRAIN: (480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800)
11 |   MIN_SIZE_TEST: 800
12 | DATASETS:
13 |   TRAIN: ('voc_2007_trainval', 'voc_2012_trainval')
14 |   TEST: ('voc_2007_test',)
15 | SOLVER:
16 |   STEPS: (12000, 16000)
17 |   MAX_ITER: 18000  # 17.4 epochs
18 |   WARMUP_ITERS: 100
19 | 


--------------------------------------------------------------------------------
/model_zoo/configs/Detectron1-Comparisons/faster_rcnn_R_50_FPN_noaug_1x.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-FPN.yaml"
 2 | MODEL:
 3 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
 4 |   MASK_ON: False
 5 |   RESNETS:
 6 |     DEPTH: 50
 7 |   # Detectron1 uses smooth L1 loss with some magic beta values.
 8 |   # The defaults are changed to L1 loss in Detectron2.
 9 |   RPN:
10 |     SMOOTH_L1_BETA: 0.1111
11 |   ROI_BOX_HEAD:
12 |     SMOOTH_L1_BETA: 1.0
13 |     POOLER_SAMPLING_RATIO: 2
14 |     POOLER_TYPE: "ROIAlign"
15 | INPUT:
16 |   # no scale augmentation
17 |   MIN_SIZE_TRAIN: (800, )
18 | 


--------------------------------------------------------------------------------
/model_zoo/configs/Misc/mask_rcnn_R_50_FPN_3x_syncbn.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-FPN.yaml"
 2 | MODEL:
 3 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
 4 |   MASK_ON: True
 5 |   RESNETS:
 6 |     DEPTH: 50
 7 |     NORM: "SyncBN"
 8 |     STRIDE_IN_1X1: True
 9 |   FPN:
10 |     NORM: "SyncBN"
11 |   ROI_BOX_HEAD:
12 |     NAME: "FastRCNNConvFCHead"
13 |     NUM_CONV: 4
14 |     NUM_FC: 1
15 |     NORM: "SyncBN"
16 |   ROI_MASK_HEAD:
17 |     NORM: "SyncBN"
18 | SOLVER:
19 |   # 3x schedule
20 |   STEPS: (210000, 250000)
21 |   MAX_ITER: 270000
22 | TEST:
23 |   PRECISE_BN:
24 |     ENABLED: True
25 | 


--------------------------------------------------------------------------------
/model_zoo/configs/LVISv0.5-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-FPN.yaml"
 2 | MODEL:
 3 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
 4 |   MASK_ON: True
 5 |   RESNETS:
 6 |     DEPTH: 50
 7 |   ROI_HEADS:
 8 |     NUM_CLASSES: 1230
 9 |     SCORE_THRESH_TEST: 0.0001
10 | INPUT:
11 |   MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
12 | DATASETS:
13 |   TRAIN: ("lvis_v0.5_train",)
14 |   TEST: ("lvis_v0.5_val",)
15 | TEST:
16 |   DETECTIONS_PER_IMAGE: 300  # LVIS allows up to 300
17 | DATALOADER:
18 |   SAMPLER_TRAIN: "RepeatFactorTrainingSampler"
19 |   REPEAT_THRESHOLD: 0.001
20 | 


--------------------------------------------------------------------------------
/model_zoo/configs/Misc/scratch_mask_rcnn_R_50_FPN_3x_gn.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "mask_rcnn_R_50_FPN_3x_gn.yaml"
 2 | MODEL:
 3 |   # Train from random initialization.
 4 |   WEIGHTS: ""
 5 |   # It makes sense to divide by STD when training from scratch
 6 |   # But it seems to make no difference on the results and C2's models didn't do this.
 7 |   # So we keep things consistent with C2.
 8 |   # PIXEL_STD: [57.375, 57.12, 58.395]
 9 |   MASK_ON: True
10 |   BACKBONE:
11 |     FREEZE_AT: 0
12 | # NOTE: Please refer to Rethinking ImageNet Pre-training https://arxiv.org/abs/1811.08883
13 | # to learn what you need for training from scratch.
14 | 


--------------------------------------------------------------------------------
/model_zoo/configs/LVISv0.5-InstanceSegmentation/mask_rcnn_R_101_FPN_1x.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-FPN.yaml"
 2 | MODEL:
 3 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl"
 4 |   MASK_ON: True
 5 |   RESNETS:
 6 |     DEPTH: 101
 7 |   ROI_HEADS:
 8 |     NUM_CLASSES: 1230
 9 |     SCORE_THRESH_TEST: 0.0001
10 | INPUT:
11 |   MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
12 | DATASETS:
13 |   TRAIN: ("lvis_v0.5_train",)
14 |   TEST: ("lvis_v0.5_val",)
15 | TEST:
16 |   DETECTIONS_PER_IMAGE: 300  # LVIS allows up to 300
17 | DATALOADER:
18 |   SAMPLER_TRAIN: "RepeatFactorTrainingSampler"
19 |   REPEAT_THRESHOLD: 0.001
20 | 


--------------------------------------------------------------------------------
/model_zoo/configs/COCO-Keypoints/Base-Keypoint-RCNN-FPN.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-FPN.yaml"
 2 | MODEL:
 3 |   KEYPOINT_ON: True
 4 |   ROI_HEADS:
 5 |     NUM_CLASSES: 1
 6 |   ROI_BOX_HEAD:
 7 |     SMOOTH_L1_BETA: 0.5  # Keypoint AP degrades (though box AP improves) when using plain L1 loss
 8 |   RPN:
 9 |     # Detectron1 uses 2000 proposals per-batch, but this option is per-image in detectron2.
10 |     # 1000 proposals per-image is found to hurt box AP.
11 |     # Therefore we increase it to 1500 per-image.
12 |     POST_NMS_TOPK_TRAIN: 1500
13 | DATASETS:
14 |   TRAIN: ("keypoints_coco_2017_train",)
15 |   TEST: ("keypoints_coco_2017_val",)
16 | 


--------------------------------------------------------------------------------
/model_zoo/configs/quick_schedules/mask_rcnn_R_50_FPN_training_acc_test.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-FPN.yaml"
 2 | MODEL:
 3 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
 4 |   ROI_HEADS:
 5 |     BATCH_SIZE_PER_IMAGE: 256
 6 |   MASK_ON: True
 7 | DATASETS:
 8 |   TRAIN: ("coco_2017_val",)
 9 |   TEST: ("coco_2017_val",)
10 | INPUT:
11 |   MIN_SIZE_TRAIN: (600,)
12 |   MAX_SIZE_TRAIN: 1000
13 |   MIN_SIZE_TEST: 800
14 |   MAX_SIZE_TEST: 1000
15 | SOLVER:
16 |   WARMUP_FACTOR: 0.3333333
17 |   WARMUP_ITERS: 100
18 |   STEPS: (5500, 5800)
19 |   MAX_ITER: 6000
20 | TEST:
21 |   EXPECTED_RESULTS: [["bbox", "AP", 42.5, 1.0], ["segm", "AP", 35.8, 0.8]]
22 | 


--------------------------------------------------------------------------------
/model_zoo/configs/Misc/scratch_mask_rcnn_R_50_FPN_9x_gn.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "mask_rcnn_R_50_FPN_3x_gn.yaml"
 2 | MODEL:
 3 |   PIXEL_STD: [57.375, 57.12, 58.395]
 4 |   WEIGHTS: ""
 5 |   MASK_ON: True
 6 |   RESNETS:
 7 |     STRIDE_IN_1X1: False
 8 |   BACKBONE:
 9 |     FREEZE_AT: 0
10 | SOLVER:
11 |   # 9x schedule
12 |   IMS_PER_BATCH: 64  # 4x the standard
13 |   STEPS: (187500, 197500)  # last 60/4==15k and last 20/4==5k
14 |   MAX_ITER: 202500   # 90k * 9 / 4
15 |   BASE_LR: 0.08
16 | TEST:
17 |   EVAL_PERIOD: 2500
18 | # NOTE: Please refer to Rethinking ImageNet Pre-training https://arxiv.org/abs/1811.08883
19 | # to learn what you need for training from scratch.
20 | 


--------------------------------------------------------------------------------
/model_zoo/configs/Misc/scratch_mask_rcnn_R_50_FPN_9x_syncbn.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "mask_rcnn_R_50_FPN_3x_syncbn.yaml"
 2 | MODEL:
 3 |   PIXEL_STD: [57.375, 57.12, 58.395]
 4 |   WEIGHTS: ""
 5 |   MASK_ON: True
 6 |   RESNETS:
 7 |     STRIDE_IN_1X1: False
 8 |   BACKBONE:
 9 |     FREEZE_AT: 0
10 | SOLVER:
11 |   # 9x schedule
12 |   IMS_PER_BATCH: 64  # 4x the standard
13 |   STEPS: (187500, 197500)  # last 60/4==15k and last 20/4==5k
14 |   MAX_ITER: 202500   # 90k * 9 / 4
15 |   BASE_LR: 0.08
16 | TEST:
17 |   EVAL_PERIOD: 2500
18 | # NOTE: Please refer to Rethinking ImageNet Pre-training https://arxiv.org/abs/1811.08883
19 | # to learn what you need for training from scratch.
20 | 


--------------------------------------------------------------------------------
/model_zoo/configs/quick_schedules/fast_rcnn_R_50_FPN_instant_test.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../COCO-Detection/fast_rcnn_R_50_FPN_1x.yaml"
 2 | MODEL:
 3 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
 4 | DATASETS:
 5 |   TRAIN: ("coco_2017_val_100",)
 6 |   PROPOSAL_FILES_TRAIN: ("detectron2://COCO-Detection/rpn_R_50_FPN_1x/137258492/coco_2017_val_box_proposals_ee0dad.pkl", )
 7 |   TEST: ("coco_2017_val_100",)
 8 |   PROPOSAL_FILES_TEST: ("detectron2://COCO-Detection/rpn_R_50_FPN_1x/137258492/coco_2017_val_box_proposals_ee0dad.pkl", )
 9 | SOLVER:
10 |   BASE_LR: 0.005
11 |   STEPS: (30,)
12 |   MAX_ITER: 40
13 |   IMS_PER_BATCH: 4
14 | DATALOADER:
15 |   NUM_WORKERS: 2
16 | 


--------------------------------------------------------------------------------
/model_zoo/configs/Detectron1-Comparisons/mask_rcnn_R_50_FPN_noaug_1x.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-FPN.yaml"
 2 | MODEL:
 3 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
 4 |   MASK_ON: True
 5 |   RESNETS:
 6 |     DEPTH: 50
 7 |   # Detectron1 uses smooth L1 loss with some magic beta values.
 8 |   # The defaults are changed to L1 loss in Detectron2.
 9 |   RPN:
10 |     SMOOTH_L1_BETA: 0.1111
11 |   ROI_BOX_HEAD:
12 |     SMOOTH_L1_BETA: 1.0
13 |     POOLER_SAMPLING_RATIO: 2
14 |     POOLER_TYPE: "ROIAlign"
15 |   ROI_MASK_HEAD:
16 |     POOLER_SAMPLING_RATIO: 2
17 |     POOLER_TYPE: "ROIAlign"
18 | INPUT:
19 |   # no scale augmentation
20 |   MIN_SIZE_TRAIN: (800, )
21 | 


--------------------------------------------------------------------------------
/model_zoo/configs/quick_schedules/semantic_R_50_FPN_training_acc_test.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-FPN.yaml"
 2 | MODEL:
 3 |   META_ARCHITECTURE: "SemanticSegmentor"
 4 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
 5 |   RESNETS:
 6 |     DEPTH: 50
 7 | DATASETS:
 8 |   TRAIN: ("coco_2017_val_panoptic_stuffonly",)
 9 |   TEST: ("coco_2017_val_panoptic_stuffonly",)
10 | SOLVER:
11 |   BASE_LR: 0.01
12 |   WARMUP_FACTOR: 0.001
13 |   WARMUP_ITERS: 300
14 |   STEPS: (5500,)
15 |   MAX_ITER: 7000
16 | TEST:
17 |   EXPECTED_RESULTS: [["sem_seg", "mIoU", 76.51, 1.0], ["sem_seg", "mACC", 83.25, 1.0]]
18 | INPUT:
19 |   # no scale augmentation
20 |   MIN_SIZE_TRAIN: (800, )
21 | 


--------------------------------------------------------------------------------
/model_zoo/configs/quick_schedules/mask_rcnn_R_50_C4_training_acc_test.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-C4.yaml"
 2 | MODEL:
 3 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
 4 |   ROI_HEADS:
 5 |     BATCH_SIZE_PER_IMAGE: 256
 6 |   MASK_ON: True
 7 | DATASETS:
 8 |   TRAIN: ("coco_2017_val",)
 9 |   TEST: ("coco_2017_val",)
10 | INPUT:
11 |   MIN_SIZE_TRAIN: (600,)
12 |   MAX_SIZE_TRAIN: 1000
13 |   MIN_SIZE_TEST: 800
14 |   MAX_SIZE_TEST: 1000
15 | SOLVER:
16 |   IMS_PER_BATCH: 8  # base uses 16
17 |   WARMUP_FACTOR: 0.33333
18 |   WARMUP_ITERS: 100
19 |   STEPS: (11000, 11600)
20 |   MAX_ITER: 12000
21 | TEST:
22 |   EXPECTED_RESULTS: [["bbox", "AP", 41.88, 0.7], ["segm", "AP", 33.79, 0.5]]
23 | 


--------------------------------------------------------------------------------
/data/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates.
 2 | from . import transforms  # isort:skip
 3 | 
 4 | from .build import (
 5 |     build_batch_data_loader,
 6 |     build_detection_test_loader,
 7 |     build_detection_train_loader,
 8 |     get_detection_dataset_dicts,
 9 |     load_proposals_into_dataset,
10 |     print_instances_class_histogram,
11 | )
12 | from .catalog import DatasetCatalog, MetadataCatalog, Metadata
13 | from .common import DatasetFromList, MapDataset
14 | from .dataset_mapper import DatasetMapper
15 | 
16 | # ensure the builtin datasets are registered
17 | from . import datasets, samplers  # isort:skip
18 | 
19 | __all__ = [k for k in globals().keys() if not k.startswith("_")]
20 | 


--------------------------------------------------------------------------------
/model_zoo/configs/LVISv1-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-FPN.yaml"
 2 | MODEL:
 3 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
 4 |   MASK_ON: True
 5 |   RESNETS:
 6 |     DEPTH: 50
 7 |   ROI_HEADS:
 8 |     NUM_CLASSES: 1203
 9 |     SCORE_THRESH_TEST: 0.0001
10 | INPUT:
11 |   MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
12 | DATASETS:
13 |   TRAIN: ("lvis_v1_train",)
14 |   TEST: ("lvis_v1_val",)
15 | TEST:
16 |   DETECTIONS_PER_IMAGE: 300  # LVIS allows up to 300
17 | SOLVER:
18 |   STEPS: (120000, 160000)
19 |   MAX_ITER: 180000  # 180000 * 16 / 100000 ~ 28.8 epochs
20 | DATALOADER:
21 |   SAMPLER_TRAIN: "RepeatFactorTrainingSampler"
22 |   REPEAT_THRESHOLD: 0.001
23 | 


--------------------------------------------------------------------------------
/model_zoo/configs/quick_schedules/panoptic_fpn_R_50_training_acc_test.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-FPN.yaml"
 2 | MODEL:
 3 |   META_ARCHITECTURE: "PanopticFPN"
 4 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
 5 |   MASK_ON: True
 6 |   RESNETS:
 7 |     DEPTH: 50
 8 |   SEM_SEG_HEAD:
 9 |     LOSS_WEIGHT: 0.5
10 | DATASETS:
11 |   TRAIN: ("coco_2017_val_panoptic_separated",)
12 |   TEST: ("coco_2017_val_panoptic_separated",)
13 | SOLVER:
14 |   BASE_LR: 0.01
15 |   WARMUP_FACTOR: 0.001
16 |   WARMUP_ITERS: 500
17 |   STEPS: (5500,)
18 |   MAX_ITER: 7000
19 | TEST:
20 |   EXPECTED_RESULTS: [["bbox", "AP", 46.70, 1.1], ["segm", "AP", 39.0, 0.7], ["sem_seg", "mIoU", 64.73, 1.3], ["panoptic_seg", "PQ", 48.13, 0.8]]
21 | 


--------------------------------------------------------------------------------
/model_zoo/configs/LVISv1-InstanceSegmentation/mask_rcnn_R_101_FPN_1x.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-FPN.yaml"
 2 | MODEL:
 3 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl"
 4 |   MASK_ON: True
 5 |   RESNETS:
 6 |     DEPTH: 101
 7 |   ROI_HEADS:
 8 |     NUM_CLASSES: 1203
 9 |     SCORE_THRESH_TEST: 0.0001
10 | INPUT:
11 |   MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
12 | DATASETS:
13 |   TRAIN: ("lvis_v1_train",)
14 |   TEST: ("lvis_v1_val",)
15 | TEST:
16 |   DETECTIONS_PER_IMAGE: 300  # LVIS allows up to 300
17 | SOLVER:
18 |   STEPS: (120000, 160000)
19 |   MAX_ITER: 180000  # 180000 * 16 / 100000 ~ 28.8 epochs
20 | DATALOADER:
21 |   SAMPLER_TRAIN: "RepeatFactorTrainingSampler"
22 |   REPEAT_THRESHOLD: 0.001
23 | 


--------------------------------------------------------------------------------
/evaluation/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates.
 2 | from .cityscapes_evaluation import CityscapesInstanceEvaluator, CityscapesSemSegEvaluator
 3 | from .coco_evaluation import COCOEvaluator
 4 | from .rotated_coco_evaluation import RotatedCOCOEvaluator
 5 | from .evaluator import DatasetEvaluator, DatasetEvaluators, inference_context, inference_on_dataset
 6 | from .lvis_evaluation import LVISEvaluator
 7 | from .panoptic_evaluation import COCOPanopticEvaluator
 8 | from .pascal_voc_evaluation import PascalVOCDetectionEvaluator
 9 | from .sem_seg_evaluation import SemSegEvaluator
10 | from .testing import print_csv_format, verify_results
11 | 
12 | __all__ = [k for k in globals().keys() if not k.startswith("_")]
13 | 


--------------------------------------------------------------------------------
/layers/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates.
 2 | from .batch_norm import FrozenBatchNorm2d, get_norm, NaiveSyncBatchNorm
 3 | from .deform_conv import DeformConv, ModulatedDeformConv
 4 | from .mask_ops import paste_masks_in_image
 5 | from .nms import batched_nms, batched_nms_rotated, nms, nms_rotated
 6 | from .roi_align import ROIAlign, roi_align
 7 | from .roi_align_rotated import ROIAlignRotated, roi_align_rotated
 8 | from .shape_spec import ShapeSpec
 9 | from .wrappers import BatchNorm2d, Conv2d, ConvTranspose2d, cat, interpolate, Linear, nonzero_tuple
10 | from .blocks import CNNBlockBase, DepthwiseSeparableConv2d
11 | from .aspp import ASPP
12 | 
13 | __all__ = [k for k in globals().keys() if not k.startswith("_")]
14 | 


--------------------------------------------------------------------------------
/model_zoo/configs/COCO-Detection/fast_rcnn_R_50_FPN_1x.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-FPN.yaml"
 2 | MODEL:
 3 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
 4 |   MASK_ON: False
 5 |   LOAD_PROPOSALS: True
 6 |   RESNETS:
 7 |     DEPTH: 50
 8 |   PROPOSAL_GENERATOR:
 9 |     NAME: "PrecomputedProposals"
10 | DATASETS:
11 |   TRAIN: ("coco_2017_train",)
12 |   PROPOSAL_FILES_TRAIN: ("detectron2://COCO-Detection/rpn_R_50_FPN_1x/137258492/coco_2017_train_box_proposals_21bc3a.pkl", )
13 |   TEST: ("coco_2017_val",)
14 |   PROPOSAL_FILES_TEST: ("detectron2://COCO-Detection/rpn_R_50_FPN_1x/137258492/coco_2017_val_box_proposals_ee0dad.pkl", )
15 | DATALOADER:
16 |   # proposals are part of the dataset_dicts, and take a lot of RAM
17 |   NUM_WORKERS: 2
18 | 


--------------------------------------------------------------------------------
/layers/rotated_boxes.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates.
 2 | from __future__ import absolute_import, division, print_function, unicode_literals
 3 | 
 4 | from detectron2 import _C
 5 | 
 6 | 
 7 | def pairwise_iou_rotated(boxes1, boxes2):
 8 |     """
 9 |     Return intersection-over-union (Jaccard index) of boxes.
10 | 
11 |     Both sets of boxes are expected to be in
12 |     (x_center, y_center, width, height, angle) format.
13 | 
14 |     Arguments:
15 |         boxes1 (Tensor[N, 5])
16 |         boxes2 (Tensor[M, 5])
17 | 
18 |     Returns:
19 |         iou (Tensor[N, M]): the NxM matrix containing the pairwise
20 |             IoU values for every element in boxes1 and boxes2
21 |     """
22 |     return _C.box_iou_rotated(boxes1, boxes2)
23 | 


--------------------------------------------------------------------------------
/layers/shape_spec.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | # Copyright (c) Facebook, Inc. and its affiliates.
 3 | from collections import namedtuple
 4 | 
 5 | 
 6 | class ShapeSpec(namedtuple("_ShapeSpec", ["channels", "height", "width", "stride"])):
 7 |     """
 8 |     A simple structure that contains basic shape specification about a tensor.
 9 |     It is often used as the auxiliary inputs/outputs of models,
10 |     to complement the lack of shape inference ability among pytorch modules.
11 | 
12 |     Attributes:
13 |         channels:
14 |         height:
15 |         width:
16 |         stride:
17 |     """
18 | 
19 |     def __new__(cls, *, channels=None, height=None, width=None, stride=None):
20 |         return super().__new__(cls, channels, height, width, stride)
21 | 


--------------------------------------------------------------------------------
/model_zoo/configs/LVISv0.5-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_1x.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-FPN.yaml"
 2 | MODEL:
 3 |   WEIGHTS: "detectron2://ImageNetPretrained/FAIR/X-101-32x8d.pkl"
 4 |   PIXEL_STD: [57.375, 57.120, 58.395]
 5 |   MASK_ON: True
 6 |   RESNETS:
 7 |     STRIDE_IN_1X1: False  # this is a C2 model
 8 |     NUM_GROUPS: 32
 9 |     WIDTH_PER_GROUP: 8
10 |     DEPTH: 101
11 |   ROI_HEADS:
12 |     NUM_CLASSES: 1230
13 |     SCORE_THRESH_TEST: 0.0001
14 | INPUT:
15 |   MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
16 | DATASETS:
17 |   TRAIN: ("lvis_v0.5_train",)
18 |   TEST: ("lvis_v0.5_val",)
19 | TEST:
20 |   DETECTIONS_PER_IMAGE: 300  # LVIS allows up to 300
21 | DATALOADER:
22 |   SAMPLER_TRAIN: "RepeatFactorTrainingSampler"
23 |   REPEAT_THRESHOLD: 0.001
24 | 


--------------------------------------------------------------------------------
/model_zoo/configs/Misc/panoptic_fpn_R_101_dconv_cascade_gn_3x.yaml:
--------------------------------------------------------------------------------
 1 | # A large PanopticFPN for demo purposes.
 2 | # Use GN on backbone to support semantic seg.
 3 | # Use Cascade + Deform Conv to improve localization.
 4 | _BASE_: "../COCO-PanopticSegmentation/Base-Panoptic-FPN.yaml"
 5 | MODEL:
 6 |   WEIGHTS: "catalog://ImageNetPretrained/FAIR/R-101-GN"
 7 |   RESNETS:
 8 |     DEPTH: 101
 9 |     NORM: "GN"
10 |     DEFORM_ON_PER_STAGE: [False, True, True, True]
11 |     STRIDE_IN_1X1: False
12 |   FPN:
13 |     NORM: "GN"
14 |   ROI_HEADS:
15 |     NAME: CascadeROIHeads
16 |   ROI_BOX_HEAD:
17 |     CLS_AGNOSTIC_BBOX_REG: True
18 |   ROI_MASK_HEAD:
19 |     NORM: "GN"
20 |   RPN:
21 |     POST_NMS_TOPK_TRAIN: 2000
22 | SOLVER:
23 |   STEPS: (105000, 125000)
24 |   MAX_ITER: 135000
25 |   IMS_PER_BATCH: 32
26 |   BASE_LR: 0.04
27 | 


--------------------------------------------------------------------------------
/modeling/meta_arch/build.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates.
 2 | import torch
 3 | 
 4 | from detectron2.utils.registry import Registry
 5 | 
 6 | META_ARCH_REGISTRY = Registry("META_ARCH")  # noqa F401 isort:skip
 7 | META_ARCH_REGISTRY.__doc__ = """
 8 | Registry for meta-architectures, i.e. the whole model.
 9 | 
10 | The registered object will be called with `obj(cfg)`
11 | and expected to return a `nn.Module` object.
12 | """
13 | 
14 | 
15 | def build_model(cfg):
16 |     """
17 |     Build the whole model architecture, defined by ``cfg.MODEL.META_ARCHITECTURE``.
18 |     Note that it does not load any weights from ``cfg``.
19 |     """
20 |     meta_arch = cfg.MODEL.META_ARCHITECTURE
21 |     model = META_ARCH_REGISTRY.get(meta_arch)(cfg)
22 |     model.to(torch.device(cfg.MODEL.DEVICE))
23 |     return model
24 | 


--------------------------------------------------------------------------------
/model_zoo/configs/Base-RCNN-DilatedC5.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   RESNETS:
 4 |     OUT_FEATURES: ["res5"]
 5 |     RES5_DILATION: 2
 6 |   RPN:
 7 |     IN_FEATURES: ["res5"]
 8 |     PRE_NMS_TOPK_TEST: 6000
 9 |     POST_NMS_TOPK_TEST: 1000
10 |   ROI_HEADS:
11 |     NAME: "StandardROIHeads"
12 |     IN_FEATURES: ["res5"]
13 |   ROI_BOX_HEAD:
14 |     NAME: "FastRCNNConvFCHead"
15 |     NUM_FC: 2
16 |     POOLER_RESOLUTION: 7
17 |   ROI_MASK_HEAD:
18 |     NAME: "MaskRCNNConvUpsampleHead"
19 |     NUM_CONV: 4
20 |     POOLER_RESOLUTION: 14
21 | DATASETS:
22 |   TRAIN: ("coco_2017_train",)
23 |   TEST: ("coco_2017_val",)
24 | SOLVER:
25 |   IMS_PER_BATCH: 16
26 |   BASE_LR: 0.02
27 |   STEPS: (60000, 80000)
28 |   MAX_ITER: 90000
29 | INPUT:
30 |   MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
31 | VERSION: 2
32 | 


--------------------------------------------------------------------------------
/model_zoo/configs/Base-RetinaNet.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "RetinaNet"
 3 |   BACKBONE:
 4 |     NAME: "build_retinanet_resnet_fpn_backbone"
 5 |   RESNETS:
 6 |     OUT_FEATURES: ["res3", "res4", "res5"]
 7 |   ANCHOR_GENERATOR:
 8 |     SIZES: !!python/object/apply:eval ["[[x, x * 2**(1.0/3), x * 2**(2.0/3) ] for x in [32, 64, 128, 256, 512 ]]"]
 9 |   FPN:
10 |     IN_FEATURES: ["res3", "res4", "res5"]
11 |   RETINANET:
12 |     IOU_THRESHOLDS: [0.4, 0.5]
13 |     IOU_LABELS: [0, -1, 1]
14 |     SMOOTH_L1_LOSS_BETA: 0.0
15 | DATASETS:
16 |   TRAIN: ("coco_2017_train",)
17 |   TEST: ("coco_2017_val",)
18 | SOLVER:
19 |   IMS_PER_BATCH: 16
20 |   BASE_LR: 0.01  # Note that RetinaNet uses a different default learning rate
21 |   STEPS: (60000, 80000)
22 |   MAX_ITER: 90000
23 | INPUT:
24 |   MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
25 | VERSION: 2
26 | 


--------------------------------------------------------------------------------
/modeling/roi_heads/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates.
 2 | from .box_head import ROI_BOX_HEAD_REGISTRY, build_box_head, FastRCNNConvFCHead
 3 | from .keypoint_head import (
 4 |     ROI_KEYPOINT_HEAD_REGISTRY,
 5 |     build_keypoint_head,
 6 |     BaseKeypointRCNNHead,
 7 |     KRCNNConvDeconvUpsampleHead,
 8 | )
 9 | from .mask_head import (
10 |     ROI_MASK_HEAD_REGISTRY,
11 |     build_mask_head,
12 |     BaseMaskRCNNHead,
13 |     MaskRCNNConvUpsampleHead,
14 | )
15 | from .roi_heads import (
16 |     ROI_HEADS_REGISTRY,
17 |     ROIHeads,
18 |     Res5ROIHeads,
19 |     StandardROIHeads,
20 |     build_roi_heads,
21 |     select_foreground_proposals,
22 | )
23 | from .rotated_fast_rcnn import RROIHeads
24 | from .fast_rcnn import FastRCNNOutputLayers
25 | 
26 | from . import cascade_rcnn  # isort:skip
27 | 
28 | __all__ = list(globals().keys())
29 | 


--------------------------------------------------------------------------------
/model_zoo/configs/LVISv1-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_1x.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-FPN.yaml"
 2 | MODEL:
 3 |   WEIGHTS: "detectron2://ImageNetPretrained/FAIR/X-101-32x8d.pkl"
 4 |   PIXEL_STD: [57.375, 57.120, 58.395]
 5 |   MASK_ON: True
 6 |   RESNETS:
 7 |     STRIDE_IN_1X1: False  # this is a C2 model
 8 |     NUM_GROUPS: 32
 9 |     WIDTH_PER_GROUP: 8
10 |     DEPTH: 101
11 |   ROI_HEADS:
12 |     NUM_CLASSES: 1203
13 |     SCORE_THRESH_TEST: 0.0001
14 | INPUT:
15 |   MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
16 | DATASETS:
17 |   TRAIN: ("lvis_v1_train",)
18 |   TEST: ("lvis_v1_val",)
19 | SOLVER:
20 |   STEPS: (120000, 160000)
21 |   MAX_ITER: 180000  # 180000 * 16 / 100000 ~ 28.8 epochs
22 | TEST:
23 |   DETECTIONS_PER_IMAGE: 300  # LVIS allows up to 300
24 | DATALOADER:
25 |   SAMPLER_TRAIN: "RepeatFactorTrainingSampler"
26 |   REPEAT_THRESHOLD: 0.001
27 | 


--------------------------------------------------------------------------------
/model_zoo/configs/quick_schedules/keypoint_rcnn_R_50_FPN_training_acc_test.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-FPN.yaml"
 2 | MODEL:
 3 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
 4 |   KEYPOINT_ON: True
 5 |   RESNETS:
 6 |     DEPTH: 50
 7 |   ROI_HEADS:
 8 |     BATCH_SIZE_PER_IMAGE: 256
 9 |     NUM_CLASSES: 1
10 |   ROI_KEYPOINT_HEAD:
11 |     POOLER_RESOLUTION: 14
12 |     POOLER_SAMPLING_RATIO: 2
13 |   ROI_BOX_HEAD:
14 |     SMOOTH_L1_BETA: 1.0  # Keypoint AP degrades when using plain L1 loss
15 |   RPN:
16 |     SMOOTH_L1_BETA: 0.2  # Keypoint AP degrades when using plain L1 loss
17 | DATASETS:
18 |   TRAIN: ("keypoints_coco_2017_val",)
19 |   TEST: ("keypoints_coco_2017_val",)
20 | INPUT:
21 |   MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
22 | SOLVER:
23 |   WARMUP_FACTOR: 0.33333333
24 |   WARMUP_ITERS: 100
25 |   STEPS: (5500, 5800)
26 |   MAX_ITER: 6000
27 | TEST:
28 |   EXPECTED_RESULTS: [["bbox", "AP", 53.5, 1.0], ["keypoints", "AP", 72.4, 1.0]]
29 | 


--------------------------------------------------------------------------------
/modeling/proposal_generator/build.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates.
 2 | from detectron2.utils.registry import Registry
 3 | 
 4 | PROPOSAL_GENERATOR_REGISTRY = Registry("PROPOSAL_GENERATOR")
 5 | PROPOSAL_GENERATOR_REGISTRY.__doc__ = """
 6 | Registry for proposal generator, which produces object proposals from feature maps.
 7 | 
 8 | The registered object will be called with `obj(cfg, input_shape)`.
 9 | The call should return a `nn.Module` object.
10 | """
11 | 
12 | from . import rpn, rrpn  # noqa F401 isort:skip
13 | 
14 | 
15 | def build_proposal_generator(cfg, input_shape):
16 |     """
17 |     Build a proposal generator from `cfg.MODEL.PROPOSAL_GENERATOR.NAME`.
18 |     The name can be "PrecomputedProposals" to use no proposal generator.
19 |     """
20 |     name = cfg.MODEL.PROPOSAL_GENERATOR.NAME
21 |     if name == "PrecomputedProposals":
22 |         return None
23 | 
24 |     return PROPOSAL_GENERATOR_REGISTRY.get(name)(cfg, input_shape)
25 | 


--------------------------------------------------------------------------------
/model_zoo/configs/Misc/cascade_mask_rcnn_X_152_32x8d_FPN_IN5k_gn_dconv.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-FPN.yaml"
 2 | MODEL:
 3 |   MASK_ON: True
 4 |   WEIGHTS: "catalog://ImageNetPretrained/FAIR/X-152-32x8d-IN5k"
 5 |   RESNETS:
 6 |     STRIDE_IN_1X1: False  # this is a C2 model
 7 |     NUM_GROUPS: 32
 8 |     WIDTH_PER_GROUP: 8
 9 |     DEPTH: 152
10 |     DEFORM_ON_PER_STAGE: [False, True, True, True]
11 |   ROI_HEADS:
12 |     NAME: "CascadeROIHeads"
13 |   ROI_BOX_HEAD:
14 |     NAME: "FastRCNNConvFCHead"
15 |     NUM_CONV: 4
16 |     NUM_FC: 1
17 |     NORM: "GN"
18 |     CLS_AGNOSTIC_BBOX_REG: True
19 |   ROI_MASK_HEAD:
20 |     NUM_CONV: 8
21 |     NORM: "GN"
22 |   RPN:
23 |     POST_NMS_TOPK_TRAIN: 2000
24 | SOLVER:
25 |   IMS_PER_BATCH: 128
26 |   STEPS: (35000, 45000)
27 |   MAX_ITER: 50000
28 |   BASE_LR: 0.16
29 | INPUT:
30 |   MIN_SIZE_TRAIN: (640, 864)
31 |   MIN_SIZE_TRAIN_SAMPLING: "range"
32 |   MAX_SIZE_TRAIN: 1440
33 |   CROP:
34 |     ENABLED: True
35 | TEST:
36 |   EVAL_PERIOD: 2500
37 | 


--------------------------------------------------------------------------------
/model_zoo/configs/Misc/cascade_rcnn_X_152_32x8d_FPN_IN5k_gn_dconv.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-FPN.yaml"
 2 | MODEL:
 3 |   MASK_ON: False
 4 |   WEIGHTS: "catalog://ImageNetPretrained/FAIR/X-152-32x8d-IN5k"
 5 |   RESNETS:
 6 |     STRIDE_IN_1X1: False  # this is a C2 model
 7 |     NUM_GROUPS: 32
 8 |     WIDTH_PER_GROUP: 8
 9 |     DEPTH: 152
10 |     DEFORM_ON_PER_STAGE: [False, True, True, True]
11 |   ROI_HEADS:
12 |     NAME: "CascadeROIHeads"
13 |     NUM_CLASSES: 15
14 |   ROI_BOX_HEAD:
15 |     NAME: "FastRCNNConvFCHead"
16 |     NUM_CONV: 4
17 |     NUM_FC: 1
18 |     NORM: "GN"
19 |     CLS_AGNOSTIC_BBOX_REG: True
20 |   ROI_MASK_HEAD:
21 |     NUM_CONV: 8
22 |     NORM: "GN"
23 |   RPN:
24 |     POST_NMS_TOPK_TRAIN: 2000
25 | SOLVER:
26 |   IMS_PER_BATCH: 128
27 |   STEPS: (35000, 45000)
28 |   MAX_ITER: 50000
29 |   BASE_LR: 0.16
30 | INPUT:
31 |   MIN_SIZE_TRAIN: (640, 864)
32 |   MIN_SIZE_TRAIN_SAMPLING: "range"
33 |   MAX_SIZE_TRAIN: 1440
34 |   CROP:
35 |     ENABLED: True
36 | TEST:
37 |   EVAL_PERIOD: 2500
38 | 


--------------------------------------------------------------------------------
/model_zoo/configs/Detectron1-Comparisons/keypoint_rcnn_R_50_FPN_1x.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-FPN.yaml"
 2 | MODEL:
 3 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
 4 |   KEYPOINT_ON: True
 5 |   RESNETS:
 6 |     DEPTH: 50
 7 |   ROI_HEADS:
 8 |     NUM_CLASSES: 1
 9 |   ROI_KEYPOINT_HEAD:
10 |     POOLER_RESOLUTION: 14
11 |     POOLER_SAMPLING_RATIO: 2
12 |     POOLER_TYPE: "ROIAlign"
13 |   # Detectron1 uses smooth L1 loss with some magic beta values.
14 |   # The defaults are changed to L1 loss in Detectron2.
15 |   ROI_BOX_HEAD:
16 |     SMOOTH_L1_BETA: 1.0
17 |     POOLER_SAMPLING_RATIO: 2
18 |     POOLER_TYPE: "ROIAlign"
19 |   RPN:
20 |     SMOOTH_L1_BETA: 0.1111
21 |     # Detectron1 uses 2000 proposals per-batch, but this option is per-image in detectron2
22 |     # 1000 proposals per-image is found to hurt box AP.
23 |     # Therefore we increase it to 1500 per-image.
24 |     POST_NMS_TOPK_TRAIN: 1500
25 | DATASETS:
26 |   TRAIN: ("keypoints_coco_2017_train",)
27 |   TEST: ("keypoints_coco_2017_val",)
28 | 


--------------------------------------------------------------------------------
/projects/deeplab/build_solver.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates.
 2 | import torch
 3 | 
 4 | from detectron2.config import CfgNode
 5 | from detectron2.solver import build_lr_scheduler as build_d2_lr_scheduler
 6 | 
 7 | from .lr_scheduler import WarmupPolyLR
 8 | 
 9 | 
10 | def build_lr_scheduler(
11 |     cfg: CfgNode, optimizer: torch.optim.Optimizer
12 | ) -> torch.optim.lr_scheduler._LRScheduler:
13 |     """
14 |     Build a LR scheduler from config.
15 |     """
16 |     name = cfg.SOLVER.LR_SCHEDULER_NAME
17 |     if name == "WarmupPolyLR":
18 |         return WarmupPolyLR(
19 |             optimizer,
20 |             cfg.SOLVER.MAX_ITER,
21 |             warmup_factor=cfg.SOLVER.WARMUP_FACTOR,
22 |             warmup_iters=cfg.SOLVER.WARMUP_ITERS,
23 |             warmup_method=cfg.SOLVER.WARMUP_METHOD,
24 |             power=cfg.SOLVER.POLY_LR_POWER,
25 |             constant_ending=cfg.SOLVER.POLY_LR_CONSTANT_ENDING,
26 |         )
27 |     else:
28 |         return build_d2_lr_scheduler(cfg, optimizer)
29 | 


--------------------------------------------------------------------------------
/model_zoo/configs/Cityscapes/mask_rcnn_R_50_FPN.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-FPN.yaml"
 2 | MODEL:
 3 |   # WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
 4 |   # For better, more stable performance initialize from COCO
 5 |   WEIGHTS: "detectron2://COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x/137849600/model_final_f10217.pkl"
 6 |   MASK_ON: True
 7 |   ROI_HEADS:
 8 |     NUM_CLASSES: 8
 9 | # This is similar to the setting used in Mask R-CNN paper, Appendix A
10 | # But there are some differences, e.g., we did not initialize the output
11 | # layer using the corresponding classes from COCO
12 | INPUT:
13 |   MIN_SIZE_TRAIN: (800, 832, 864, 896, 928, 960, 992, 1024)
14 |   MIN_SIZE_TRAIN_SAMPLING: "choice"
15 |   MIN_SIZE_TEST: 1024
16 |   MAX_SIZE_TRAIN: 2048
17 |   MAX_SIZE_TEST: 2048
18 | DATASETS:
19 |   TRAIN: ("cityscapes_fine_instance_seg_train",)
20 |   TEST: ("cityscapes_fine_instance_seg_val",)
21 | SOLVER:
22 |   BASE_LR: 0.01
23 |   STEPS: (18000,)
24 |   MAX_ITER: 24000
25 |   IMS_PER_BATCH: 8
26 | TEST:
27 |   EVAL_PERIOD: 8000
28 | 


--------------------------------------------------------------------------------
/model_zoo/configs/quick_schedules/keypoint_rcnn_R_50_FPN_normalized_training_acc_test.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-FPN.yaml"
 2 | MODEL:
 3 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
 4 |   KEYPOINT_ON: True
 5 |   RESNETS:
 6 |     DEPTH: 50
 7 |   ROI_HEADS:
 8 |     BATCH_SIZE_PER_IMAGE: 256
 9 |     NUM_CLASSES: 1
10 |   ROI_KEYPOINT_HEAD:
11 |     POOLER_RESOLUTION: 14
12 |     POOLER_SAMPLING_RATIO: 2
13 |     NORMALIZE_LOSS_BY_VISIBLE_KEYPOINTS: False
14 |     LOSS_WEIGHT: 4.0
15 |   ROI_BOX_HEAD:
16 |     SMOOTH_L1_BETA: 1.0  # Keypoint AP degrades when using plain L1 loss
17 |   RPN:
18 |     SMOOTH_L1_BETA: 0.2  # Keypoint AP degrades when using plain L1 loss
19 | DATASETS:
20 |   TRAIN: ("keypoints_coco_2017_val",)
21 |   TEST: ("keypoints_coco_2017_val",)
22 | INPUT:
23 |   MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
24 | SOLVER:
25 |   WARMUP_FACTOR: 0.33333333
26 |   WARMUP_ITERS: 100
27 |   STEPS: (5500, 5800)
28 |   MAX_ITER: 6000
29 | TEST:
30 |   EXPECTED_RESULTS: [["bbox", "AP", 55.35, 1.0], ["keypoints", "AP", 76.91, 1.0]]
31 | 


--------------------------------------------------------------------------------
/utils/serialize.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates.
 2 | import cloudpickle
 3 | 
 4 | 
 5 | class PicklableWrapper(object):
 6 |     """
 7 |     Wrap an object to make it more picklable, note that it uses
 8 |     heavy weight serialization libraries that are slower than pickle.
 9 |     It's best to use it only on closures (which are usually not picklable).
10 | 
11 |     This is a simplified version of
12 |     https://github.com/joblib/joblib/blob/master/joblib/externals/loky/cloudpickle_wrapper.py
13 |     """
14 | 
15 |     def __init__(self, obj):
16 |         self._obj = obj
17 | 
18 |     def __reduce__(self):
19 |         s = cloudpickle.dumps(self._obj)
20 |         return cloudpickle.loads, (s,)
21 | 
22 |     def __call__(self, *args, **kwargs):
23 |         return self._obj(*args, **kwargs)
24 | 
25 |     def __getattr__(self, attr):
26 |         # Ensure that the wrapped object can be used seamlessly as the previous object.
27 |         if attr not in ["_obj"]:
28 |             return getattr(self._obj, attr)
29 |         return getattr(self, attr)
30 | 


--------------------------------------------------------------------------------
/modeling/backbone/build.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates.
 2 | from detectron2.layers import ShapeSpec
 3 | from detectron2.utils.registry import Registry
 4 | 
 5 | from .backbone import Backbone
 6 | 
 7 | BACKBONE_REGISTRY = Registry("BACKBONE")
 8 | BACKBONE_REGISTRY.__doc__ = """
 9 | Registry for backbones, which extract feature maps from images
10 | 
11 | The registered object must be a callable that accepts two arguments:
12 | 
13 | 1. A :class:`detectron2.config.CfgNode`
14 | 2. A :class:`detectron2.layers.ShapeSpec`, which contains the input shape specification.
15 | 
16 | Registered object must return instance of :class:`Backbone`.
17 | """
18 | 
19 | 
20 | def build_backbone(cfg, input_shape=None):
21 |     """
22 |     Build a backbone from `cfg.MODEL.BACKBONE.NAME`.
23 | 
24 |     Returns:
25 |         an instance of :class:`Backbone`
26 |     """
27 |     if input_shape is None:
28 |         input_shape = ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN))
29 | 
30 |     backbone_name = cfg.MODEL.BACKBONE.NAME
31 |     backbone = BACKBONE_REGISTRY.get(backbone_name)(cfg, input_shape)
32 |     assert isinstance(backbone, Backbone)
33 |     return backbone
34 | 


--------------------------------------------------------------------------------
/projects/deeplab/config.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | # Copyright (c) Facebook, Inc. and its affiliates.
 3 | 
 4 | 
 5 | def add_deeplab_config(cfg):
 6 |     """
 7 |     Add config for DeepLab.
 8 |     """
 9 |     # We retry random cropping until no single category in semantic segmentation GT occupies more
10 |     # than `SINGLE_CATEGORY_MAX_AREA` part of the crop.
11 |     cfg.INPUT.CROP.SINGLE_CATEGORY_MAX_AREA = 1.0
12 |     # Used for `poly` learning rate schedule.
13 |     cfg.SOLVER.POLY_LR_POWER = 0.9
14 |     cfg.SOLVER.POLY_LR_CONSTANT_ENDING = 0.0
15 |     # Loss type, choose from `cross_entropy`, `hard_pixel_mining`.
16 |     cfg.MODEL.SEM_SEG_HEAD.LOSS_TYPE = "hard_pixel_mining"
17 |     # DeepLab settings
18 |     cfg.MODEL.SEM_SEG_HEAD.PROJECT_FEATURES = ["res2"]
19 |     cfg.MODEL.SEM_SEG_HEAD.PROJECT_CHANNELS = [48]
20 |     cfg.MODEL.SEM_SEG_HEAD.ASPP_CHANNELS = 256
21 |     cfg.MODEL.SEM_SEG_HEAD.ASPP_DILATIONS = [6, 12, 18]
22 |     cfg.MODEL.SEM_SEG_HEAD.ASPP_DROPOUT = 0.1
23 |     # Backbone new configs
24 |     cfg.MODEL.RESNETS.RES4_DILATION = 1
25 |     cfg.MODEL.RESNETS.RES5_MULTI_GRID = [1, 2, 4]
26 |     # ResNet stem type from: `basic`, `deeplab`
27 |     cfg.MODEL.RESNETS.STEM_TYPE = "deeplab"
28 | 


--------------------------------------------------------------------------------
/utils/file_io.py:
--------------------------------------------------------------------------------
 1 | from fvcore.common.file_io import HTTPURLHandler, OneDrivePathHandler, PathHandler, PathManagerBase
 2 | 
 3 | __all__ = ["PathManager", "PathHandler"]
 4 | 
 5 | 
 6 | PathManager = PathManagerBase()
 7 | """
 8 | This is a detectron2 project-specific PathManager.
 9 | We try to stay away from global PathManager in fvcore as it
10 | introduces potential conflicts among other libraries.
11 | """
12 | 
13 | 
14 | class Detectron2Handler(PathHandler):
15 |     """
16 |     Resolve anything that's hosted under detectron2's namespace.
17 |     """
18 | 
19 |     PREFIX = "detectron2://"
20 |     S3_DETECTRON2_PREFIX = "https://dl.fbaipublicfiles.com/detectron2/"
21 | 
22 |     def _get_supported_prefixes(self):
23 |         return [self.PREFIX]
24 | 
25 |     def _get_local_path(self, path):
26 |         name = path[len(self.PREFIX) :]
27 |         return PathManager.get_local_path(self.S3_DETECTRON2_PREFIX + name)
28 | 
29 |     def _open(self, path, mode="r", **kwargs):
30 |         return PathManager.open(self._get_local_path(path), mode, **kwargs)
31 | 
32 | 
33 | PathManager.register_handler(HTTPURLHandler())
34 | PathManager.register_handler(OneDrivePathHandler())
35 | PathManager.register_handler(Detectron2Handler())
36 | 


--------------------------------------------------------------------------------
/projects/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates.
 2 | import importlib
 3 | from pathlib import Path
 4 | 
 5 | _PROJECTS = {
 6 |     "point_rend": "PointRend",
 7 |     "deeplab": "DeepLab",
 8 |     "panoptic_deeplab": "Panoptic-DeepLab",
 9 | }
10 | _PROJECT_ROOT = Path(__file__).parent.parent.parent / "projects"
11 | 
12 | if _PROJECT_ROOT.is_dir():
13 |     # This is true only for in-place installation (pip install -e, setup.py develop),
14 |     # where setup(package_dir=) does not work: https://github.com/pypa/setuptools/issues/230
15 | 
16 |     class _D2ProjectsFinder(importlib.abc.MetaPathFinder):
17 |         def find_spec(self, name, path, target=None):
18 |             if not name.startswith("detectron2.projects."):
19 |                 return
20 |             project_name = name.split(".")[-1]
21 |             project_dir = _PROJECTS.get(project_name)
22 |             if not project_dir:
23 |                 return
24 |             target_file = _PROJECT_ROOT / f"{project_dir}/{project_name}/__init__.py"
25 |             if not target_file.is_file():
26 |                 return
27 |             return importlib.util.spec_from_file_location(name, target_file)
28 | 
29 |     import sys
30 | 
31 |     sys.meta_path.append(_D2ProjectsFinder())
32 | 


--------------------------------------------------------------------------------
/modeling/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates.
 2 | from detectron2.layers import ShapeSpec
 3 | 
 4 | from .anchor_generator import build_anchor_generator, ANCHOR_GENERATOR_REGISTRY
 5 | from .backbone import (
 6 |     BACKBONE_REGISTRY,
 7 |     FPN,
 8 |     Backbone,
 9 |     ResNet,
10 |     ResNetBlockBase,
11 |     build_backbone,
12 |     build_resnet_backbone,
13 |     make_stage,
14 | )
15 | from .meta_arch import (
16 |     META_ARCH_REGISTRY,
17 |     SEM_SEG_HEADS_REGISTRY,
18 |     GeneralizedRCNN,
19 |     PanopticFPN,
20 |     ProposalNetwork,
21 |     RetinaNet,
22 |     SemanticSegmentor,
23 |     build_model,
24 |     build_sem_seg_head,
25 | )
26 | from .postprocessing import detector_postprocess
27 | from .proposal_generator import (
28 |     PROPOSAL_GENERATOR_REGISTRY,
29 |     build_proposal_generator,
30 |     RPN_HEAD_REGISTRY,
31 |     build_rpn_head,
32 | )
33 | from .roi_heads import (
34 |     ROI_BOX_HEAD_REGISTRY,
35 |     ROI_HEADS_REGISTRY,
36 |     ROI_KEYPOINT_HEAD_REGISTRY,
37 |     ROI_MASK_HEAD_REGISTRY,
38 |     ROIHeads,
39 |     StandardROIHeads,
40 |     BaseMaskRCNNHead,
41 |     BaseKeypointRCNNHead,
42 |     FastRCNNOutputLayers,
43 |     build_box_head,
44 |     build_keypoint_head,
45 |     build_mask_head,
46 |     build_roi_heads,
47 | )
48 | from .test_time_augmentation import DatasetMapperTTA, GeneralizedRCNNWithTTA
49 | 
50 | _EXCLUDE = {"ShapeSpec"}
51 | __all__ = [k for k in globals().keys() if k not in _EXCLUDE and not k.startswith("_")]
52 | 


--------------------------------------------------------------------------------
/model_zoo/configs/Base-RCNN-FPN.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   BACKBONE:
 4 |     NAME: "build_resnet_fpn_backbone"
 5 |   RESNETS:
 6 |     OUT_FEATURES: ["res2", "res3", "res4", "res5"]
 7 |   FPN:
 8 |     IN_FEATURES: ["res2", "res3", "res4", "res5"]
 9 |   ANCHOR_GENERATOR:
10 |     SIZES: [[32], [64], [128], [256], [512]]  # One size for each in feature map
11 |     ASPECT_RATIOS: [[0.5, 1.0, 2.0]]  # Three aspect ratios (same for all in feature maps)
12 |   RPN:
13 |     IN_FEATURES: ["p2", "p3", "p4", "p5", "p6"]
14 |     PRE_NMS_TOPK_TRAIN: 2000  # Per FPN level
15 |     PRE_NMS_TOPK_TEST: 1000  # Per FPN level
16 |     # Detectron1 uses 2000 proposals per-batch,
17 |     # (See "modeling/rpn/rpn_outputs.py" for details of this legacy issue)
18 |     # which is approximately 1000 proposals per-image since the default batch size for FPN is 2.
19 |     POST_NMS_TOPK_TRAIN: 1000
20 |     POST_NMS_TOPK_TEST: 1000
21 |   ROI_HEADS:
22 |     NAME: "StandardROIHeads"
23 |     NUM_CLASSES: 15
24 |     IN_FEATURES: ["p2", "p3", "p4", "p5"]
25 |   ROI_BOX_HEAD:
26 |     NAME: "FastRCNNConvFCHead"
27 |     NUM_FC: 2
28 |     POOLER_RESOLUTION: 7
29 |   ROI_MASK_HEAD:
30 |     NAME: "MaskRCNNConvUpsampleHead"
31 |     NUM_CONV: 4
32 |     POOLER_RESOLUTION: 14
33 | DATASETS:
34 |   TRAIN: ("coco_2017_train",)
35 |   TEST: ("coco_2017_val",)
36 | SOLVER:
37 |   IMS_PER_BATCH: 16
38 |   BASE_LR: 0.02
39 |   STEPS: (60000, 80000)
40 |   MAX_ITER: 90000
41 | INPUT:
42 |   MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
43 | VERSION: 2
44 | 


--------------------------------------------------------------------------------
/modeling/backbone/backbone.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates.
 2 | from abc import ABCMeta, abstractmethod
 3 | import torch.nn as nn
 4 | 
 5 | from detectron2.layers import ShapeSpec
 6 | 
 7 | __all__ = ["Backbone"]
 8 | 
 9 | 
10 | class Backbone(nn.Module, metaclass=ABCMeta):
11 |     """
12 |     Abstract base class for network backbones.
13 |     """
14 | 
15 |     def __init__(self):
16 |         """
17 |         The `__init__` method of any subclass can specify its own set of arguments.
18 |         """
19 |         super().__init__()
20 | 
21 |     @abstractmethod
22 |     def forward(self):
23 |         """
24 |         Subclasses must override this method, but adhere to the same return type.
25 | 
26 |         Returns:
27 |             dict[str->Tensor]: mapping from feature name (e.g., "res2") to tensor
28 |         """
29 |         pass
30 | 
31 |     @property
32 |     def size_divisibility(self) -> int:
33 |         """
34 |         Some backbones require the input height and width to be divisible by a
35 |         specific integer. This is typically true for encoder / decoder type networks
36 |         with lateral connection (e.g., FPN) for which feature maps need to match
37 |         dimension in the "bottom up" and "top down" paths. Set to 0 if no specific
38 |         input size divisibility is required.
39 |         """
40 |         return 0
41 | 
42 |     def output_shape(self):
43 |         """
44 |         Returns:
45 |             dict[str->ShapeSpec]
46 |         """
47 |         # this is a backward-compatible default
48 |         return {
49 |             name: ShapeSpec(
50 |                 channels=self._out_feature_channels[name], stride=self._out_feature_strides[name]
51 |             )
52 |             for name in self._out_features
53 |         }
54 | 


--------------------------------------------------------------------------------
/projects/deeplab/loss.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates.
 2 | import torch
 3 | import torch.nn as nn
 4 | 
 5 | 
 6 | class DeepLabCE(nn.Module):
 7 |     """
 8 |     Hard pixel mining with cross entropy loss, for semantic segmentation.
 9 |     This is used in TensorFlow DeepLab frameworks.
10 |     Paper: DeeperLab: Single-Shot Image Parser
11 |     Reference: https://github.com/tensorflow/models/blob/bd488858d610e44df69da6f89277e9de8a03722c/research/deeplab/utils/train_utils.py#L33  # noqa
12 |     Arguments:
13 |         ignore_label: Integer, label to ignore.
14 |         top_k_percent_pixels: Float, the value lies in [0.0, 1.0]. When its
15 |             value < 1.0, only compute the loss for the top k percent pixels
16 |             (e.g., the top 20% pixels). This is useful for hard pixel mining.
17 |         weight: Tensor, a manual rescaling weight given to each class.
18 |     """
19 | 
20 |     def __init__(self, ignore_label=-1, top_k_percent_pixels=1.0, weight=None):
21 |         super(DeepLabCE, self).__init__()
22 |         self.top_k_percent_pixels = top_k_percent_pixels
23 |         self.ignore_label = ignore_label
24 |         self.criterion = nn.CrossEntropyLoss(
25 |             weight=weight, ignore_index=ignore_label, reduction="none"
26 |         )
27 | 
28 |     def forward(self, logits, labels, weights=None):
29 |         if weights is None:
30 |             pixel_losses = self.criterion(logits, labels).contiguous().view(-1)
31 |         else:
32 |             # Apply per-pixel loss weights.
33 |             pixel_losses = self.criterion(logits, labels) * weights
34 |             pixel_losses = pixel_losses.contiguous().view(-1)
35 |         if self.top_k_percent_pixels == 1.0:
36 |             return pixel_losses.mean()
37 | 
38 |         top_k_pixels = int(self.top_k_percent_pixels * pixel_losses.numel())
39 |         pixel_losses, _ = torch.topk(pixel_losses, top_k_pixels)
40 |         return pixel_losses.mean()
41 | 


--------------------------------------------------------------------------------
/data/samplers/grouped_batch_sampler.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates.
 2 | import numpy as np
 3 | from torch.utils.data.sampler import BatchSampler, Sampler
 4 | 
 5 | 
 6 | class GroupedBatchSampler(BatchSampler):
 7 |     """
 8 |     Wraps another sampler to yield a mini-batch of indices.
 9 |     It enforces that the batch only contain elements from the same group.
10 |     It also tries to provide mini-batches which follows an ordering which is
11 |     as close as possible to the ordering from the original sampler.
12 |     """
13 | 
14 |     def __init__(self, sampler, group_ids, batch_size):
15 |         """
16 |         Args:
17 |             sampler (Sampler): Base sampler.
18 |             group_ids (list[int]): If the sampler produces indices in range [0, N),
19 |                 `group_ids` must be a list of `N` ints which contains the group id of each sample.
20 |                 The group ids must be a set of integers in the range [0, num_groups).
21 |             batch_size (int): Size of mini-batch.
22 |         """
23 |         if not isinstance(sampler, Sampler):
24 |             raise ValueError(
25 |                 "sampler should be an instance of "
26 |                 "torch.utils.data.Sampler, but got sampler={}".format(sampler)
27 |             )
28 |         self.sampler = sampler
29 |         self.group_ids = np.asarray(group_ids)
30 |         assert self.group_ids.ndim == 1
31 |         self.batch_size = batch_size
32 |         groups = np.unique(self.group_ids).tolist()
33 | 
34 |         # buffer the indices of each group until batch size is reached
35 |         self.buffer_per_group = {k: [] for k in groups}
36 | 
37 |     def __iter__(self):
38 |         for idx in self.sampler:
39 |             group_id = self.group_ids[idx]
40 |             group_buffer = self.buffer_per_group[group_id]
41 |             group_buffer.append(idx)
42 |             if len(group_buffer) == self.batch_size:
43 |                 yield group_buffer[:]  # yield a copy of the list
44 |                 del group_buffer[:]
45 | 
46 |     def __len__(self):
47 |         raise NotImplementedError("len() of GroupedBatchSampler is not well-defined.")
48 | 


--------------------------------------------------------------------------------
/projects/panoptic_deeplab/config.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | # Copyright (c) Facebook, Inc. and its affiliates.
 3 | 
 4 | from detectron2.config import CfgNode as CN
 5 | from detectron2.projects.deeplab import add_deeplab_config
 6 | 
 7 | 
 8 | def add_panoptic_deeplab_config(cfg):
 9 |     """
10 |     Add config for Panoptic-DeepLab.
11 |     """
12 |     # Reuse DeepLab config.
13 |     add_deeplab_config(cfg)
14 |     # Target generation parameters.
15 |     cfg.INPUT.GAUSSIAN_SIGMA = 10
16 |     cfg.INPUT.IGNORE_STUFF_IN_OFFSET = True
17 |     cfg.INPUT.SMALL_INSTANCE_AREA = 4096
18 |     cfg.INPUT.SMALL_INSTANCE_WEIGHT = 3
19 |     cfg.INPUT.IGNORE_CROWD_IN_SEMANTIC = False
20 |     # Optimizer type.
21 |     cfg.SOLVER.OPTIMIZER = "ADAM"
22 |     # Panoptic-DeepLab semantic segmentation head.
23 |     # We add an extra convolution before predictor.
24 |     cfg.MODEL.SEM_SEG_HEAD.HEAD_CHANNELS = 256
25 |     cfg.MODEL.SEM_SEG_HEAD.LOSS_TOP_K = 0.2
26 |     # Panoptic-DeepLab instance segmentation head.
27 |     cfg.MODEL.INS_EMBED_HEAD = CN()
28 |     cfg.MODEL.INS_EMBED_HEAD.NAME = "PanopticDeepLabInsEmbedHead"
29 |     cfg.MODEL.INS_EMBED_HEAD.IN_FEATURES = ["res2", "res3", "res5"]
30 |     cfg.MODEL.INS_EMBED_HEAD.PROJECT_FEATURES = ["res2", "res3"]
31 |     cfg.MODEL.INS_EMBED_HEAD.PROJECT_CHANNELS = [32, 64]
32 |     cfg.MODEL.INS_EMBED_HEAD.ASPP_CHANNELS = 256
33 |     cfg.MODEL.INS_EMBED_HEAD.ASPP_DILATIONS = [6, 12, 18]
34 |     cfg.MODEL.INS_EMBED_HEAD.ASPP_DROPOUT = 0.1
35 |     # We add an extra convolution before predictor.
36 |     cfg.MODEL.INS_EMBED_HEAD.HEAD_CHANNELS = 32
37 |     cfg.MODEL.INS_EMBED_HEAD.CONVS_DIM = 128
38 |     cfg.MODEL.INS_EMBED_HEAD.COMMON_STRIDE = 4
39 |     cfg.MODEL.INS_EMBED_HEAD.NORM = "SyncBN"
40 |     cfg.MODEL.INS_EMBED_HEAD.CENTER_LOSS_WEIGHT = 200.0
41 |     cfg.MODEL.INS_EMBED_HEAD.OFFSET_LOSS_WEIGHT = 0.01
42 |     # Panoptic-DeepLab post-processing setting.
43 |     cfg.MODEL.PANOPTIC_DEEPLAB = CN()
44 |     # Stuff area limit, ignore stuff region below this number.
45 |     cfg.MODEL.PANOPTIC_DEEPLAB.STUFF_AREA = 2048
46 |     cfg.MODEL.PANOPTIC_DEEPLAB.CENTER_THRESHOLD = 0.1
47 |     cfg.MODEL.PANOPTIC_DEEPLAB.NMS_KERNEL = 7
48 |     cfg.MODEL.PANOPTIC_DEEPLAB.TOP_K_INSTANCE = 200
49 |     # If set to False, Panoptic-DeepLab will not evaluate instance segmentation.
50 |     cfg.MODEL.PANOPTIC_DEEPLAB.PREDICT_INSTANCES = True
51 | 


--------------------------------------------------------------------------------
/projects/point_rend/config.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | # Copyright (c) Facebook, Inc. and its affiliates.
 3 | 
 4 | from detectron2.config import CfgNode as CN
 5 | 
 6 | 
 7 | def add_pointrend_config(cfg):
 8 |     """
 9 |     Add config for PointRend.
10 |     """
11 |     # We retry random cropping until no single category in semantic segmentation GT occupies more
12 |     # than `SINGLE_CATEGORY_MAX_AREA` part of the crop.
13 |     cfg.INPUT.CROP.SINGLE_CATEGORY_MAX_AREA = 1.0
14 |     # Color augmentatition from SSD paper for semantic segmentation model during training.
15 |     cfg.INPUT.COLOR_AUG_SSD = False
16 | 
17 |     # Names of the input feature maps to be used by a coarse mask head.
18 |     cfg.MODEL.ROI_MASK_HEAD.IN_FEATURES = ("p2",)
19 |     cfg.MODEL.ROI_MASK_HEAD.FC_DIM = 1024
20 |     cfg.MODEL.ROI_MASK_HEAD.NUM_FC = 2
21 |     # The side size of a coarse mask head prediction.
22 |     cfg.MODEL.ROI_MASK_HEAD.OUTPUT_SIDE_RESOLUTION = 7
23 |     # True if point head is used.
24 |     cfg.MODEL.ROI_MASK_HEAD.POINT_HEAD_ON = False
25 | 
26 |     cfg.MODEL.POINT_HEAD = CN()
27 |     cfg.MODEL.POINT_HEAD.NAME = "StandardPointHead"
28 |     cfg.MODEL.POINT_HEAD.NUM_CLASSES = 80
29 |     # Names of the input feature maps to be used by a mask point head.
30 |     cfg.MODEL.POINT_HEAD.IN_FEATURES = ("p2",)
31 |     # Number of points sampled during training for a mask point head.
32 |     cfg.MODEL.POINT_HEAD.TRAIN_NUM_POINTS = 14 * 14
33 |     # Oversampling parameter for PointRend point sampling during training. Parameter `k` in the
34 |     # original paper.
35 |     cfg.MODEL.POINT_HEAD.OVERSAMPLE_RATIO = 3
36 |     # Importance sampling parameter for PointRend point sampling during training. Parametr `beta` in
37 |     # the original paper.
38 |     cfg.MODEL.POINT_HEAD.IMPORTANCE_SAMPLE_RATIO = 0.75
39 |     # Number of subdivision steps during inference.
40 |     cfg.MODEL.POINT_HEAD.SUBDIVISION_STEPS = 5
41 |     # Maximum number of points selected at each subdivision step (N).
42 |     cfg.MODEL.POINT_HEAD.SUBDIVISION_NUM_POINTS = 28 * 28
43 |     cfg.MODEL.POINT_HEAD.FC_DIM = 256
44 |     cfg.MODEL.POINT_HEAD.NUM_FC = 3
45 |     cfg.MODEL.POINT_HEAD.CLS_AGNOSTIC_MASK = False
46 |     # If True, then coarse prediction features are used as inout for each layer in PointRend's MLP.
47 |     cfg.MODEL.POINT_HEAD.COARSE_PRED_EACH_LAYER = True
48 |     cfg.MODEL.POINT_HEAD.COARSE_SEM_SEG_HEAD_NAME = "SemSegFPNHead"
49 | 


--------------------------------------------------------------------------------
/modeling/sampling.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates.
 2 | import torch
 3 | 
 4 | from detectron2.layers import nonzero_tuple
 5 | 
 6 | __all__ = ["subsample_labels"]
 7 | 
 8 | 
 9 | def subsample_labels(
10 |     labels: torch.Tensor, num_samples: int, positive_fraction: float, bg_label: int
11 | ):
12 |     """
13 |     Return `num_samples` (or fewer, if not enough found)
14 |     random samples from `labels` which is a mixture of positives & negatives.
15 |     It will try to return as many positives as possible without
16 |     exceeding `positive_fraction * num_samples`, and then try to
17 |     fill the remaining slots with negatives.
18 | 
19 |     Args:
20 |         labels (Tensor): (N, ) label vector with values:
21 |             * -1: ignore
22 |             * bg_label: background ("negative") class
23 |             * otherwise: one or more foreground ("positive") classes
24 |         num_samples (int): The total number of labels with value >= 0 to return.
25 |             Values that are not sampled will be filled with -1 (ignore).
26 |         positive_fraction (float): The number of subsampled labels with values > 0
27 |             is `min(num_positives, int(positive_fraction * num_samples))`. The number
28 |             of negatives sampled is `min(num_negatives, num_samples - num_positives_sampled)`.
29 |             In order words, if there are not enough positives, the sample is filled with
30 |             negatives. If there are also not enough negatives, then as many elements are
31 |             sampled as is possible.
32 |         bg_label (int): label index of background ("negative") class.
33 | 
34 |     Returns:
35 |         pos_idx, neg_idx (Tensor):
36 |             1D vector of indices. The total length of both is `num_samples` or fewer.
37 |     """
38 |     positive = nonzero_tuple((labels != -1) & (labels != bg_label))[0]
39 |     negative = nonzero_tuple(labels == bg_label)[0]
40 | 
41 |     num_pos = int(num_samples * positive_fraction)
42 |     # protect against not enough positive examples
43 |     num_pos = min(positive.numel(), num_pos)
44 |     num_neg = num_samples - num_pos
45 |     # protect against not enough negative examples
46 |     num_neg = min(negative.numel(), num_neg)
47 | 
48 |     # randomly select positive and negative examples
49 |     perm1 = torch.randperm(positive.numel(), device=positive.device)[:num_pos]
50 |     perm2 = torch.randperm(negative.numel(), device=negative.device)[:num_neg]
51 | 
52 |     pos_idx = positive[perm1]
53 |     neg_idx = negative[perm2]
54 |     return pos_idx, neg_idx
55 | 


--------------------------------------------------------------------------------
/projects/deeplab/lr_scheduler.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates.
 2 | import math
 3 | from typing import List
 4 | import torch
 5 | 
 6 | from detectron2.solver.lr_scheduler import _get_warmup_factor_at_iter
 7 | 
 8 | # NOTE: PyTorch's LR scheduler interface uses names that assume the LR changes
 9 | # only on epoch boundaries. We typically use iteration based schedules instead.
10 | # As a result, "epoch" (e.g., as in self.last_epoch) should be understood to mean
11 | # "iteration" instead.
12 | 
13 | # FIXME: ideally this would be achieved with a CombinedLRScheduler, separating
14 | # MultiStepLR with WarmupLR but the current LRScheduler design doesn't allow it.
15 | 
16 | 
17 | class WarmupPolyLR(torch.optim.lr_scheduler._LRScheduler):
18 |     """
19 |     Poly learning rate schedule used to train DeepLab.
20 |     Paper: DeepLab: Semantic Image Segmentation with Deep Convolutional Nets,
21 |         Atrous Convolution, and Fully Connected CRFs.
22 |     Reference: https://github.com/tensorflow/models/blob/21b73d22f3ed05b650e85ac50849408dd36de32e/research/deeplab/utils/train_utils.py#L337  # noqa
23 |     """
24 | 
25 |     def __init__(
26 |         self,
27 |         optimizer: torch.optim.Optimizer,
28 |         max_iters: int,
29 |         warmup_factor: float = 0.001,
30 |         warmup_iters: int = 1000,
31 |         warmup_method: str = "linear",
32 |         last_epoch: int = -1,
33 |         power: float = 0.9,
34 |         constant_ending: float = 0.0,
35 |     ):
36 |         self.max_iters = max_iters
37 |         self.warmup_factor = warmup_factor
38 |         self.warmup_iters = warmup_iters
39 |         self.warmup_method = warmup_method
40 |         self.power = power
41 |         self.constant_ending = constant_ending
42 |         super().__init__(optimizer, last_epoch)
43 | 
44 |     def get_lr(self) -> List[float]:
45 |         warmup_factor = _get_warmup_factor_at_iter(
46 |             self.warmup_method, self.last_epoch, self.warmup_iters, self.warmup_factor
47 |         )
48 |         if self.constant_ending > 0 and warmup_factor == 1.0:
49 |             # Constant ending lr.
50 |             if (
51 |                 math.pow((1.0 - self.last_epoch / self.max_iters), self.power)
52 |                 < self.constant_ending
53 |             ):
54 |                 return [base_lr * self.constant_ending for base_lr in self.base_lrs]
55 |         return [
56 |             base_lr * warmup_factor * math.pow((1.0 - self.last_epoch / self.max_iters), self.power)
57 |             for base_lr in self.base_lrs
58 |         ]
59 | 
60 |     def _compute_values(self) -> List[float]:
61 |         # The new interface
62 |         return self.get_lr()
63 | 


--------------------------------------------------------------------------------
/evaluation/testing.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates.
 2 | import logging
 3 | import numpy as np
 4 | import pprint
 5 | import sys
 6 | from collections import OrderedDict
 7 | from collections.abc import Mapping
 8 | 
 9 | 
10 | def print_csv_format(results):
11 |     """
12 |     Print main metrics in a format similar to Detectron,
13 |     so that they are easy to copypaste into a spreadsheet.
14 | 
15 |     Args:
16 |         results (OrderedDict[dict]): task_name -> {metric -> score}
17 |     """
18 |     assert isinstance(results, OrderedDict), results  # unordered results cannot be properly printed
19 |     logger = logging.getLogger(__name__)
20 |     for task, res in results.items():
21 |         # Don't print "AP-category" metrics since they are usually not tracked.
22 |         important_res = [(k, v) for k, v in res.items() if "-" not in k]
23 |         logger.info("copypaste: Task: {}".format(task))
24 |         logger.info("copypaste: " + ",".join([k[0] for k in important_res]))
25 |         logger.info("copypaste: " + ",".join(["{0:.4f}".format(k[1]) for k in important_res]))
26 | 
27 | 
28 | def verify_results(cfg, results):
29 |     """
30 |     Args:
31 |         results (OrderedDict[dict]): task_name -> {metric -> score}
32 | 
33 |     Returns:
34 |         bool: whether the verification succeeds or not
35 |     """
36 |     expected_results = cfg.TEST.EXPECTED_RESULTS
37 |     if not len(expected_results):
38 |         return True
39 | 
40 |     ok = True
41 |     for task, metric, expected, tolerance in expected_results:
42 |         actual = results[task].get(metric, None)
43 |         if actual is None:
44 |             ok = False
45 |             continue
46 |         if not np.isfinite(actual):
47 |             ok = False
48 |             continue
49 |         diff = abs(actual - expected)
50 |         if diff > tolerance:
51 |             ok = False
52 | 
53 |     logger = logging.getLogger(__name__)
54 |     if not ok:
55 |         logger.error("Result verification failed!")
56 |         logger.error("Expected Results: " + str(expected_results))
57 |         logger.error("Actual Results: " + pprint.pformat(results))
58 | 
59 |         sys.exit(1)
60 |     else:
61 |         logger.info("Results verification passed.")
62 |     return ok
63 | 
64 | 
65 | def flatten_results_dict(results):
66 |     """
67 |     Expand a hierarchical dict of scalars into a flat dict of scalars.
68 |     If results[k1][k2][k3] = v, the returned dict will have the entry
69 |     {"k1/k2/k3": v}.
70 | 
71 |     Args:
72 |         results (dict):
73 |     """
74 |     r = {}
75 |     for k, v in results.items():
76 |         if isinstance(v, Mapping):
77 |             v = flatten_results_dict(v)
78 |             for kk, vv in v.items():
79 |                 r[k + "/" + kk] = vv
80 |         else:
81 |             r[k] = v
82 |     return r
83 | 


--------------------------------------------------------------------------------
/utils/memory.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates.
 2 | 
 3 | import logging
 4 | from contextlib import contextmanager
 5 | from functools import wraps
 6 | import torch
 7 | 
 8 | __all__ = ["retry_if_cuda_oom"]
 9 | 
10 | 
11 | @contextmanager
12 | def _ignore_torch_cuda_oom():
13 |     """
14 |     A context which ignores CUDA OOM exception from pytorch.
15 |     """
16 |     try:
17 |         yield
18 |     except RuntimeError as e:
19 |         # NOTE: the string may change?
20 |         if "CUDA out of memory. " in str(e):
21 |             pass
22 |         else:
23 |             raise
24 | 
25 | 
26 | def retry_if_cuda_oom(func):
27 |     """
28 |     Makes a function retry itself after encountering
29 |     pytorch's CUDA OOM error.
30 |     It will first retry after calling `torch.cuda.empty_cache()`.
31 | 
32 |     If that still fails, it will then retry by trying to convert inputs to CPUs.
33 |     In this case, it expects the function to dispatch to CPU implementation.
34 |     The return values may become CPU tensors as well and it's user's
35 |     responsibility to convert it back to CUDA tensor if needed.
36 | 
37 |     Args:
38 |         func: a stateless callable that takes tensor-like objects as arguments
39 | 
40 |     Returns:
41 |         a callable which retries `func` if OOM is encountered.
42 | 
43 |     Examples:
44 |     ::
45 |         output = retry_if_cuda_oom(some_torch_function)(input1, input2)
46 |         # output may be on CPU even if inputs are on GPU
47 | 
48 |     Note:
49 |         1. When converting inputs to CPU, it will only look at each argument and check
50 |            if it has `.device` and `.to` for conversion. Nested structures of tensors
51 |            are not supported.
52 | 
53 |         2. Since the function might be called more than once, it has to be
54 |            stateless.
55 |     """
56 | 
57 |     def maybe_to_cpu(x):
58 |         try:
59 |             like_gpu_tensor = x.device.type == "cuda" and hasattr(x, "to")
60 |         except AttributeError:
61 |             like_gpu_tensor = False
62 |         if like_gpu_tensor:
63 |             return x.to(device="cpu")
64 |         else:
65 |             return x
66 | 
67 |     @wraps(func)
68 |     def wrapped(*args, **kwargs):
69 |         with _ignore_torch_cuda_oom():
70 |             return func(*args, **kwargs)
71 | 
72 |         # Clear cache and retry
73 |         torch.cuda.empty_cache()
74 |         with _ignore_torch_cuda_oom():
75 |             return func(*args, **kwargs)
76 | 
77 |         # Try on CPU. This slows down the code significantly, therefore print a notice.
78 |         logger = logging.getLogger(__name__)
79 |         logger.info("Attempting to copy inputs of {} to CPU due to CUDA OOM".format(str(func)))
80 |         new_args = (maybe_to_cpu(x) for x in args)
81 |         new_kwargs = {k: maybe_to_cpu(v) for k, v in kwargs.items()}
82 |         return func(*new_args, **new_kwargs)
83 | 
84 |     return wrapped
85 | 


--------------------------------------------------------------------------------
/projects/point_rend/color_augmentation.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates.
 2 | import numpy as np
 3 | import random
 4 | import cv2
 5 | from fvcore.transforms.transform import Transform
 6 | 
 7 | 
 8 | class ColorAugSSDTransform(Transform):
 9 |     """
10 |     A color related data augmentation used in Single Shot Multibox Detector (SSD).
11 | 
12 |     Wei Liu, Dragomir Anguelov, Dumitru Erhan, Christian Szegedy,
13 |        Scott Reed, Cheng-Yang Fu, Alexander C. Berg.
14 |        SSD: Single Shot MultiBox Detector. ECCV 2016.
15 | 
16 |     Implementation based on:
17 | 
18 |      https://github.com/weiliu89/caffe/blob
19 |        /4817bf8b4200b35ada8ed0dc378dceaf38c539e4
20 |        /src/caffe/util/im_transforms.cpp
21 | 
22 |      https://github.com/chainer/chainercv/blob
23 |        /7159616642e0be7c5b3ef380b848e16b7e99355b/chainercv
24 |        /links/model/ssd/transforms.py
25 |     """
26 | 
27 |     def __init__(
28 |         self,
29 |         img_format,
30 |         brightness_delta=32,
31 |         contrast_low=0.5,
32 |         contrast_high=1.5,
33 |         saturation_low=0.5,
34 |         saturation_high=1.5,
35 |         hue_delta=18,
36 |     ):
37 |         super().__init__()
38 |         assert img_format in ["BGR", "RGB"]
39 |         self.is_rgb = img_format == "RGB"
40 |         del img_format
41 |         self._set_attributes(locals())
42 | 
43 |     def apply_coords(self, coords):
44 |         return coords
45 | 
46 |     def apply_segmentation(self, segmentation):
47 |         return segmentation
48 | 
49 |     def apply_image(self, img, interp=None):
50 |         if self.is_rgb:
51 |             img = img[:, :, [2, 1, 0]]
52 |         img = self.brightness(img)
53 |         if random.randrange(2):
54 |             img = self.contrast(img)
55 |             img = self.saturation(img)
56 |             img = self.hue(img)
57 |         else:
58 |             img = self.saturation(img)
59 |             img = self.hue(img)
60 |             img = self.contrast(img)
61 |         if self.is_rgb:
62 |             img = img[:, :, [2, 1, 0]]
63 |         return img
64 | 
65 |     def convert(self, img, alpha=1, beta=0):
66 |         img = img.astype(np.float32) * alpha + beta
67 |         img = np.clip(img, 0, 255)
68 |         return img.astype(np.uint8)
69 | 
70 |     def brightness(self, img):
71 |         if random.randrange(2):
72 |             return self.convert(
73 |                 img, beta=random.uniform(-self.brightness_delta, self.brightness_delta)
74 |             )
75 |         return img
76 | 
77 |     def contrast(self, img):
78 |         if random.randrange(2):
79 |             return self.convert(img, alpha=random.uniform(self.contrast_low, self.contrast_high))
80 |         return img
81 | 
82 |     def saturation(self, img):
83 |         if random.randrange(2):
84 |             img = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
85 |             img[:, :, 1] = self.convert(
86 |                 img[:, :, 1], alpha=random.uniform(self.saturation_low, self.saturation_high)
87 |             )
88 |             return cv2.cvtColor(img, cv2.COLOR_HSV2BGR)
89 |         return img
90 | 
91 |     def hue(self, img):
92 |         if random.randrange(2):
93 |             img = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
94 |             img[:, :, 0] = (
95 |                 img[:, :, 0].astype(int) + random.randint(-self.hue_delta, self.hue_delta)
96 |             ) % 180
97 |             return cv2.cvtColor(img, cv2.COLOR_HSV2BGR)
98 |         return img
99 | 


--------------------------------------------------------------------------------
/data/datasets/pascal_voc.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | # Copyright (c) Facebook, Inc. and its affiliates.
 3 | 
 4 | import numpy as np
 5 | import os
 6 | import xml.etree.ElementTree as ET
 7 | from typing import List, Tuple, Union
 8 | 
 9 | from detectron2.data import DatasetCatalog, MetadataCatalog
10 | from detectron2.structures import BoxMode
11 | from detectron2.utils.file_io import PathManager
12 | 
13 | __all__ = ["load_voc_instances", "register_pascal_voc"]
14 | 
15 | 
16 | # fmt: off
17 | CLASS_NAMES = (
18 |     "aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat",
19 |     "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person",
20 |     "pottedplant", "sheep", "sofa", "train", "tvmonitor"
21 | )
22 | # fmt: on
23 | 
24 | 
25 | def load_voc_instances(dirname: str, split: str, class_names: Union[List[str], Tuple[str, ...]]):
26 |     """
27 |     Load Pascal VOC detection annotations to Detectron2 format.
28 | 
29 |     Args:
30 |         dirname: Contain "Annotations", "ImageSets", "JPEGImages"
31 |         split (str): one of "train", "test", "val", "trainval"
32 |         class_names: list or tuple of class names
33 |     """
34 |     with PathManager.open(os.path.join(dirname, "ImageSets", "Main", split + ".txt")) as f:
35 |         fileids = np.loadtxt(f, dtype=np.str)
36 | 
37 |     # Needs to read many small annotation files. Makes sense at local
38 |     annotation_dirname = PathManager.get_local_path(os.path.join(dirname, "Annotations/"))
39 |     dicts = []
40 |     for fileid in fileids:
41 |         anno_file = os.path.join(annotation_dirname, fileid + ".xml")
42 |         jpeg_file = os.path.join(dirname, "JPEGImages", fileid + ".jpg")
43 | 
44 |         with PathManager.open(anno_file) as f:
45 |             tree = ET.parse(f)
46 | 
47 |         r = {
48 |             "file_name": jpeg_file,
49 |             "image_id": fileid,
50 |             "height": int(tree.findall("./size/height")[0].text),
51 |             "width": int(tree.findall("./size/width")[0].text),
52 |         }
53 |         instances = []
54 | 
55 |         for obj in tree.findall("object"):
56 |             cls = obj.find("name").text
57 |             # We include "difficult" samples in training.
58 |             # Based on limited experiments, they don't hurt accuracy.
59 |             # difficult = int(obj.find("difficult").text)
60 |             # if difficult == 1:
61 |             # continue
62 |             bbox = obj.find("bndbox")
63 |             bbox = [float(bbox.find(x).text) for x in ["xmin", "ymin", "xmax", "ymax"]]
64 |             # Original annotations are integers in the range [1, W or H]
65 |             # Assuming they mean 1-based pixel indices (inclusive),
66 |             # a box with annotation (xmin=1, xmax=W) covers the whole image.
67 |             # In coordinate space this is represented by (xmin=0, xmax=W)
68 |             bbox[0] -= 1.0
69 |             bbox[1] -= 1.0
70 |             instances.append(
71 |                 {"category_id": class_names.index(cls), "bbox": bbox, "bbox_mode": BoxMode.XYXY_ABS}
72 |             )
73 |         r["annotations"] = instances
74 |         dicts.append(r)
75 |     return dicts
76 | 
77 | 
78 | def register_pascal_voc(name, dirname, split, year, class_names=CLASS_NAMES):
79 |     DatasetCatalog.register(name, lambda: load_voc_instances(dirname, split, class_names))
80 |     MetadataCatalog.get(name).set(
81 |         thing_classes=list(class_names), dirname=dirname, year=year, split=split
82 |     )
83 | 


--------------------------------------------------------------------------------
/layers/blocks.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | # Copyright (c) Facebook, Inc. and its affiliates.
  3 | 
  4 | import fvcore.nn.weight_init as weight_init
  5 | from torch import nn
  6 | 
  7 | from .batch_norm import FrozenBatchNorm2d, get_norm
  8 | from .wrappers import Conv2d
  9 | 
 10 | 
 11 | """
 12 | CNN building blocks.
 13 | """
 14 | 
 15 | 
 16 | class CNNBlockBase(nn.Module):
 17 |     """
 18 |     A CNN block is assumed to have input channels, output channels and a stride.
 19 |     The input and output of `forward()` method must be NCHW tensors.
 20 |     The method can perform arbitrary computation but must match the given
 21 |     channels and stride specification.
 22 | 
 23 |     Attribute:
 24 |         in_channels (int):
 25 |         out_channels (int):
 26 |         stride (int):
 27 |     """
 28 | 
 29 |     def __init__(self, in_channels, out_channels, stride):
 30 |         """
 31 |         The `__init__` method of any subclass should also contain these arguments.
 32 | 
 33 |         Args:
 34 |             in_channels (int):
 35 |             out_channels (int):
 36 |             stride (int):
 37 |         """
 38 |         super().__init__()
 39 |         self.in_channels = in_channels
 40 |         self.out_channels = out_channels
 41 |         self.stride = stride
 42 | 
 43 |     def freeze(self):
 44 |         """
 45 |         Make this block not trainable.
 46 |         This method sets all parameters to `requires_grad=False`,
 47 |         and convert all BatchNorm layers to FrozenBatchNorm
 48 | 
 49 |         Returns:
 50 |             the block itself
 51 |         """
 52 |         for p in self.parameters():
 53 |             p.requires_grad = False
 54 |         FrozenBatchNorm2d.convert_frozen_batchnorm(self)
 55 |         return self
 56 | 
 57 | 
 58 | class DepthwiseSeparableConv2d(nn.Module):
 59 |     """
 60 |     A kxk depthwise convolution + a 1x1 convolution.
 61 | 
 62 |     In :paper:`xception`, norm & activation are applied on the second conv.
 63 |     :paper:`mobilenet` uses norm & activation on both convs.
 64 |     """
 65 | 
 66 |     def __init__(
 67 |         self,
 68 |         in_channels,
 69 |         out_channels,
 70 |         kernel_size=3,
 71 |         padding=1,
 72 |         *,
 73 |         norm1=None,
 74 |         activation1=None,
 75 |         norm2=None,
 76 |         activation2=None,
 77 |     ):
 78 |         """
 79 |         Args:
 80 |             norm1, norm2 (str or callable): normalization for the two conv layers.
 81 |             activation1, activation2 (callable(Tensor) -> Tensor): activation
 82 |                 function for the two conv layers.
 83 |         """
 84 |         super().__init__()
 85 |         self.depthwise = Conv2d(
 86 |             in_channels,
 87 |             in_channels,
 88 |             kernel_size=kernel_size,
 89 |             padding=padding,
 90 |             groups=in_channels,
 91 |             bias=not norm1,
 92 |             norm=get_norm(norm1, in_channels),
 93 |             activation=activation1,
 94 |         )
 95 |         self.pointwise = Conv2d(
 96 |             in_channels,
 97 |             out_channels,
 98 |             kernel_size=1,
 99 |             bias=not norm2,
100 |             norm=get_norm(norm2, out_channels),
101 |             activation=activation2,
102 |         )
103 | 
104 |         # default initialization
105 |         weight_init.c2_msra_fill(self.depthwise)
106 |         weight_init.c2_msra_fill(self.pointwise)
107 | 
108 |     def forward(self, x):
109 |         return self.pointwise(self.depthwise(x))
110 | 


--------------------------------------------------------------------------------
/layers/roi_align_rotated.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates.
 2 | import torch
 3 | from torch import nn
 4 | from torch.autograd import Function
 5 | from torch.autograd.function import once_differentiable
 6 | from torch.nn.modules.utils import _pair
 7 | 
 8 | from detectron2 import _C
 9 | 
10 | 
11 | class _ROIAlignRotated(Function):
12 |     @staticmethod
13 |     def forward(ctx, input, roi, output_size, spatial_scale, sampling_ratio):
14 |         ctx.save_for_backward(roi)
15 |         ctx.output_size = _pair(output_size)
16 |         ctx.spatial_scale = spatial_scale
17 |         ctx.sampling_ratio = sampling_ratio
18 |         ctx.input_shape = input.size()
19 |         output = _C.roi_align_rotated_forward(
20 |             input, roi, spatial_scale, output_size[0], output_size[1], sampling_ratio
21 |         )
22 |         return output
23 | 
24 |     @staticmethod
25 |     @once_differentiable
26 |     def backward(ctx, grad_output):
27 |         (rois,) = ctx.saved_tensors
28 |         output_size = ctx.output_size
29 |         spatial_scale = ctx.spatial_scale
30 |         sampling_ratio = ctx.sampling_ratio
31 |         bs, ch, h, w = ctx.input_shape
32 |         grad_input = _C.roi_align_rotated_backward(
33 |             grad_output,
34 |             rois,
35 |             spatial_scale,
36 |             output_size[0],
37 |             output_size[1],
38 |             bs,
39 |             ch,
40 |             h,
41 |             w,
42 |             sampling_ratio,
43 |         )
44 |         return grad_input, None, None, None, None, None
45 | 
46 | 
47 | roi_align_rotated = _ROIAlignRotated.apply
48 | 
49 | 
50 | class ROIAlignRotated(nn.Module):
51 |     def __init__(self, output_size, spatial_scale, sampling_ratio):
52 |         """
53 |         Args:
54 |             output_size (tuple): h, w
55 |             spatial_scale (float): scale the input boxes by this number
56 |             sampling_ratio (int): number of inputs samples to take for each output
57 |                 sample. 0 to take samples densely.
58 | 
59 |         Note:
60 |             ROIAlignRotated supports continuous coordinate by default:
61 |             Given a continuous coordinate c, its two neighboring pixel indices (in our
62 |             pixel model) are computed by floor(c - 0.5) and ceil(c - 0.5). For example,
63 |             c=1.3 has pixel neighbors with discrete indices [0] and [1] (which are sampled
64 |             from the underlying signal at continuous coordinates 0.5 and 1.5).
65 |         """
66 |         super(ROIAlignRotated, self).__init__()
67 |         self.output_size = output_size
68 |         self.spatial_scale = spatial_scale
69 |         self.sampling_ratio = sampling_ratio
70 | 
71 |     def forward(self, input, rois):
72 |         """
73 |         Args:
74 |             input: NCHW images
75 |             rois: Bx6 boxes. First column is the index into N.
76 |                 The other 5 columns are (x_ctr, y_ctr, width, height, angle_degrees).
77 |         """
78 |         assert rois.dim() == 2 and rois.size(1) == 6
79 |         orig_dtype = input.dtype
80 |         if orig_dtype == torch.float16:
81 |             input = input.float()
82 |             rois = rois.float()
83 |         return roi_align_rotated(
84 |             input, rois, self.output_size, self.spatial_scale, self.sampling_ratio
85 |         ).to(dtype=orig_dtype)
86 | 
87 |     def __repr__(self):
88 |         tmpstr = self.__class__.__name__ + "("
89 |         tmpstr += "output_size=" + str(self.output_size)
90 |         tmpstr += ", spatial_scale=" + str(self.spatial_scale)
91 |         tmpstr += ", sampling_ratio=" + str(self.sampling_ratio)
92 |         tmpstr += ")"
93 |         return tmpstr
94 | 


--------------------------------------------------------------------------------
/checkpoint/detection_checkpoint.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates.
 2 | import pickle
 3 | from fvcore.common.checkpoint import Checkpointer
 4 | 
 5 | import detectron2.utils.comm as comm
 6 | from detectron2.utils.file_io import PathManager
 7 | 
 8 | from .c2_model_loading import align_and_update_state_dicts
 9 | 
10 | 
11 | class DetectionCheckpointer(Checkpointer):
12 |     """
13 |     Same as :class:`Checkpointer`, but is able to handle models in detectron & detectron2
14 |     model zoo, and apply conversions for legacy models.
15 |     """
16 | 
17 |     def __init__(self, model, save_dir="", *, save_to_disk=None, **checkpointables):
18 |         is_main_process = comm.is_main_process()
19 |         super().__init__(
20 |             model,
21 |             save_dir,
22 |             save_to_disk=is_main_process if save_to_disk is None else save_to_disk,
23 |             **checkpointables,
24 |         )
25 |         if hasattr(self, "path_manager"):
26 |             self.path_manager = PathManager
27 |         else:
28 |             # This could only happen for open source
29 |             # TODO remove after upgrading fvcore
30 |             from fvcore.common.file_io import PathManager as g_PathManager
31 | 
32 |             for handler in PathManager._path_handlers.values():
33 |                 try:
34 |                     g_PathManager.register_handler(handler)
35 |                 except KeyError:
36 |                     pass
37 | 
38 |     def _load_file(self, filename):
39 |         if filename.endswith(".pkl"):
40 |             with PathManager.open(filename, "rb") as f:
41 |                 data = pickle.load(f, encoding="latin1")
42 |             if "model" in data and "__author__" in data:
43 |                 # file is in Detectron2 model zoo format
44 |                 self.logger.info("Reading a file from '{}'".format(data["__author__"]))
45 |                 return data
46 |             else:
47 |                 # assume file is from Caffe2 / Detectron1 model zoo
48 |                 if "blobs" in data:
49 |                     # Detection models have "blobs", but ImageNet models don't
50 |                     data = data["blobs"]
51 |                 data = {k: v for k, v in data.items() if not k.endswith("_momentum")}
52 |                 return {"model": data, "__author__": "Caffe2", "matching_heuristics": True}
53 | 
54 |         loaded = super()._load_file(filename)  # load native pth checkpoint
55 |         if "model" not in loaded:
56 |             loaded = {"model": loaded}
57 |         return loaded
58 | 
59 |     def _load_model(self, checkpoint):
60 |         if checkpoint.get("matching_heuristics", False):
61 |             self._convert_ndarray_to_tensor(checkpoint["model"])
62 |             # convert weights by name-matching heuristics
63 |             model_state_dict = self.model.state_dict()
64 |             align_and_update_state_dicts(
65 |                 model_state_dict,
66 |                 checkpoint["model"],
67 |                 c2_conversion=checkpoint.get("__author__", None) == "Caffe2",
68 |             )
69 |             checkpoint["model"] = model_state_dict
70 |         # for non-caffe2 models, use standard ways to load it
71 |         incompatible = super()._load_model(checkpoint)
72 |         if incompatible is None:  # support older versions of fvcore
73 |             return None
74 | 
75 |         model_buffers = dict(self.model.named_buffers(recurse=False))
76 |         for k in ["pixel_mean", "pixel_std"]:
77 |             # Ignore missing key message about pixel_mean/std.
78 |             # Though they may be missing in old checkpoints, they will be correctly
79 |             # initialized from config anyway.
80 |             if k in model_buffers:
81 |                 try:
82 |                     incompatible.missing_keys.remove(k)
83 |                 except ValueError:
84 |                     pass
85 |         return incompatible
86 | 


--------------------------------------------------------------------------------
/engine/launch.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates.
 2 | import logging
 3 | import torch
 4 | import torch.distributed as dist
 5 | import torch.multiprocessing as mp
 6 | 
 7 | from detectron2.utils import comm
 8 | 
 9 | __all__ = ["launch"]
10 | 
11 | 
12 | def _find_free_port():
13 |     import socket
14 | 
15 |     sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
16 |     # Binding to port 0 will cause the OS to find an available port for us
17 |     sock.bind(("", 0))
18 |     port = sock.getsockname()[1]
19 |     sock.close()
20 |     # NOTE: there is still a chance the port could be taken by other processes.
21 |     return port
22 | 
23 | 
24 | def launch(main_func, num_gpus_per_machine, num_machines=1, machine_rank=0, dist_url=None, args=()):
25 |     """
26 |     Launch multi-gpu or distributed training.
27 |     This function must be called on all machines involved in the training.
28 |     It will spawn child processes (defined by ``num_gpus_per_machine``) on each machine.
29 | 
30 |     Args:
31 |         main_func: a function that will be called by `main_func(*args)`
32 |         num_gpus_per_machine (int): number of GPUs per machine
33 |         num_machines (int): the total number of machines
34 |         machine_rank (int): the rank of this machine
35 |         dist_url (str): url to connect to for distributed jobs, including protocol
36 |                        e.g. "tcp://127.0.0.1:8686".
37 |                        Can be set to "auto" to automatically select a free port on localhost
38 |         args (tuple): arguments passed to main_func
39 |     """
40 |     world_size = num_machines * num_gpus_per_machine
41 |     if world_size > 1:
42 |         # https://github.com/pytorch/pytorch/pull/14391
43 |         # TODO prctl in spawned processes
44 | 
45 |         if dist_url == "auto":
46 |             assert num_machines == 1, "dist_url=auto not supported in multi-machine jobs."
47 |             port = _find_free_port()
48 |             dist_url = f"tcp://127.0.0.1:{port}"
49 |         if num_machines > 1 and dist_url.startswith("file://"):
50 |             logger = logging.getLogger(__name__)
51 |             logger.warning(
52 |                 "file:// is not a reliable init_method in multi-machine jobs. Prefer tcp://"
53 |             )
54 | 
55 |         mp.spawn(
56 |             _distributed_worker,
57 |             nprocs=num_gpus_per_machine,
58 |             args=(main_func, world_size, num_gpus_per_machine, machine_rank, dist_url, args),
59 |             daemon=False,
60 |         )
61 |     else:
62 |         main_func(*args)
63 | 
64 | 
65 | def _distributed_worker(
66 |     local_rank, main_func, world_size, num_gpus_per_machine, machine_rank, dist_url, args
67 | ):
68 |     assert torch.cuda.is_available(), "cuda is not available. Please check your installation."
69 |     global_rank = machine_rank * num_gpus_per_machine + local_rank
70 |     try:
71 |         dist.init_process_group(
72 |             backend="NCCL", init_method=dist_url, world_size=world_size, rank=global_rank
73 |         )
74 |     except Exception as e:
75 |         logger = logging.getLogger(__name__)
76 |         logger.error("Process group URL: {}".format(dist_url))
77 |         raise e
78 |     # synchronize is needed here to prevent a possible timeout after calling init_process_group
79 |     # See: https://github.com/facebookresearch/maskrcnn-benchmark/issues/172
80 |     comm.synchronize()
81 | 
82 |     assert num_gpus_per_machine <= torch.cuda.device_count()
83 |     torch.cuda.set_device(local_rank)
84 | 
85 |     # Setup the local process group (which contains ranks within the same machine)
86 |     assert comm._LOCAL_PROCESS_GROUP is None
87 |     num_machines = world_size // num_gpus_per_machine
88 |     for i in range(num_machines):
89 |         ranks_on_i = list(range(i * num_gpus_per_machine, (i + 1) * num_gpus_per_machine))
90 |         pg = dist.new_group(ranks_on_i)
91 |         if i == machine_rank:
92 |             comm._LOCAL_PROCESS_GROUP = pg
93 | 
94 |     main_func(*args)
95 | 


--------------------------------------------------------------------------------
/projects/point_rend/coarse_mask_head.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates.
 2 | import fvcore.nn.weight_init as weight_init
 3 | import torch
 4 | from torch import nn
 5 | from torch.nn import functional as F
 6 | 
 7 | from detectron2.layers import Conv2d, ShapeSpec
 8 | from detectron2.modeling import ROI_MASK_HEAD_REGISTRY
 9 | 
10 | 
11 | @ROI_MASK_HEAD_REGISTRY.register()
12 | class CoarseMaskHead(nn.Module):
13 |     """
14 |     A mask head with fully connected layers. Given pooled features it first reduces channels and
15 |     spatial dimensions with conv layers and then uses FC layers to predict coarse masks analogously
16 |     to the standard box head.
17 |     """
18 | 
19 |     def __init__(self, cfg, input_shape: ShapeSpec):
20 |         """
21 |         The following attributes are parsed from config:
22 |             conv_dim: the output dimension of the conv layers
23 |             fc_dim: the feature dimenstion of the FC layers
24 |             num_fc: the number of FC layers
25 |             output_side_resolution: side resolution of the output square mask prediction
26 |         """
27 |         super(CoarseMaskHead, self).__init__()
28 | 
29 |         # fmt: off
30 |         self.num_classes            = cfg.MODEL.ROI_HEADS.NUM_CLASSES
31 |         conv_dim                    = cfg.MODEL.ROI_MASK_HEAD.CONV_DIM
32 |         self.fc_dim                 = cfg.MODEL.ROI_MASK_HEAD.FC_DIM
33 |         num_fc                      = cfg.MODEL.ROI_MASK_HEAD.NUM_FC
34 |         self.output_side_resolution = cfg.MODEL.ROI_MASK_HEAD.OUTPUT_SIDE_RESOLUTION
35 |         self.input_channels         = input_shape.channels
36 |         self.input_h                = input_shape.height
37 |         self.input_w                = input_shape.width
38 |         # fmt: on
39 | 
40 |         self.conv_layers = []
41 |         if self.input_channels > conv_dim:
42 |             self.reduce_channel_dim_conv = Conv2d(
43 |                 self.input_channels,
44 |                 conv_dim,
45 |                 kernel_size=1,
46 |                 stride=1,
47 |                 padding=0,
48 |                 bias=True,
49 |                 activation=F.relu,
50 |             )
51 |             self.conv_layers.append(self.reduce_channel_dim_conv)
52 | 
53 |         self.reduce_spatial_dim_conv = Conv2d(
54 |             conv_dim, conv_dim, kernel_size=2, stride=2, padding=0, bias=True, activation=F.relu
55 |         )
56 |         self.conv_layers.append(self.reduce_spatial_dim_conv)
57 | 
58 |         input_dim = conv_dim * self.input_h * self.input_w
59 |         input_dim //= 4
60 | 
61 |         self.fcs = []
62 |         for k in range(num_fc):
63 |             fc = nn.Linear(input_dim, self.fc_dim)
64 |             self.add_module("coarse_mask_fc{}".format(k + 1), fc)
65 |             self.fcs.append(fc)
66 |             input_dim = self.fc_dim
67 | 
68 |         output_dim = self.num_classes * self.output_side_resolution * self.output_side_resolution
69 | 
70 |         self.prediction = nn.Linear(self.fc_dim, output_dim)
71 |         # use normal distribution initialization for mask prediction layer
72 |         nn.init.normal_(self.prediction.weight, std=0.001)
73 |         nn.init.constant_(self.prediction.bias, 0)
74 | 
75 |         for layer in self.conv_layers:
76 |             weight_init.c2_msra_fill(layer)
77 |         for layer in self.fcs:
78 |             weight_init.c2_xavier_fill(layer)
79 | 
80 |     def forward(self, x):
81 |         # unlike BaseMaskRCNNHead, this head only outputs intermediate
82 |         # features, because the features will be used later by PointHead.
83 |         N = x.shape[0]
84 |         x = x.view(N, self.input_channels, self.input_h, self.input_w)
85 |         for layer in self.conv_layers:
86 |             x = layer(x)
87 |         x = torch.flatten(x, start_dim=1)
88 |         for layer in self.fcs:
89 |             x = F.relu(layer(x))
90 |         return self.prediction(x).view(
91 |             N, self.num_classes, self.output_side_resolution, self.output_side_resolution
92 |         )
93 | 


--------------------------------------------------------------------------------
/modeling/postprocessing.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates.
 2 | import torch
 3 | from torch.nn import functional as F
 4 | 
 5 | from detectron2.layers import paste_masks_in_image
 6 | from detectron2.structures import Instances
 7 | from detectron2.utils.memory import retry_if_cuda_oom
 8 | 
 9 | 
10 | # perhaps should rename to "resize_instance"
11 | def detector_postprocess(results, output_height, output_width, mask_threshold=0.5):
12 |     """
13 |     Resize the output instances.
14 |     The input images are often resized when entering an object detector.
15 |     As a result, we often need the outputs of the detector in a different
16 |     resolution from its inputs.
17 | 
18 |     This function will resize the raw outputs of an R-CNN detector
19 |     to produce outputs according to the desired output resolution.
20 | 
21 |     Args:
22 |         results (Instances): the raw outputs from the detector.
23 |             `results.image_size` contains the input image resolution the detector sees.
24 |             This object might be modified in-place.
25 |         output_height, output_width: the desired output resolution.
26 | 
27 |     Returns:
28 |         Instances: the resized output from the model, based on the output resolution
29 |     """
30 | 
31 |     # Converts integer tensors to float temporaries
32 |     #   to ensure true division is performed when
33 |     #   computing scale_x and scale_y.
34 |     if isinstance(output_width, torch.Tensor):
35 |         output_width_tmp = output_width.float()
36 |     else:
37 |         output_width_tmp = output_width
38 | 
39 |     if isinstance(output_height, torch.Tensor):
40 |         output_height_tmp = output_height.float()
41 |     else:
42 |         output_height_tmp = output_height
43 | 
44 |     scale_x, scale_y = (
45 |         output_width_tmp / results.image_size[1],
46 |         output_height_tmp / results.image_size[0],
47 |     )
48 |     results = Instances((output_height, output_width), **results.get_fields())
49 | 
50 |     if results.has("pred_boxes"):
51 |         output_boxes = results.pred_boxes
52 |     elif results.has("proposal_boxes"):
53 |         output_boxes = results.proposal_boxes
54 | 
55 |     output_boxes.scale(scale_x, scale_y)
56 |     output_boxes.clip(results.image_size)
57 | 
58 |     results = results[output_boxes.nonempty()]
59 | 
60 |     if results.has("pred_masks"):
61 |         results.pred_masks = retry_if_cuda_oom(paste_masks_in_image)(
62 |             results.pred_masks[:, 0, :, :],  # N, 1, M, M
63 |             results.pred_boxes,
64 |             results.image_size,
65 |             threshold=mask_threshold,
66 |         )
67 | 
68 |     if results.has("pred_keypoints"):
69 |         results.pred_keypoints[:, :, 0] *= scale_x
70 |         results.pred_keypoints[:, :, 1] *= scale_y
71 | 
72 |     return results
73 | 
74 | 
75 | def sem_seg_postprocess(result, img_size, output_height, output_width):
76 |     """
77 |     Return semantic segmentation predictions in the original resolution.
78 | 
79 |     The input images are often resized when entering semantic segmentor. Moreover, in same
80 |     cases, they also padded inside segmentor to be divisible by maximum network stride.
81 |     As a result, we often need the predictions of the segmentor in a different
82 |     resolution from its inputs.
83 | 
84 |     Args:
85 |         result (Tensor): semantic segmentation prediction logits. A tensor of shape (C, H, W),
86 |             where C is the number of classes, and H, W are the height and width of the prediction.
87 |         img_size (tuple): image size that segmentor is taking as input.
88 |         output_height, output_width: the desired output resolution.
89 | 
90 |     Returns:
91 |         semantic segmentation prediction (Tensor): A tensor of the shape
92 |             (C, output_height, output_width) that contains per-pixel soft predictions.
93 |     """
94 |     result = result[:, : img_size[0], : img_size[1]].expand(1, -1, -1, -1)
95 |     result = F.interpolate(
96 |         result, size=(output_height, output_width), mode="bilinear", align_corners=False
97 |     )[0]
98 |     return result
99 | 


--------------------------------------------------------------------------------
/utils/colormap.py:
--------------------------------------------------------------------------------
  1 | # Copyright (c) Facebook, Inc. and its affiliates.
  2 | 
  3 | """
  4 | An awesome colormap for really neat visualizations.
  5 | Copied from Detectron, and removed gray colors.
  6 | """
  7 | 
  8 | import numpy as np
  9 | 
 10 | __all__ = ["colormap", "random_color"]
 11 | 
 12 | # fmt: off
 13 | # RGB:
 14 | _COLORS = np.array(
 15 |     [
 16 |         0.000, 0.447, 0.741,
 17 |         0.850, 0.325, 0.098,
 18 |         0.929, 0.694, 0.125,
 19 |         0.494, 0.184, 0.556,
 20 |         0.466, 0.674, 0.188,
 21 |         0.301, 0.745, 0.933,
 22 |         0.635, 0.078, 0.184,
 23 |         0.300, 0.300, 0.300,
 24 |         0.600, 0.600, 0.600,
 25 |         1.000, 0.000, 0.000,
 26 |         1.000, 0.500, 0.000,
 27 |         0.749, 0.749, 0.000,
 28 |         0.000, 1.000, 0.000,
 29 |         0.000, 0.000, 1.000,
 30 |         0.667, 0.000, 1.000,
 31 |         0.333, 0.333, 0.000,
 32 |         0.333, 0.667, 0.000,
 33 |         0.333, 1.000, 0.000,
 34 |         0.667, 0.333, 0.000,
 35 |         0.667, 0.667, 0.000,
 36 |         0.667, 1.000, 0.000,
 37 |         1.000, 0.333, 0.000,
 38 |         1.000, 0.667, 0.000,
 39 |         1.000, 1.000, 0.000,
 40 |         0.000, 0.333, 0.500,
 41 |         0.000, 0.667, 0.500,
 42 |         0.000, 1.000, 0.500,
 43 |         0.333, 0.000, 0.500,
 44 |         0.333, 0.333, 0.500,
 45 |         0.333, 0.667, 0.500,
 46 |         0.333, 1.000, 0.500,
 47 |         0.667, 0.000, 0.500,
 48 |         0.667, 0.333, 0.500,
 49 |         0.667, 0.667, 0.500,
 50 |         0.667, 1.000, 0.500,
 51 |         1.000, 0.000, 0.500,
 52 |         1.000, 0.333, 0.500,
 53 |         1.000, 0.667, 0.500,
 54 |         1.000, 1.000, 0.500,
 55 |         0.000, 0.333, 1.000,
 56 |         0.000, 0.667, 1.000,
 57 |         0.000, 1.000, 1.000,
 58 |         0.333, 0.000, 1.000,
 59 |         0.333, 0.333, 1.000,
 60 |         0.333, 0.667, 1.000,
 61 |         0.333, 1.000, 1.000,
 62 |         0.667, 0.000, 1.000,
 63 |         0.667, 0.333, 1.000,
 64 |         0.667, 0.667, 1.000,
 65 |         0.667, 1.000, 1.000,
 66 |         1.000, 0.000, 1.000,
 67 |         1.000, 0.333, 1.000,
 68 |         1.000, 0.667, 1.000,
 69 |         0.333, 0.000, 0.000,
 70 |         0.500, 0.000, 0.000,
 71 |         0.667, 0.000, 0.000,
 72 |         0.833, 0.000, 0.000,
 73 |         1.000, 0.000, 0.000,
 74 |         0.000, 0.167, 0.000,
 75 |         0.000, 0.333, 0.000,
 76 |         0.000, 0.500, 0.000,
 77 |         0.000, 0.667, 0.000,
 78 |         0.000, 0.833, 0.000,
 79 |         0.000, 1.000, 0.000,
 80 |         0.000, 0.000, 0.167,
 81 |         0.000, 0.000, 0.333,
 82 |         0.000, 0.000, 0.500,
 83 |         0.000, 0.000, 0.667,
 84 |         0.000, 0.000, 0.833,
 85 |         0.000, 0.000, 1.000,
 86 |         0.000, 0.000, 0.000,
 87 |         0.143, 0.143, 0.143,
 88 |         0.857, 0.857, 0.857,
 89 |         1.000, 1.000, 1.000
 90 |     ]
 91 | ).astype(np.float32).reshape(-1, 3)
 92 | # fmt: on
 93 | 
 94 | 
 95 | def colormap(rgb=False, maximum=255):
 96 |     """
 97 |     Args:
 98 |         rgb (bool): whether to return RGB colors or BGR colors.
 99 |         maximum (int): either 255 or 1
100 | 
101 |     Returns:
102 |         ndarray: a float32 array of Nx3 colors, in range [0, 255] or [0, 1]
103 |     """
104 |     assert maximum in [255, 1], maximum
105 |     c = _COLORS * maximum
106 |     if not rgb:
107 |         c = c[:, ::-1]
108 |     return c
109 | 
110 | 
111 | def random_color(rgb=False, maximum=255):
112 |     """
113 |     Args:
114 |         rgb (bool): whether to return RGB colors or BGR colors.
115 |         maximum (int): either 255 or 1
116 | 
117 |     Returns:
118 |         ndarray: a vector of 3 numbers
119 |     """
120 |     idx = np.random.randint(0, len(_COLORS))
121 |     ret = _COLORS[idx] * maximum
122 |     if not rgb:
123 |         ret = ret[::-1]
124 |     return ret
125 | 
126 | 
127 | if __name__ == "__main__":
128 |     import cv2
129 | 
130 |     size = 100
131 |     H, W = 10, 10
132 |     canvas = np.random.rand(H * size, W * size, 3).astype("float32")
133 |     for h in range(H):
134 |         for w in range(W):
135 |             idx = h * W + w
136 |             if idx >= len(_COLORS):
137 |                 break
138 |             canvas[h * size : (h + 1) * size, w * size : (w + 1) * size] = _COLORS[idx]
139 |     cv2.imshow("a", canvas)
140 |     cv2.waitKey(0)
141 | 


--------------------------------------------------------------------------------
/utils/env.py:
--------------------------------------------------------------------------------
  1 | # Copyright (c) Facebook, Inc. and its affiliates.
  2 | import importlib
  3 | import importlib.util
  4 | import logging
  5 | import numpy as np
  6 | import os
  7 | import random
  8 | import sys
  9 | from datetime import datetime
 10 | import torch
 11 | 
 12 | __all__ = ["seed_all_rng"]
 13 | 
 14 | 
 15 | TORCH_VERSION = tuple(int(x) for x in torch.__version__.split(".")[:2])
 16 | """
 17 | PyTorch version as a tuple of 2 ints. Useful for comparison.
 18 | """
 19 | 
 20 | 
 21 | def seed_all_rng(seed=None):
 22 |     """
 23 |     Set the random seed for the RNG in torch, numpy and python.
 24 | 
 25 |     Args:
 26 |         seed (int): if None, will use a strong random seed.
 27 |     """
 28 |     if seed is None:
 29 |         seed = (
 30 |             os.getpid()
 31 |             + int(datetime.now().strftime("%S%f"))
 32 |             + int.from_bytes(os.urandom(2), "big")
 33 |         )
 34 |         logger = logging.getLogger(__name__)
 35 |         logger.info("Using a generated random seed {}".format(seed))
 36 |     np.random.seed(seed)
 37 |     torch.set_rng_state(torch.manual_seed(seed).get_state())
 38 |     random.seed(seed)
 39 | 
 40 | 
 41 | # from https://stackoverflow.com/questions/67631/how-to-import-a-module-given-the-full-path
 42 | def _import_file(module_name, file_path, make_importable=False):
 43 |     spec = importlib.util.spec_from_file_location(module_name, file_path)
 44 |     module = importlib.util.module_from_spec(spec)
 45 |     spec.loader.exec_module(module)
 46 |     if make_importable:
 47 |         sys.modules[module_name] = module
 48 |     return module
 49 | 
 50 | 
 51 | def _configure_libraries():
 52 |     """
 53 |     Configurations for some libraries.
 54 |     """
 55 |     # An environment option to disable `import cv2` globally,
 56 |     # in case it leads to negative performance impact
 57 |     disable_cv2 = int(os.environ.get("DETECTRON2_DISABLE_CV2", False))
 58 |     if disable_cv2:
 59 |         sys.modules["cv2"] = None
 60 |     else:
 61 |         # Disable opencl in opencv since its interaction with cuda often has negative effects
 62 |         # This envvar is supported after OpenCV 3.4.0
 63 |         os.environ["OPENCV_OPENCL_RUNTIME"] = "disabled"
 64 |         try:
 65 |             import cv2
 66 | 
 67 |             if int(cv2.__version__.split(".")[0]) >= 3:
 68 |                 cv2.ocl.setUseOpenCL(False)
 69 |         except ModuleNotFoundError:
 70 |             # Other types of ImportError, if happened, should not be ignored.
 71 |             # Because a failed opencv import could mess up address space
 72 |             # https://github.com/skvark/opencv-python/issues/381
 73 |             pass
 74 | 
 75 |     def get_version(module, digit=2):
 76 |         return tuple(map(int, module.__version__.split(".")[:digit]))
 77 | 
 78 |     # fmt: off
 79 |     assert get_version(torch) >= (1, 4), "Requires torch>=1.4"
 80 |     import fvcore
 81 |     assert get_version(fvcore, 3) >= (0, 1, 1), "Requires fvcore>=0.1.1"
 82 |     import yaml
 83 |     assert get_version(yaml) >= (5, 1), "Requires pyyaml>=5.1"
 84 |     # fmt: on
 85 | 
 86 | 
 87 | _ENV_SETUP_DONE = False
 88 | 
 89 | 
 90 | def setup_environment():
 91 |     """Perform environment setup work. The default setup is a no-op, but this
 92 |     function allows the user to specify a Python source file or a module in
 93 |     the $DETECTRON2_ENV_MODULE environment variable, that performs
 94 |     custom setup work that may be necessary to their computing environment.
 95 |     """
 96 |     global _ENV_SETUP_DONE
 97 |     if _ENV_SETUP_DONE:
 98 |         return
 99 |     _ENV_SETUP_DONE = True
100 | 
101 |     _configure_libraries()
102 | 
103 |     custom_module_path = os.environ.get("DETECTRON2_ENV_MODULE")
104 | 
105 |     if custom_module_path:
106 |         setup_custom_environment(custom_module_path)
107 |     else:
108 |         # The default setup is a no-op
109 |         pass
110 | 
111 | 
112 | def setup_custom_environment(custom_module):
113 |     """
114 |     Load custom environment setup by importing a Python source file or a
115 |     module, and run the setup function.
116 |     """
117 |     if custom_module.endswith(".py"):
118 |         module = _import_file("detectron2.utils.env.custom_module", custom_module)
119 |     else:
120 |         module = importlib.import_module(custom_module)
121 |     assert hasattr(module, "setup_environment") and callable(module.setup_environment), (
122 |         "Custom environment module defined in {} does not have the "
123 |         "required callable attribute 'setup_environment'."
124 |     ).format(custom_module)
125 |     module.setup_environment()
126 | 


--------------------------------------------------------------------------------
/modeling/roi_heads/box_head.py:
--------------------------------------------------------------------------------
  1 | # Copyright (c) Facebook, Inc. and its affiliates.
  2 | import numpy as np
  3 | from typing import List
  4 | import fvcore.nn.weight_init as weight_init
  5 | import torch
  6 | from torch import nn
  7 | 
  8 | from detectron2.config import configurable
  9 | from detectron2.layers import Conv2d, Linear, ShapeSpec, get_norm
 10 | from detectron2.utils.registry import Registry
 11 | 
 12 | __all__ = ["FastRCNNConvFCHead", "build_box_head", "ROI_BOX_HEAD_REGISTRY"]
 13 | 
 14 | ROI_BOX_HEAD_REGISTRY = Registry("ROI_BOX_HEAD")
 15 | ROI_BOX_HEAD_REGISTRY.__doc__ = """
 16 | Registry for box heads, which make box predictions from per-region features.
 17 | 
 18 | The registered object will be called with `obj(cfg, input_shape)`.
 19 | """
 20 | 
 21 | 
 22 | # To get torchscript support, we make the head a subclass of `nn.Sequential`.
 23 | # Therefore, to add new layers in this head class, please make sure they are
 24 | # added in the order they will be used in forward().
 25 | @ROI_BOX_HEAD_REGISTRY.register()
 26 | class FastRCNNConvFCHead(nn.Sequential):
 27 |     """
 28 |     A head with several 3x3 conv layers (each followed by norm & relu) and then
 29 |     several fc layers (each followed by relu).
 30 |     """
 31 | 
 32 |     @configurable
 33 |     def __init__(
 34 |         self, input_shape: ShapeSpec, *, conv_dims: List[int], fc_dims: List[int], conv_norm=""
 35 |     ):
 36 |         """
 37 |         NOTE: this interface is experimental.
 38 | 
 39 |         Args:
 40 |             input_shape (ShapeSpec): shape of the input feature.
 41 |             conv_dims (list[int]): the output dimensions of the conv layers
 42 |             fc_dims (list[int]): the output dimensions of the fc layers
 43 |             conv_norm (str or callable): normalization for the conv layers.
 44 |                 See :func:`detectron2.layers.get_norm` for supported types.
 45 |         """
 46 |         super().__init__()
 47 |         assert len(conv_dims) + len(fc_dims) > 0
 48 | 
 49 |         self._output_size = (input_shape.channels, input_shape.height, input_shape.width)
 50 | 
 51 |         self.conv_norm_relus = []
 52 |         for k, conv_dim in enumerate(conv_dims):
 53 |             conv = Conv2d(
 54 |                 self._output_size[0],
 55 |                 conv_dim,
 56 |                 kernel_size=3,
 57 |                 padding=1,
 58 |                 bias=not conv_norm,
 59 |                 norm=get_norm(conv_norm, conv_dim),
 60 |                 activation=nn.ReLU(),
 61 |             )
 62 |             self.add_module("conv{}".format(k + 1), conv)
 63 |             self.conv_norm_relus.append(conv)
 64 |             self._output_size = (conv_dim, self._output_size[1], self._output_size[2])
 65 | 
 66 |         self.fcs = []
 67 |         for k, fc_dim in enumerate(fc_dims):
 68 |             if k == 0:
 69 |                 self.add_module("flatten", nn.Flatten())
 70 |             fc = Linear(int(np.prod(self._output_size)), fc_dim)
 71 |             self.add_module("fc{}".format(k + 1), fc)
 72 |             self.add_module("fc_relu{}".format(k + 1), nn.ReLU())
 73 |             self.fcs.append(fc)
 74 |             self._output_size = fc_dim
 75 | 
 76 |         for layer in self.conv_norm_relus:
 77 |             weight_init.c2_msra_fill(layer)
 78 |         for layer in self.fcs:
 79 |             weight_init.c2_xavier_fill(layer)
 80 | 
 81 |     @classmethod
 82 |     def from_config(cls, cfg, input_shape):
 83 |         num_conv = cfg.MODEL.ROI_BOX_HEAD.NUM_CONV
 84 |         conv_dim = cfg.MODEL.ROI_BOX_HEAD.CONV_DIM
 85 |         num_fc = cfg.MODEL.ROI_BOX_HEAD.NUM_FC
 86 |         fc_dim = cfg.MODEL.ROI_BOX_HEAD.FC_DIM
 87 |         return {
 88 |             "input_shape": input_shape,
 89 |             "conv_dims": [conv_dim] * num_conv,
 90 |             "fc_dims": [fc_dim] * num_fc,
 91 |             "conv_norm": cfg.MODEL.ROI_BOX_HEAD.NORM,
 92 |         }
 93 | 
 94 |     def forward(self, x):
 95 |         for layer in self:
 96 |             x = layer(x)
 97 |         return x
 98 | 
 99 |     @property
100 |     @torch.jit.unused
101 |     def output_shape(self):
102 |         """
103 |         Returns:
104 |             ShapeSpec: the output feature shape
105 |         """
106 |         o = self._output_size
107 |         if isinstance(o, int):
108 |             return ShapeSpec(channels=o)
109 |         else:
110 |             return ShapeSpec(channels=o[0], height=o[1], width=o[2])
111 | 
112 | 
113 | def build_box_head(cfg, input_shape):
114 |     """
115 |     Build a box head defined by `cfg.MODEL.ROI_BOX_HEAD.NAME`.
116 |     """
117 |     name = cfg.MODEL.ROI_BOX_HEAD.NAME
118 |     return ROI_BOX_HEAD_REGISTRY.get(name)(cfg, input_shape)
119 | 


--------------------------------------------------------------------------------
/solver/lr_scheduler.py:
--------------------------------------------------------------------------------
  1 | # Copyright (c) Facebook, Inc. and its affiliates.
  2 | import math
  3 | from bisect import bisect_right
  4 | from typing import List
  5 | import torch
  6 | 
  7 | # NOTE: PyTorch's LR scheduler interface uses names that assume the LR changes
  8 | # only on epoch boundaries. We typically use iteration based schedules instead.
  9 | # As a result, "epoch" (e.g., as in self.last_epoch) should be understood to mean
 10 | # "iteration" instead.
 11 | 
 12 | # FIXME: ideally this would be achieved with a CombinedLRScheduler, separating
 13 | # MultiStepLR with WarmupLR but the current LRScheduler design doesn't allow it.
 14 | 
 15 | 
 16 | class WarmupMultiStepLR(torch.optim.lr_scheduler._LRScheduler):
 17 |     def __init__(
 18 |         self,
 19 |         optimizer: torch.optim.Optimizer,
 20 |         milestones: List[int],
 21 |         gamma: float = 0.1,
 22 |         warmup_factor: float = 0.001,
 23 |         warmup_iters: int = 1000,
 24 |         warmup_method: str = "linear",
 25 |         last_epoch: int = -1,
 26 |     ):
 27 |         if not list(milestones) == sorted(milestones):
 28 |             raise ValueError(
 29 |                 "Milestones should be a list of" " increasing integers. Got {}", milestones
 30 |             )
 31 |         self.milestones = milestones
 32 |         self.gamma = gamma
 33 |         self.warmup_factor = warmup_factor
 34 |         self.warmup_iters = warmup_iters
 35 |         self.warmup_method = warmup_method
 36 |         super().__init__(optimizer, last_epoch)
 37 | 
 38 |     def get_lr(self) -> List[float]:
 39 |         warmup_factor = _get_warmup_factor_at_iter(
 40 |             self.warmup_method, self.last_epoch, self.warmup_iters, self.warmup_factor
 41 |         )
 42 |         return [
 43 |             base_lr * warmup_factor * self.gamma ** bisect_right(self.milestones, self.last_epoch)
 44 |             for base_lr in self.base_lrs
 45 |         ]
 46 | 
 47 |     def _compute_values(self) -> List[float]:
 48 |         # The new interface
 49 |         return self.get_lr()
 50 | 
 51 | 
 52 | class WarmupCosineLR(torch.optim.lr_scheduler._LRScheduler):
 53 |     def __init__(
 54 |         self,
 55 |         optimizer: torch.optim.Optimizer,
 56 |         max_iters: int,
 57 |         warmup_factor: float = 0.001,
 58 |         warmup_iters: int = 1000,
 59 |         warmup_method: str = "linear",
 60 |         last_epoch: int = -1,
 61 |     ):
 62 |         self.max_iters = max_iters
 63 |         self.warmup_factor = warmup_factor
 64 |         self.warmup_iters = warmup_iters
 65 |         self.warmup_method = warmup_method
 66 |         super().__init__(optimizer, last_epoch)
 67 | 
 68 |     def get_lr(self) -> List[float]:
 69 |         warmup_factor = _get_warmup_factor_at_iter(
 70 |             self.warmup_method, self.last_epoch, self.warmup_iters, self.warmup_factor
 71 |         )
 72 |         # Different definitions of half-cosine with warmup are possible. For
 73 |         # simplicity we multiply the standard half-cosine schedule by the warmup
 74 |         # factor. An alternative is to start the period of the cosine at warmup_iters
 75 |         # instead of at 0. In the case that warmup_iters << max_iters the two are
 76 |         # very close to each other.
 77 |         return [
 78 |             base_lr
 79 |             * warmup_factor
 80 |             * 0.5
 81 |             * (1.0 + math.cos(math.pi * self.last_epoch / self.max_iters))
 82 |             for base_lr in self.base_lrs
 83 |         ]
 84 | 
 85 |     def _compute_values(self) -> List[float]:
 86 |         # The new interface
 87 |         return self.get_lr()
 88 | 
 89 | 
 90 | def _get_warmup_factor_at_iter(
 91 |     method: str, iter: int, warmup_iters: int, warmup_factor: float
 92 | ) -> float:
 93 |     """
 94 |     Return the learning rate warmup factor at a specific iteration.
 95 |     See :paper:`ImageNet in 1h` for more details.
 96 | 
 97 |     Args:
 98 |         method (str): warmup method; either "constant" or "linear".
 99 |         iter (int): iteration at which to calculate the warmup factor.
100 |         warmup_iters (int): the number of warmup iterations.
101 |         warmup_factor (float): the base warmup factor (the meaning changes according
102 |             to the method used).
103 | 
104 |     Returns:
105 |         float: the effective warmup factor at the given iteration.
106 |     """
107 |     if iter >= warmup_iters:
108 |         return 1.0
109 | 
110 |     if method == "constant":
111 |         return warmup_factor
112 |     elif method == "linear":
113 |         alpha = iter / warmup_iters
114 |         return warmup_factor * (1 - alpha) + alpha
115 |     else:
116 |         raise ValueError("Unknown warmup method: {}".format(method))
117 | 


--------------------------------------------------------------------------------
/layers/wrappers.py:
--------------------------------------------------------------------------------
  1 | # Copyright (c) Facebook, Inc. and its affiliates.
  2 | """
  3 | Wrappers around on some nn functions, mainly to support empty tensors.
  4 | 
  5 | Ideally, add support directly in PyTorch to empty tensors in those functions.
  6 | 
  7 | These can be removed once https://github.com/pytorch/pytorch/issues/12013
  8 | is implemented
  9 | """
 10 | 
 11 | from typing import List
 12 | import torch
 13 | from torch.nn import functional as F
 14 | 
 15 | from detectron2.utils.env import TORCH_VERSION
 16 | 
 17 | 
 18 | def cat(tensors: List[torch.Tensor], dim: int = 0):
 19 |     """
 20 |     Efficient version of torch.cat that avoids a copy if there is only a single element in a list
 21 |     """
 22 |     assert isinstance(tensors, (list, tuple))
 23 |     if len(tensors) == 1:
 24 |         return tensors[0]
 25 |     return torch.cat(tensors, dim)
 26 | 
 27 | 
 28 | class _NewEmptyTensorOp(torch.autograd.Function):
 29 |     @staticmethod
 30 |     def forward(ctx, x, new_shape):
 31 |         ctx.shape = x.shape
 32 |         return x.new_empty(new_shape)
 33 | 
 34 |     @staticmethod
 35 |     def backward(ctx, grad):
 36 |         shape = ctx.shape
 37 |         return _NewEmptyTensorOp.apply(grad, shape), None
 38 | 
 39 | 
 40 | class Conv2d(torch.nn.Conv2d):
 41 |     """
 42 |     A wrapper around :class:`torch.nn.Conv2d` to support empty inputs and more features.
 43 |     """
 44 | 
 45 |     def __init__(self, *args, **kwargs):
 46 |         """
 47 |         Extra keyword arguments supported in addition to those in `torch.nn.Conv2d`:
 48 | 
 49 |         Args:
 50 |             norm (nn.Module, optional): a normalization layer
 51 |             activation (callable(Tensor) -> Tensor): a callable activation function
 52 | 
 53 |         It assumes that norm layer is used before activation.
 54 |         """
 55 |         norm = kwargs.pop("norm", None)
 56 |         activation = kwargs.pop("activation", None)
 57 |         super().__init__(*args, **kwargs)
 58 | 
 59 |         self.norm = norm
 60 |         self.activation = activation
 61 | 
 62 |     def forward(self, x):
 63 |         # torchscript does not support SyncBatchNorm yet
 64 |         # https://github.com/pytorch/pytorch/issues/40507
 65 |         # and we skip these codes in torchscript since:
 66 |         # 1. currently we only support torchscript in evaluation mode
 67 |         # 2. features needed by exporting module to torchscript are added in PyTorch 1.6 or
 68 |         # later version, `Conv2d` in these PyTorch versions has already supported empty inputs.
 69 |         if not torch.jit.is_scripting():
 70 |             if x.numel() == 0 and self.training:
 71 |                 # https://github.com/pytorch/pytorch/issues/12013
 72 |                 assert not isinstance(
 73 |                     self.norm, torch.nn.SyncBatchNorm
 74 |                 ), "SyncBatchNorm does not support empty inputs!"
 75 | 
 76 |         x = F.conv2d(
 77 |             x, self.weight, self.bias, self.stride, self.padding, self.dilation, self.groups
 78 |         )
 79 |         if self.norm is not None:
 80 |             x = self.norm(x)
 81 |         if self.activation is not None:
 82 |             x = self.activation(x)
 83 |         return x
 84 | 
 85 | 
 86 | ConvTranspose2d = torch.nn.ConvTranspose2d
 87 | BatchNorm2d = torch.nn.BatchNorm2d
 88 | interpolate = torch.nn.functional.interpolate
 89 | 
 90 | 
 91 | if TORCH_VERSION > (1, 5):
 92 |     Linear = torch.nn.Linear
 93 | else:
 94 | 
 95 |     class Linear(torch.nn.Linear):
 96 |         """
 97 |         A wrapper around :class:`torch.nn.Linear` to support empty inputs and more features.
 98 |         Because of https://github.com/pytorch/pytorch/issues/34202
 99 |         """
100 | 
101 |         def forward(self, x):
102 |             if x.numel() == 0:
103 |                 output_shape = [x.shape[0], self.weight.shape[0]]
104 | 
105 |                 empty = _NewEmptyTensorOp.apply(x, output_shape)
106 |                 if self.training:
107 |                     # This is to make DDP happy.
108 |                     # DDP expects all workers to have gradient w.r.t the same set of parameters.
109 |                     _dummy = sum(x.view(-1)[0] for x in self.parameters()) * 0.0
110 |                     return empty + _dummy
111 |                 else:
112 |                     return empty
113 | 
114 |             x = super().forward(x)
115 |             return x
116 | 
117 | 
118 | def nonzero_tuple(x):
119 |     """
120 |     A 'as_tuple=True' version of torch.nonzero to support torchscript.
121 |     because of https://github.com/pytorch/pytorch/issues/38718
122 |     """
123 |     if torch.jit.is_scripting():
124 |         if x.dim() == 0:
125 |             return x.unsqueeze(0).nonzero().unbind(1)
126 |         return x.nonzero().unbind(1)
127 |     else:
128 |         return x.nonzero(as_tuple=True)
129 | 


--------------------------------------------------------------------------------
/projects/panoptic_deeplab/dataset_mapper.py:
--------------------------------------------------------------------------------
  1 | # Copyright (c) Facebook, Inc. and its affiliates.
  2 | import copy
  3 | import logging
  4 | import numpy as np
  5 | from typing import Callable, List, Union
  6 | import torch
  7 | from panopticapi.utils import rgb2id
  8 | 
  9 | from detectron2.config import configurable
 10 | from detectron2.data import MetadataCatalog
 11 | from detectron2.data import detection_utils as utils
 12 | from detectron2.data import transforms as T
 13 | 
 14 | from .target_generator import PanopticDeepLabTargetGenerator
 15 | 
 16 | __all__ = ["PanopticDeeplabDatasetMapper"]
 17 | 
 18 | 
 19 | class PanopticDeeplabDatasetMapper:
 20 |     """
 21 |     The callable currently does the following:
 22 | 
 23 |     1. Read the image from "file_name" and label from "pan_seg_file_name"
 24 |     2. Applies random scale, crop and flip transforms to image and label
 25 |     3. Prepare data to Tensor and generate training targets from label
 26 |     """
 27 | 
 28 |     @configurable
 29 |     def __init__(
 30 |         self,
 31 |         *,
 32 |         augmentations: List[Union[T.Augmentation, T.Transform]],
 33 |         image_format: str,
 34 |         panoptic_target_generator: Callable,
 35 |     ):
 36 |         """
 37 |         NOTE: this interface is experimental.
 38 | 
 39 |         Args:
 40 |             augmentations: a list of augmentations or deterministic transforms to apply
 41 |             image_format: an image format supported by :func:`detection_utils.read_image`.
 42 |             panoptic_target_generator: a callable that takes "panoptic_seg" and
 43 |                 "segments_info" to generate training targets for the model.
 44 |         """
 45 |         # fmt: off
 46 |         self.augmentations          = T.AugmentationList(augmentations)
 47 |         self.image_format           = image_format
 48 |         # fmt: on
 49 |         logger = logging.getLogger(__name__)
 50 |         logger.info("Augmentations used in training: " + str(augmentations))
 51 | 
 52 |         self.panoptic_target_generator = panoptic_target_generator
 53 | 
 54 |     @classmethod
 55 |     def from_config(cls, cfg):
 56 |         augs = [
 57 |             T.ResizeShortestEdge(
 58 |                 cfg.INPUT.MIN_SIZE_TRAIN,
 59 |                 cfg.INPUT.MAX_SIZE_TRAIN,
 60 |                 cfg.INPUT.MIN_SIZE_TRAIN_SAMPLING,
 61 |             )
 62 |         ]
 63 |         if cfg.INPUT.CROP.ENABLED:
 64 |             augs.append(T.RandomCrop(cfg.INPUT.CROP.TYPE, cfg.INPUT.CROP.SIZE))
 65 |         augs.append(T.RandomFlip())
 66 | 
 67 |         # Assume always applies to the training set.
 68 |         dataset_names = cfg.DATASETS.TRAIN
 69 |         meta = MetadataCatalog.get(dataset_names[0])
 70 |         panoptic_target_generator = PanopticDeepLabTargetGenerator(
 71 |             ignore_label=meta.ignore_label,
 72 |             thing_ids=list(meta.thing_dataset_id_to_contiguous_id.values()),
 73 |             sigma=cfg.INPUT.GAUSSIAN_SIGMA,
 74 |             ignore_stuff_in_offset=cfg.INPUT.IGNORE_STUFF_IN_OFFSET,
 75 |             small_instance_area=cfg.INPUT.SMALL_INSTANCE_AREA,
 76 |             small_instance_weight=cfg.INPUT.SMALL_INSTANCE_WEIGHT,
 77 |             ignore_crowd_in_semantic=cfg.INPUT.IGNORE_CROWD_IN_SEMANTIC,
 78 |         )
 79 | 
 80 |         ret = {
 81 |             "augmentations": augs,
 82 |             "image_format": cfg.INPUT.FORMAT,
 83 |             "panoptic_target_generator": panoptic_target_generator,
 84 |         }
 85 |         return ret
 86 | 
 87 |     def __call__(self, dataset_dict):
 88 |         """
 89 |         Args:
 90 |             dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format.
 91 | 
 92 |         Returns:
 93 |             dict: a format that builtin models in detectron2 accept
 94 |         """
 95 |         dataset_dict = copy.deepcopy(dataset_dict)  # it will be modified by code below
 96 |         # Load image.
 97 |         image = utils.read_image(dataset_dict["file_name"], format=self.image_format)
 98 |         utils.check_image_size(dataset_dict, image)
 99 |         # Panoptic label is encoded in RGB image.
100 |         pan_seg_gt = utils.read_image(dataset_dict.pop("pan_seg_file_name"), "RGB")
101 | 
102 |         # Reuses semantic transform for panoptic labels.
103 |         aug_input = T.AugInput(image, sem_seg=pan_seg_gt)
104 |         _ = self.augmentations(aug_input)
105 |         image, pan_seg_gt = aug_input.image, aug_input.sem_seg
106 | 
107 |         # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory,
108 |         # but not efficient on large generic data structures due to the use of pickle & mp.Queue.
109 |         # Therefore it's important to use torch.Tensor.
110 |         dataset_dict["image"] = torch.as_tensor(np.ascontiguousarray(image.transpose(2, 0, 1)))
111 | 
112 |         # Generates training targets for Panoptic-DeepLab.
113 |         targets = self.panoptic_target_generator(rgb2id(pan_seg_gt), dataset_dict["segments_info"])
114 |         dataset_dict.update(targets)
115 | 
116 |         return dataset_dict
117 | 


--------------------------------------------------------------------------------
/export/torchscript.py:
--------------------------------------------------------------------------------
  1 | # Copyright (c) Facebook, Inc. and its affiliates.
  2 | 
  3 | import os
  4 | import torch
  5 | 
  6 | from detectron2.utils.file_io import PathManager
  7 | 
  8 | from .torchscript_patch import patch_instances
  9 | 
 10 | 
 11 | def export_torchscript_with_instances(model, fields):
 12 |     """
 13 |     Run :func:`torch.jit.script` on a model that uses the :class:`Instances` class. Since
 14 |     attributes of :class:`Instances` are "dynamically" added in eager mode，it is difficult
 15 |     for torchscript to support it out of the box. This function is made to support scripting
 16 |     a model that uses :class:`Instances`. It does the following:
 17 | 
 18 |     1. Create a scriptable ``new_Instances`` class which behaves similarly to ``Instances``,
 19 |        but with all attributes been "static".
 20 |        The attributes need to be statically declared in the ``fields`` argument.
 21 |     2. Register ``new_Instances`` to torchscript, and force torchscript to
 22 |        use it when trying to compile ``Instances``.
 23 | 
 24 |     After this function, the process will be reverted. User should be able to script another model
 25 |     using different fields.
 26 | 
 27 |     Example:
 28 |         Assume that ``Instances`` in the model consist of two attributes named
 29 |         ``proposal_boxes`` and ``objectness_logits`` with type :class:`Boxes` and
 30 |         :class:`Tensor` respectively during inference. You can call this function like:
 31 | 
 32 |         ::
 33 |             fields = {"proposal_boxes": Boxes, "objectness_logits": torch.Tensor}
 34 |             torchscipt_model =  export_torchscript_with_instances(model, fields)
 35 | 
 36 |     Note:
 37 |         Currently we only support models in evaluation mode.
 38 | 
 39 |     Args:
 40 |         model (nn.Module): The input model to be exported to torchscript.
 41 |         fields (Dict[str, type]): Attribute names and corresponding type that
 42 |             ``Instances`` will use in the model. Note that all attributes used in ``Instances``
 43 |             need to be added, regarldess of whether they are inputs/outputs of the model.
 44 |             Data type not defined in detectron2 is not supported for now.
 45 | 
 46 |     Returns:
 47 |         torch.jit.ScriptModule: the input model in torchscript format
 48 |     """
 49 | 
 50 |     assert (
 51 |         not model.training
 52 |     ), "Currently we only support exporting models in evaluation mode to torchscript"
 53 | 
 54 |     with patch_instances(fields):
 55 |         scripted_model = torch.jit.script(model)
 56 |         return scripted_model
 57 | 
 58 | 
 59 | def dump_torchscript_IR(model, dir):
 60 |     """
 61 |     Dump IR of a TracedModule/ScriptModule at various levels.
 62 |     Useful for debugging.
 63 | 
 64 |     Args:
 65 |         model (TracedModule or ScriptModule): traced or scripted module
 66 |         dir (str): output directory to dump files.
 67 |     """
 68 |     PathManager.mkdirs(dir)
 69 | 
 70 |     def _get_script_mod(mod):
 71 |         if isinstance(mod, torch.jit.TracedModule):
 72 |             return mod._actual_script_module
 73 |         return mod
 74 | 
 75 |     # Dump pretty-printed code: https://pytorch.org/docs/stable/jit.html#inspecting-code
 76 |     with PathManager.open(os.path.join(dir, "model_ts_code.txt"), "w") as f:
 77 | 
 78 |         def get_code(mod):
 79 |             # Try a few ways to get code using private attributes.
 80 |             try:
 81 |                 # This contains more information than just `mod.code`
 82 |                 return _get_script_mod(mod)._c.code
 83 |             except AttributeError:
 84 |                 pass
 85 |             try:
 86 |                 return mod.code
 87 |             except AttributeError:
 88 |                 return None
 89 | 
 90 |         def dump_code(prefix, mod):
 91 |             code = get_code(mod)
 92 |             name = prefix or "root model"
 93 |             if code is None:
 94 |                 f.write(f"Could not found code for {name} (type={mod.original_name})\n")
 95 |                 f.write("\n")
 96 |             else:
 97 |                 f.write(f"\nCode for {name}, type={mod.original_name}:\n")
 98 |                 f.write(code)
 99 |                 f.write("\n")
100 |                 f.write("-" * 80)
101 | 
102 |             for name, m in mod.named_children():
103 |                 dump_code(prefix + "." + name, m)
104 | 
105 |         dump_code("", model)
106 | 
107 |     # Recursively dump IR of all modules
108 |     with PathManager.open(os.path.join(dir, "model_ts_IR.txt"), "w") as f:
109 |         try:
110 |             f.write(_get_script_mod(model)._c.dump_to_str(True, False, False))
111 |         except AttributeError:
112 |             pass
113 | 
114 |     # Dump IR of the entire graph (all submodules inlined)
115 |     with PathManager.open(os.path.join(dir, "model_ts_IR_inlined.txt"), "w") as f:
116 |         f.write(str(model.inlined_graph))
117 | 
118 |     # Dump the model structure in pytorch style
119 |     with PathManager.open(os.path.join(dir, "model.txt"), "w") as f:
120 |         f.write(str(model))
121 | 


--------------------------------------------------------------------------------
/layers/roi_align.py:
--------------------------------------------------------------------------------
  1 | # Copyright (c) Facebook, Inc. and its affiliates.
  2 | from torch import nn
  3 | from torchvision.ops import roi_align as tv_roi_align
  4 | 
  5 | try:
  6 |     from torchvision import __version__
  7 | 
  8 |     version = tuple(int(x) for x in __version__.split(".")[:2])
  9 |     USE_TORCHVISION = version >= (0, 7)  # https://github.com/pytorch/vision/pull/2438
 10 | except ImportError:  # only open source torchvision has __version__
 11 |     USE_TORCHVISION = True
 12 | 
 13 | 
 14 | if USE_TORCHVISION:
 15 |     roi_align = tv_roi_align
 16 | else:
 17 |     from torch.nn.modules.utils import _pair
 18 |     from torch.autograd import Function
 19 |     from torch.autograd.function import once_differentiable
 20 |     from detectron2 import _C
 21 | 
 22 |     class _ROIAlign(Function):
 23 |         @staticmethod
 24 |         def forward(ctx, input, roi, output_size, spatial_scale, sampling_ratio, aligned):
 25 |             ctx.save_for_backward(roi)
 26 |             ctx.output_size = _pair(output_size)
 27 |             ctx.spatial_scale = spatial_scale
 28 |             ctx.sampling_ratio = sampling_ratio
 29 |             ctx.input_shape = input.size()
 30 |             ctx.aligned = aligned
 31 |             output = _C.roi_align_forward(
 32 |                 input, roi, spatial_scale, output_size[0], output_size[1], sampling_ratio, aligned
 33 |             )
 34 |             return output
 35 | 
 36 |         @staticmethod
 37 |         @once_differentiable
 38 |         def backward(ctx, grad_output):
 39 |             (rois,) = ctx.saved_tensors
 40 |             output_size = ctx.output_size
 41 |             spatial_scale = ctx.spatial_scale
 42 |             sampling_ratio = ctx.sampling_ratio
 43 |             bs, ch, h, w = ctx.input_shape
 44 |             grad_input = _C.roi_align_backward(
 45 |                 grad_output,
 46 |                 rois,
 47 |                 spatial_scale,
 48 |                 output_size[0],
 49 |                 output_size[1],
 50 |                 bs,
 51 |                 ch,
 52 |                 h,
 53 |                 w,
 54 |                 sampling_ratio,
 55 |                 ctx.aligned,
 56 |             )
 57 |             return grad_input, None, None, None, None, None
 58 | 
 59 |     roi_align = _ROIAlign.apply
 60 | 
 61 | 
 62 | # NOTE: torchvision's RoIAlign has a different default aligned=False
 63 | class ROIAlign(nn.Module):
 64 |     def __init__(self, output_size, spatial_scale, sampling_ratio, aligned=True):
 65 |         """
 66 |         Args:
 67 |             output_size (tuple): h, w
 68 |             spatial_scale (float): scale the input boxes by this number
 69 |             sampling_ratio (int): number of inputs samples to take for each output
 70 |                 sample. 0 to take samples densely.
 71 |             aligned (bool): if False, use the legacy implementation in
 72 |                 Detectron. If True, align the results more perfectly.
 73 | 
 74 |         Note:
 75 |             The meaning of aligned=True:
 76 | 
 77 |             Given a continuous coordinate c, its two neighboring pixel indices (in our
 78 |             pixel model) are computed by floor(c - 0.5) and ceil(c - 0.5). For example,
 79 |             c=1.3 has pixel neighbors with discrete indices [0] and [1] (which are sampled
 80 |             from the underlying signal at continuous coordinates 0.5 and 1.5). But the original
 81 |             roi_align (aligned=False) does not subtract the 0.5 when computing neighboring
 82 |             pixel indices and therefore it uses pixels with a slightly incorrect alignment
 83 |             (relative to our pixel model) when performing bilinear interpolation.
 84 | 
 85 |             With `aligned=True`,
 86 |             we first appropriately scale the ROI and then shift it by -0.5
 87 |             prior to calling roi_align. This produces the correct neighbors; see
 88 |             detectron2/tests/test_roi_align.py for verification.
 89 | 
 90 |             The difference does not make a difference to the model's performance if
 91 |             ROIAlign is used together with conv layers.
 92 |         """
 93 |         super(ROIAlign, self).__init__()
 94 |         self.output_size = output_size
 95 |         self.spatial_scale = spatial_scale
 96 |         self.sampling_ratio = sampling_ratio
 97 |         self.aligned = aligned
 98 | 
 99 |     def forward(self, input, rois):
100 |         """
101 |         Args:
102 |             input: NCHW images
103 |             rois: Bx5 boxes. First column is the index into N. The other 4 columns are xyxy.
104 |         """
105 |         assert rois.dim() == 2 and rois.size(1) == 5
106 |         return roi_align(
107 |             input,
108 |             rois.to(dtype=input.dtype),
109 |             self.output_size,
110 |             self.spatial_scale,
111 |             self.sampling_ratio,
112 |             self.aligned,
113 |         )
114 | 
115 |     def __repr__(self):
116 |         tmpstr = self.__class__.__name__ + "("
117 |         tmpstr += "output_size=" + str(self.output_size)
118 |         tmpstr += ", spatial_scale=" + str(self.spatial_scale)
119 |         tmpstr += ", sampling_ratio=" + str(self.sampling_ratio)
120 |         tmpstr += ", aligned=" + str(self.aligned)
121 |         tmpstr += ")"
122 |         return tmpstr
123 | 


--------------------------------------------------------------------------------
/layers/aspp.py:
--------------------------------------------------------------------------------
  1 | # Copyright (c) Facebook, Inc. and its affiliates.
  2 | 
  3 | from copy import deepcopy
  4 | import fvcore.nn.weight_init as weight_init
  5 | import torch
  6 | from torch import nn
  7 | from torch.nn import functional as F
  8 | 
  9 | from .batch_norm import get_norm
 10 | from .wrappers import Conv2d
 11 | 
 12 | 
 13 | class ASPP(nn.Module):
 14 |     """
 15 |     Atrous Spatial Pyramid Pooling (ASPP).
 16 |     """
 17 | 
 18 |     def __init__(
 19 |         self,
 20 |         in_channels,
 21 |         out_channels,
 22 |         dilations,
 23 |         *,
 24 |         norm,
 25 |         activation,
 26 |         pool_kernel_size=None,
 27 |         dropout: float = 0.0,
 28 |     ):
 29 |         """
 30 |         Args:
 31 |             in_channels (int): number of input channels for ASPP.
 32 |             out_channels (int): number of output channels.
 33 |             dilations (list): a list of 3 dilations in ASPP.
 34 |             norm (str or callable): normalization for all conv layers.
 35 |                 See :func:`layers.get_norm` for supported format. norm is
 36 |                 applied to all conv layers except the conv following
 37 |                 global average pooling.
 38 |             activation (callable): activation function.
 39 |             pool_kernel_size (tuple, list): the average pooling size (kh, kw)
 40 |                 for image pooling layer in ASPP. If set to None, it always
 41 |                 performs global average pooling. If not None, it must be
 42 |                 divisible by the shape of inputs in forward(). It is recommended
 43 |                 to use a fixed input feature size in training, and set this
 44 |                 option to match this size, so that it performs global average
 45 |                 pooling in training, and the size of the pooling window stays
 46 |                 consistent in inference.
 47 |             dropout (float): apply dropout on the output of ASPP. It is used in
 48 |                 the official DeepLab implementation with a rate of 0.1:
 49 |                 https://github.com/tensorflow/models/blob/21b73d22f3ed05b650e85ac50849408dd36de32e/research/deeplab/model.py#L532  # noqa
 50 |         """
 51 |         super(ASPP, self).__init__()
 52 |         assert len(dilations) == 3, "ASPP expects 3 dilations, got {}".format(len(dilations))
 53 |         self.pool_kernel_size = pool_kernel_size
 54 |         self.dropout = dropout
 55 |         use_bias = norm == ""
 56 |         self.convs = nn.ModuleList()
 57 |         # conv 1x1
 58 |         self.convs.append(
 59 |             Conv2d(
 60 |                 in_channels,
 61 |                 out_channels,
 62 |                 kernel_size=1,
 63 |                 bias=use_bias,
 64 |                 norm=get_norm(norm, out_channels),
 65 |                 activation=deepcopy(activation),
 66 |             )
 67 |         )
 68 |         weight_init.c2_xavier_fill(self.convs[-1])
 69 |         # atrous convs
 70 |         for dilation in dilations:
 71 |             self.convs.append(
 72 |                 Conv2d(
 73 |                     in_channels,
 74 |                     out_channels,
 75 |                     kernel_size=3,
 76 |                     padding=dilation,
 77 |                     dilation=dilation,
 78 |                     bias=use_bias,
 79 |                     norm=get_norm(norm, out_channels),
 80 |                     activation=deepcopy(activation),
 81 |                 )
 82 |             )
 83 |             weight_init.c2_xavier_fill(self.convs[-1])
 84 |         # image pooling
 85 |         # We do not add BatchNorm because the spatial resolution is 1x1,
 86 |         # the original TF implementation has BatchNorm.
 87 |         if pool_kernel_size is None:
 88 |             image_pooling = nn.Sequential(
 89 |                 nn.AdaptiveAvgPool2d(1),
 90 |                 Conv2d(in_channels, out_channels, 1, bias=True, activation=deepcopy(activation)),
 91 |             )
 92 |         else:
 93 |             image_pooling = nn.Sequential(
 94 |                 nn.AvgPool2d(kernel_size=pool_kernel_size, stride=1),
 95 |                 Conv2d(in_channels, out_channels, 1, bias=True, activation=deepcopy(activation)),
 96 |             )
 97 |         weight_init.c2_xavier_fill(image_pooling[1])
 98 |         self.convs.append(image_pooling)
 99 | 
100 |         self.project = Conv2d(
101 |             5 * out_channels,
102 |             out_channels,
103 |             kernel_size=1,
104 |             bias=use_bias,
105 |             norm=get_norm(norm, out_channels),
106 |             activation=deepcopy(activation),
107 |         )
108 |         weight_init.c2_xavier_fill(self.project)
109 | 
110 |     def forward(self, x):
111 |         size = x.shape[-2:]
112 |         if self.pool_kernel_size is not None:
113 |             if size[0] % self.pool_kernel_size[0] or size[1] % self.pool_kernel_size[1]:
114 |                 raise ValueError(
115 |                     "`pool_kernel_size` must be divisible by the shape of inputs. "
116 |                     "Input size: {} `pool_kernel_size`: {}".format(size, self.pool_kernel_size)
117 |                 )
118 |         res = []
119 |         for conv in self.convs:
120 |             res.append(conv(x))
121 |         res[-1] = F.interpolate(res[-1], size=size, mode="bilinear", align_corners=False)
122 |         res = torch.cat(res, dim=1)
123 |         res = self.project(res)
124 |         res = F.dropout(res, self.dropout, training=self.training) if self.dropout > 0 else res
125 |         return res
126 | 


--------------------------------------------------------------------------------
/evaluation/fast_eval_api.py:
--------------------------------------------------------------------------------
  1 | # Copyright (c) Facebook, Inc. and its affiliates.
  2 | import copy
  3 | import numpy as np
  4 | import time
  5 | from pycocotools.cocoeval import COCOeval
  6 | 
  7 | from detectron2 import _C
  8 | 
  9 | 
 10 | class COCOeval_opt(COCOeval):
 11 |     """
 12 |     This is a slightly modified version of the original COCO API, where the functions evaluateImg()
 13 |     and accumulate() are implemented in C++ to speedup evaluation
 14 |     """
 15 | 
 16 |     def evaluate(self):
 17 |         """
 18 |         Run per image evaluation on given images and store results in self.evalImgs_cpp, a
 19 |         datastructure that isn't readable from Python but is used by a c++ implementation of
 20 |         accumulate().  Unlike the original COCO PythonAPI, we don't populate the datastructure
 21 |         self.evalImgs because this datastructure is a computational bottleneck.
 22 |         :return: None
 23 |         """
 24 |         tic = time.time()
 25 | 
 26 |         print("Running per image evaluation...")
 27 |         p = self.params
 28 |         # add backward compatibility if useSegm is specified in params
 29 |         if p.useSegm is not None:
 30 |             p.iouType = "segm" if p.useSegm == 1 else "bbox"
 31 |             print("useSegm (deprecated) is not None. Running {} evaluation".format(p.iouType))
 32 |         print("Evaluate annotation type *{}*".format(p.iouType))
 33 |         p.imgIds = list(np.unique(p.imgIds))
 34 |         if p.useCats:
 35 |             p.catIds = list(np.unique(p.catIds))
 36 |         p.maxDets = sorted(p.maxDets)
 37 |         self.params = p
 38 | 
 39 |         self._prepare()
 40 | 
 41 |         # loop through images, area range, max detection number
 42 |         catIds = p.catIds if p.useCats else [-1]
 43 | 
 44 |         if p.iouType == "segm" or p.iouType == "bbox":
 45 |             computeIoU = self.computeIoU
 46 |         elif p.iouType == "keypoints":
 47 |             computeIoU = self.computeOks
 48 |         self.ious = {
 49 |             (imgId, catId): computeIoU(imgId, catId) for imgId in p.imgIds for catId in catIds
 50 |         }
 51 | 
 52 |         maxDet = p.maxDets[-1]
 53 | 
 54 |         # <<<< Beginning of code differences with original COCO API
 55 |         def convert_instances_to_cpp(instances, is_det=False):
 56 |             # Convert annotations for a list of instances in an image to a format that's fast
 57 |             # to access in C++
 58 |             instances_cpp = []
 59 |             for instance in instances:
 60 |                 instance_cpp = _C.InstanceAnnotation(
 61 |                     int(instance["id"]),
 62 |                     instance["score"] if is_det else instance.get("score", 0.0),
 63 |                     instance["area"],
 64 |                     bool(instance.get("iscrowd", 0)),
 65 |                     bool(instance.get("ignore", 0)),
 66 |                 )
 67 |                 instances_cpp.append(instance_cpp)
 68 |             return instances_cpp
 69 | 
 70 |         # Convert GT annotations, detections, and IOUs to a format that's fast to access in C++
 71 |         ground_truth_instances = [
 72 |             [convert_instances_to_cpp(self._gts[imgId, catId]) for catId in p.catIds]
 73 |             for imgId in p.imgIds
 74 |         ]
 75 |         detected_instances = [
 76 |             [convert_instances_to_cpp(self._dts[imgId, catId], is_det=True) for catId in p.catIds]
 77 |             for imgId in p.imgIds
 78 |         ]
 79 |         ious = [[self.ious[imgId, catId] for catId in catIds] for imgId in p.imgIds]
 80 | 
 81 |         if not p.useCats:
 82 |             # For each image, flatten per-category lists into a single list
 83 |             ground_truth_instances = [[[o for c in i for o in c]] for i in ground_truth_instances]
 84 |             detected_instances = [[[o for c in i for o in c]] for i in detected_instances]
 85 | 
 86 |         # Call C++ implementation of self.evaluateImgs()
 87 |         self._evalImgs_cpp = _C.COCOevalEvaluateImages(
 88 |             p.areaRng, maxDet, p.iouThrs, ious, ground_truth_instances, detected_instances
 89 |         )
 90 |         self._evalImgs = None
 91 | 
 92 |         self._paramsEval = copy.deepcopy(self.params)
 93 |         toc = time.time()
 94 |         print("COCOeval_opt.evaluate() finished in {:0.2f} seconds.".format(toc - tic))
 95 |         # >>>> End of code differences with original COCO API
 96 | 
 97 |     def accumulate(self):
 98 |         """
 99 |         Accumulate per image evaluation results and store the result in self.eval.  Does not
100 |         support changing parameter settings from those used by self.evaluate()
101 |         """
102 |         print("Accumulating evaluation results...")
103 |         tic = time.time()
104 |         if not hasattr(self, "_evalImgs_cpp"):
105 |             print("Please run evaluate() first")
106 | 
107 |         self.eval = _C.COCOevalAccumulate(self._paramsEval, self._evalImgs_cpp)
108 | 
109 |         # recall is num_iou_thresholds X num_categories X num_area_ranges X num_max_detections
110 |         self.eval["recall"] = np.array(self.eval["recall"]).reshape(
111 |             self.eval["counts"][:1] + self.eval["counts"][2:]
112 |         )
113 | 
114 |         # precision and scores are num_iou_thresholds X num_recall_thresholds X num_categories X
115 |         # num_area_ranges X num_max_detections
116 |         self.eval["precision"] = np.array(self.eval["precision"]).reshape(self.eval["counts"])
117 |         self.eval["scores"] = np.array(self.eval["scores"]).reshape(self.eval["counts"])
118 |         toc = time.time()
119 |         print("COCOeval_opt.accumulate() finished in {:0.2f} seconds.".format(toc - tic))
120 | 


--------------------------------------------------------------------------------
/export/caffe2_patch.py:
--------------------------------------------------------------------------------
  1 | # Copyright (c) Facebook, Inc. and its affiliates.
  2 | 
  3 | import contextlib
  4 | from unittest import mock
  5 | import torch
  6 | 
  7 | from detectron2.modeling import poolers
  8 | from detectron2.modeling.proposal_generator import rpn
  9 | from detectron2.modeling.roi_heads import keypoint_head, mask_head
 10 | from detectron2.modeling.roi_heads.fast_rcnn import FastRCNNOutputLayers
 11 | 
 12 | from .c10 import (
 13 |     Caffe2Compatible,
 14 |     Caffe2FastRCNNOutputsInference,
 15 |     Caffe2KeypointRCNNInference,
 16 |     Caffe2MaskRCNNInference,
 17 |     Caffe2ROIPooler,
 18 |     Caffe2RPN,
 19 | )
 20 | 
 21 | 
 22 | class GenericMixin(object):
 23 |     pass
 24 | 
 25 | 
 26 | class Caffe2CompatibleConverter(object):
 27 |     """
 28 |     A GenericUpdater which implements the `create_from` interface, by modifying
 29 |     module object and assign it with another class replaceCls.
 30 |     """
 31 | 
 32 |     def __init__(self, replaceCls):
 33 |         self.replaceCls = replaceCls
 34 | 
 35 |     def create_from(self, module):
 36 |         # update module's class to the new class
 37 |         assert isinstance(module, torch.nn.Module)
 38 |         if issubclass(self.replaceCls, GenericMixin):
 39 |             # replaceCls should act as mixin, create a new class on-the-fly
 40 |             new_class = type(
 41 |                 "{}MixedWith{}".format(self.replaceCls.__name__, module.__class__.__name__),
 42 |                 (self.replaceCls, module.__class__),
 43 |                 {},  # {"new_method": lambda self: ...},
 44 |             )
 45 |             module.__class__ = new_class
 46 |         else:
 47 |             # replaceCls is complete class, this allow arbitrary class swap
 48 |             module.__class__ = self.replaceCls
 49 | 
 50 |         # initialize Caffe2Compatible
 51 |         if isinstance(module, Caffe2Compatible):
 52 |             module.tensor_mode = False
 53 | 
 54 |         return module
 55 | 
 56 | 
 57 | def patch(model, target, updater, *args, **kwargs):
 58 |     """
 59 |     recursively (post-order) update all modules with the target type and its
 60 |     subclasses, make a initialization/composition/inheritance/... via the
 61 |     updater.create_from.
 62 |     """
 63 |     for name, module in model.named_children():
 64 |         model._modules[name] = patch(module, target, updater, *args, **kwargs)
 65 |     if isinstance(model, target):
 66 |         return updater.create_from(model, *args, **kwargs)
 67 |     return model
 68 | 
 69 | 
 70 | def patch_generalized_rcnn(model):
 71 |     ccc = Caffe2CompatibleConverter
 72 |     model = patch(model, rpn.RPN, ccc(Caffe2RPN))
 73 |     model = patch(model, poolers.ROIPooler, ccc(Caffe2ROIPooler))
 74 | 
 75 |     return model
 76 | 
 77 | 
 78 | @contextlib.contextmanager
 79 | def mock_fastrcnn_outputs_inference(
 80 |     tensor_mode, check=True, box_predictor_type=FastRCNNOutputLayers
 81 | ):
 82 |     with mock.patch.object(
 83 |         box_predictor_type,
 84 |         "inference",
 85 |         autospec=True,
 86 |         side_effect=Caffe2FastRCNNOutputsInference(tensor_mode),
 87 |     ) as mocked_func:
 88 |         yield
 89 |     if check:
 90 |         assert mocked_func.call_count > 0
 91 | 
 92 | 
 93 | @contextlib.contextmanager
 94 | def mock_mask_rcnn_inference(tensor_mode, patched_module, check=True):
 95 |     with mock.patch(
 96 |         "{}.mask_rcnn_inference".format(patched_module), side_effect=Caffe2MaskRCNNInference()
 97 |     ) as mocked_func:
 98 |         yield
 99 |     if check:
100 |         assert mocked_func.call_count > 0
101 | 
102 | 
103 | @contextlib.contextmanager
104 | def mock_keypoint_rcnn_inference(tensor_mode, patched_module, use_heatmap_max_keypoint, check=True):
105 |     with mock.patch(
106 |         "{}.keypoint_rcnn_inference".format(patched_module),
107 |         side_effect=Caffe2KeypointRCNNInference(use_heatmap_max_keypoint),
108 |     ) as mocked_func:
109 |         yield
110 |     if check:
111 |         assert mocked_func.call_count > 0
112 | 
113 | 
114 | class ROIHeadsPatcher:
115 |     def __init__(self, heads, use_heatmap_max_keypoint):
116 |         self.heads = heads
117 |         self.use_heatmap_max_keypoint = use_heatmap_max_keypoint
118 | 
119 |     @contextlib.contextmanager
120 |     def mock_roi_heads(self, tensor_mode=True):
121 |         """
122 |         Patching several inference functions inside ROIHeads and its subclasses
123 | 
124 |         Args:
125 |             tensor_mode (bool): whether the inputs/outputs are caffe2's tensor
126 |                 format or not. Default to True.
127 |         """
128 |         # NOTE: this requries the `keypoint_rcnn_inference` and `mask_rcnn_inference`
129 |         # are called inside the same file as BaseXxxHead due to using mock.patch.
130 |         kpt_heads_mod = keypoint_head.BaseKeypointRCNNHead.__module__
131 |         mask_head_mod = mask_head.BaseMaskRCNNHead.__module__
132 | 
133 |         mock_ctx_managers = [
134 |             mock_fastrcnn_outputs_inference(
135 |                 tensor_mode=tensor_mode,
136 |                 check=True,
137 |                 box_predictor_type=type(self.heads.box_predictor),
138 |             )
139 |         ]
140 |         if getattr(self.heads, "keypoint_on", False):
141 |             mock_ctx_managers += [
142 |                 mock_keypoint_rcnn_inference(
143 |                     tensor_mode, kpt_heads_mod, self.use_heatmap_max_keypoint
144 |                 )
145 |             ]
146 |         if getattr(self.heads, "mask_on", False):
147 |             mock_ctx_managers += [mock_mask_rcnn_inference(tensor_mode, mask_head_mod)]
148 | 
149 |         with contextlib.ExitStack() as stack:  # python 3.3+
150 |             for mgr in mock_ctx_managers:
151 |                 stack.enter_context(mgr)
152 |             yield
153 | 


--------------------------------------------------------------------------------
/structures/image_list.py:
--------------------------------------------------------------------------------
  1 | # Copyright (c) Facebook, Inc. and its affiliates.
  2 | from __future__ import division
  3 | from typing import Any, List, Tuple
  4 | import torch
  5 | from torch import device
  6 | from torch.nn import functional as F
  7 | 
  8 | from detectron2.utils.env import TORCH_VERSION
  9 | 
 10 | 
 11 | class ImageList(object):
 12 |     """
 13 |     Structure that holds a list of images (of possibly
 14 |     varying sizes) as a single tensor.
 15 |     This works by padding the images to the same size,
 16 |     and storing in a field the original sizes of each image
 17 | 
 18 |     Attributes:
 19 |         image_sizes (list[tuple[int, int]]): each tuple is (h, w)
 20 |     """
 21 | 
 22 |     def __init__(self, tensor: torch.Tensor, image_sizes: List[Tuple[int, int]]):
 23 |         """
 24 |         Arguments:
 25 |             tensor (Tensor): of shape (N, H, W) or (N, C_1, ..., C_K, H, W) where K >= 1
 26 |             image_sizes (list[tuple[int, int]]): Each tuple is (h, w). It can
 27 |                 be smaller than (H, W) due to padding.
 28 |         """
 29 |         self.tensor = tensor
 30 |         self.image_sizes = image_sizes
 31 | 
 32 |     def __len__(self) -> int:
 33 |         return len(self.image_sizes)
 34 | 
 35 |     def __getitem__(self, idx) -> torch.Tensor:
 36 |         """
 37 |         Access the individual image in its original size.
 38 | 
 39 |         Args:
 40 |             idx: int or slice
 41 | 
 42 |         Returns:
 43 |             Tensor: an image of shape (H, W) or (C_1, ..., C_K, H, W) where K >= 1
 44 |         """
 45 |         size = self.image_sizes[idx]
 46 |         return self.tensor[idx, ..., : size[0], : size[1]]
 47 | 
 48 |     @torch.jit.unused
 49 |     def to(self, *args: Any, **kwargs: Any) -> "ImageList":
 50 |         cast_tensor = self.tensor.to(*args, **kwargs)
 51 |         return ImageList(cast_tensor, self.image_sizes)
 52 | 
 53 |     @property
 54 |     def device(self) -> device:
 55 |         return self.tensor.device
 56 | 
 57 |     @staticmethod
 58 |     def from_tensors(
 59 |         tensors: List[torch.Tensor], size_divisibility: int = 0, pad_value: float = 0.0
 60 |     ) -> "ImageList":
 61 |         """
 62 |         Args:
 63 |             tensors: a tuple or list of `torch.Tensors`, each of shape (Hi, Wi) or
 64 |                 (C_1, ..., C_K, Hi, Wi) where K >= 1. The Tensors will be padded
 65 |                 to the same shape with `pad_value`.
 66 |             size_divisibility (int): If `size_divisibility > 0`, add padding to ensure
 67 |                 the common height and width is divisible by `size_divisibility`.
 68 |                 This depends on the model and many models need a divisibility of 32.
 69 |             pad_value (float): value to pad
 70 | 
 71 |         Returns:
 72 |             an `ImageList`.
 73 |         """
 74 |         assert len(tensors) > 0
 75 |         assert isinstance(tensors, (tuple, list))
 76 |         for t in tensors:
 77 |             assert isinstance(t, torch.Tensor), type(t)
 78 |             assert t.shape[1:-2] == tensors[0].shape[1:-2], t.shape
 79 | 
 80 |         # Magic code below that handles dynamic shapes for both scripting and tracing ...
 81 | 
 82 |         image_sizes = [(im.shape[-2], im.shape[-1]) for im in tensors]
 83 | 
 84 |         if torch.jit.is_scripting():
 85 |             max_size = torch.stack([torch.as_tensor(x) for x in image_sizes]).max(0).values
 86 |             if size_divisibility > 1:
 87 |                 stride = size_divisibility
 88 |                 # the last two dims are H,W, both subject to divisibility requirement
 89 |                 max_size = (max_size + (stride - 1)) // stride * stride
 90 | 
 91 |             max_size: List[int] = max_size.to(dtype=torch.long).tolist()
 92 |         else:
 93 |             # https://github.com/pytorch/pytorch/issues/42448
 94 |             if TORCH_VERSION >= (1, 7) and torch.jit.is_tracing():
 95 |                 # In tracing mode, x.shape[i] is a scalar Tensor, and should not be converted
 96 |                 # to int: this will cause the traced graph to have hard-coded shapes.
 97 |                 # Instead we convert each shape to a vector with a stack()
 98 |                 image_sizes = [torch.stack(x) for x in image_sizes]
 99 | 
100 |                 # maximum (H, W) for the last two dims
101 |                 # find the maximum in a tracable way
102 |                 max_size = torch.stack(image_sizes).max(0).values
103 |             else:
104 |                 # Original eager logic here -- not scripting, not tracing:
105 |                 # (can be unified with scripting after
106 |                 # https://github.com/pytorch/pytorch/issues/47379)
107 |                 max_size = torch.as_tensor(
108 |                     [max(s) for s in zip(*[img.shape[-2:] for img in tensors])]
109 |                 )
110 | 
111 |             if size_divisibility > 1:
112 |                 stride = size_divisibility
113 |                 # the last two dims are H,W, both subject to divisibility requirement
114 |                 max_size = (max_size + (stride - 1)) // stride * stride
115 | 
116 |         if len(tensors) == 1:
117 |             # This seems slightly (2%) faster.
118 |             # TODO: check whether it's faster for multiple images as well
119 |             image_size = image_sizes[0]
120 |             padding_size = [0, max_size[-1] - image_size[1], 0, max_size[-2] - image_size[0]]
121 |             batched_imgs = F.pad(tensors[0], padding_size, value=pad_value).unsqueeze_(0)
122 |         else:
123 |             # max_size can be a tensor in tracing mode, therefore convert to list
124 |             batch_shape = [len(tensors)] + list(tensors[0].shape[:-2]) + list(max_size)
125 |             batched_imgs = tensors[0].new_full(batch_shape, pad_value)
126 |             for img, pad_img in zip(tensors, batched_imgs):
127 |                 pad_img[..., : img.shape[-2], : img.shape[-1]].copy_(img)
128 | 
129 |         return ImageList(batched_imgs.contiguous(), image_sizes)
130 | 


--------------------------------------------------------------------------------
/utils/analysis.py:
--------------------------------------------------------------------------------
  1 | # Copyright (c) Facebook, Inc. and its affiliates.
  2 | # -*- coding: utf-8 -*-
  3 | 
  4 | import logging
  5 | import typing
  6 | import torch
  7 | from fvcore.nn import activation_count, flop_count, parameter_count, parameter_count_table
  8 | from torch import nn
  9 | 
 10 | from detectron2.structures import BitMasks, Boxes, ImageList, Instances
 11 | 
 12 | from .logger import log_first_n
 13 | 
 14 | __all__ = [
 15 |     "activation_count_operators",
 16 |     "flop_count_operators",
 17 |     "parameter_count_table",
 18 |     "parameter_count",
 19 | ]
 20 | 
 21 | FLOPS_MODE = "flops"
 22 | ACTIVATIONS_MODE = "activations"
 23 | 
 24 | 
 25 | # some extra ops to ignore from counting.
 26 | _IGNORED_OPS = {
 27 |     "aten::add",
 28 |     "aten::add_",
 29 |     "aten::batch_norm",
 30 |     "aten::constant_pad_nd",
 31 |     "aten::div",
 32 |     "aten::div_",
 33 |     "aten::exp",
 34 |     "aten::log2",
 35 |     "aten::max_pool2d",
 36 |     "aten::meshgrid",
 37 |     "aten::mul",
 38 |     "aten::mul_",
 39 |     "aten::nonzero_numpy",
 40 |     "aten::rsub",
 41 |     "aten::sigmoid",
 42 |     "aten::sigmoid_",
 43 |     "aten::softmax",
 44 |     "aten::sort",
 45 |     "aten::sqrt",
 46 |     "aten::sub",
 47 |     "aten::upsample_nearest2d",
 48 |     "prim::PythonOp",
 49 |     "torchvision::nms",  # TODO estimate flop for nms
 50 | }
 51 | 
 52 | 
 53 | def flop_count_operators(
 54 |     model: nn.Module, inputs: list, **kwargs
 55 | ) -> typing.DefaultDict[str, float]:
 56 |     """
 57 |     Implement operator-level flops counting using jit.
 58 |     This is a wrapper of fvcore.nn.flop_count, that supports standard detection models
 59 |     in detectron2.
 60 | 
 61 |     Note:
 62 |         The function runs the input through the model to compute flops.
 63 |         The flops of a detection model is often input-dependent, for example,
 64 |         the flops of box & mask head depends on the number of proposals &
 65 |         the number of detected objects.
 66 |         Therefore, the flops counting using a single input may not accurately
 67 |         reflect the computation cost of a model.
 68 | 
 69 |     Args:
 70 |         model: a detectron2 model that takes `list[dict]` as input.
 71 |         inputs (list[dict]): inputs to model, in detectron2's standard format.
 72 |     """
 73 |     return _wrapper_count_operators(model=model, inputs=inputs, mode=FLOPS_MODE, **kwargs)
 74 | 
 75 | 
 76 | def activation_count_operators(
 77 |     model: nn.Module, inputs: list, **kwargs
 78 | ) -> typing.DefaultDict[str, float]:
 79 |     """
 80 |     Implement operator-level activations counting using jit.
 81 |     This is a wrapper of fvcore.nn.activation_count, that supports standard detection models
 82 |     in detectron2.
 83 | 
 84 |     Note:
 85 |         The function runs the input through the model to compute activations.
 86 |         The activations of a detection model is often input-dependent, for example,
 87 |         the activations of box & mask head depends on the number of proposals &
 88 |         the number of detected objects.
 89 | 
 90 |     Args:
 91 |         model: a detectron2 model that takes `list[dict]` as input.
 92 |         inputs (list[dict]): inputs to model, in detectron2's standard format.
 93 |     """
 94 |     return _wrapper_count_operators(model=model, inputs=inputs, mode=ACTIVATIONS_MODE, **kwargs)
 95 | 
 96 | 
 97 | def _flatten_to_tuple(outputs):
 98 |     result = []
 99 |     if isinstance(outputs, torch.Tensor):
100 |         result.append(outputs)
101 |     elif isinstance(outputs, (list, tuple)):
102 |         for v in outputs:
103 |             result.extend(_flatten_to_tuple(v))
104 |     elif isinstance(outputs, dict):
105 |         for _, v in outputs.items():
106 |             result.extend(_flatten_to_tuple(v))
107 |     elif isinstance(outputs, Instances):
108 |         result.extend(_flatten_to_tuple(outputs.get_fields()))
109 |     elif isinstance(outputs, (Boxes, BitMasks, ImageList)):
110 |         result.append(outputs.tensor)
111 |     else:
112 |         log_first_n(
113 |             logging.WARN,
114 |             f"Output of type {type(outputs)} not included in flops/activations count.",
115 |             n=10,
116 |         )
117 |     return tuple(result)
118 | 
119 | 
120 | def _wrapper_count_operators(
121 |     model: nn.Module, inputs: list, mode: str, **kwargs
122 | ) -> typing.DefaultDict[str, float]:
123 | 
124 |     # ignore some ops
125 |     supported_ops = {k: lambda *args, **kwargs: {} for k in _IGNORED_OPS}
126 |     supported_ops.update(kwargs.pop("supported_ops", {}))
127 |     kwargs["supported_ops"] = supported_ops
128 | 
129 |     assert len(inputs) == 1, "Please use batch size=1"
130 |     tensor_input = inputs[0]["image"]
131 | 
132 |     class WrapModel(nn.Module):
133 |         def __init__(self, model):
134 |             super().__init__()
135 |             if isinstance(
136 |                 model, (nn.parallel.distributed.DistributedDataParallel, nn.DataParallel)
137 |             ):
138 |                 self.model = model.module
139 |             else:
140 |                 self.model = model
141 | 
142 |         def forward(self, image):
143 |             # jit requires the input/output to be Tensors
144 |             inputs = [{"image": image}]
145 |             outputs = self.model.forward(inputs)
146 |             # Only the subgraph that computes the returned tuple of tensor will be
147 |             # counted. So we flatten everything we found to tuple of tensors.
148 |             return _flatten_to_tuple(outputs)
149 | 
150 |     old_train = model.training
151 |     with torch.no_grad():
152 |         if mode == FLOPS_MODE:
153 |             ret = flop_count(WrapModel(model).train(False), (tensor_input,), **kwargs)
154 |         elif mode == ACTIVATIONS_MODE:
155 |             ret = activation_count(WrapModel(model).train(False), (tensor_input,), **kwargs)
156 |         else:
157 |             raise NotImplementedError("Count for mode {} is not supported yet.".format(mode))
158 |     # compatible with change in fvcore
159 |     if isinstance(ret, tuple):
160 |         ret = ret[0]
161 |     model.train(old_train)
162 |     return ret
163 | 


--------------------------------------------------------------------------------
/checkpoint/catalog.py:
--------------------------------------------------------------------------------
  1 | # Copyright (c) Facebook, Inc. and its affiliates.
  2 | import logging
  3 | 
  4 | from detectron2.utils.file_io import PathHandler, PathManager
  5 | 
  6 | 
  7 | class ModelCatalog(object):
  8 |     """
  9 |     Store mappings from names to third-party models.
 10 |     """
 11 | 
 12 |     S3_C2_DETECTRON_PREFIX = "https://dl.fbaipublicfiles.com/detectron"
 13 | 
 14 |     # MSRA models have STRIDE_IN_1X1=True. False otherwise.
 15 |     # NOTE: all BN models here have fused BN into an affine layer.
 16 |     # As a result, you should only load them to a model with "FrozenBN".
 17 |     # Loading them to a model with regular BN or SyncBN is wrong.
 18 |     # Even when loaded to FrozenBN, it is still different from affine by an epsilon,
 19 |     # which should be negligible for training.
 20 |     # NOTE: all models here uses PIXEL_STD=[1,1,1]
 21 |     # NOTE: Most of the BN models here are no longer used. We use the
 22 |     # re-converted pre-trained models under detectron2 model zoo instead.
 23 |     C2_IMAGENET_MODELS = {
 24 |         "MSRA/R-50": "ImageNetPretrained/MSRA/R-50.pkl",
 25 |         "MSRA/R-101": "ImageNetPretrained/MSRA/R-101.pkl",
 26 |         "FAIR/R-50-GN": "ImageNetPretrained/47261647/R-50-GN.pkl",
 27 |         "FAIR/R-101-GN": "ImageNetPretrained/47592356/R-101-GN.pkl",
 28 |         "FAIR/X-101-32x8d": "ImageNetPretrained/20171220/X-101-32x8d.pkl",
 29 |         "FAIR/X-101-64x4d": "ImageNetPretrained/FBResNeXt/X-101-64x4d.pkl",
 30 |         "FAIR/X-152-32x8d-IN5k": "ImageNetPretrained/25093814/X-152-32x8d-IN5k.pkl",
 31 |     }
 32 | 
 33 |     C2_DETECTRON_PATH_FORMAT = (
 34 |         "{prefix}/{url}/output/train/{dataset}/{type}/model_final.pkl"  # noqa B950
 35 |     )
 36 | 
 37 |     C2_DATASET_COCO = "coco_2014_train%3Acoco_2014_valminusminival"
 38 |     C2_DATASET_COCO_KEYPOINTS = "keypoints_coco_2014_train%3Akeypoints_coco_2014_valminusminival"
 39 | 
 40 |     # format: {model_name} -> part of the url
 41 |     C2_DETECTRON_MODELS = {
 42 |         "35857197/e2e_faster_rcnn_R-50-C4_1x": "35857197/12_2017_baselines/e2e_faster_rcnn_R-50-C4_1x.yaml.01_33_49.iAX0mXvW",  # noqa B950
 43 |         "35857345/e2e_faster_rcnn_R-50-FPN_1x": "35857345/12_2017_baselines/e2e_faster_rcnn_R-50-FPN_1x.yaml.01_36_30.cUF7QR7I",  # noqa B950
 44 |         "35857890/e2e_faster_rcnn_R-101-FPN_1x": "35857890/12_2017_baselines/e2e_faster_rcnn_R-101-FPN_1x.yaml.01_38_50.sNxI7sX7",  # noqa B950
 45 |         "36761737/e2e_faster_rcnn_X-101-32x8d-FPN_1x": "36761737/12_2017_baselines/e2e_faster_rcnn_X-101-32x8d-FPN_1x.yaml.06_31_39.5MIHi1fZ",  # noqa B950
 46 |         "35858791/e2e_mask_rcnn_R-50-C4_1x": "35858791/12_2017_baselines/e2e_mask_rcnn_R-50-C4_1x.yaml.01_45_57.ZgkA7hPB",  # noqa B950
 47 |         "35858933/e2e_mask_rcnn_R-50-FPN_1x": "35858933/12_2017_baselines/e2e_mask_rcnn_R-50-FPN_1x.yaml.01_48_14.DzEQe4wC",  # noqa B950
 48 |         "35861795/e2e_mask_rcnn_R-101-FPN_1x": "35861795/12_2017_baselines/e2e_mask_rcnn_R-101-FPN_1x.yaml.02_31_37.KqyEK4tT",  # noqa B950
 49 |         "36761843/e2e_mask_rcnn_X-101-32x8d-FPN_1x": "36761843/12_2017_baselines/e2e_mask_rcnn_X-101-32x8d-FPN_1x.yaml.06_35_59.RZotkLKI",  # noqa B950
 50 |         "48616381/e2e_mask_rcnn_R-50-FPN_2x_gn": "GN/48616381/04_2018_gn_baselines/e2e_mask_rcnn_R-50-FPN_2x_gn_0416.13_23_38.bTlTI97Q",  # noqa B950
 51 |         "37697547/e2e_keypoint_rcnn_R-50-FPN_1x": "37697547/12_2017_baselines/e2e_keypoint_rcnn_R-50-FPN_1x.yaml.08_42_54.kdzV35ao",  # noqa B950
 52 |         "35998355/rpn_R-50-C4_1x": "35998355/12_2017_baselines/rpn_R-50-C4_1x.yaml.08_00_43.njH5oD9L",  # noqa B950
 53 |         "35998814/rpn_R-50-FPN_1x": "35998814/12_2017_baselines/rpn_R-50-FPN_1x.yaml.08_06_03.Axg0r179",  # noqa B950
 54 |         "36225147/fast_R-50-FPN_1x": "36225147/12_2017_baselines/fast_rcnn_R-50-FPN_1x.yaml.08_39_09.L3obSdQ2",  # noqa B950
 55 |     }
 56 | 
 57 |     @staticmethod
 58 |     def get(name):
 59 |         if name.startswith("Caffe2Detectron/COCO"):
 60 |             return ModelCatalog._get_c2_detectron_baseline(name)
 61 |         if name.startswith("ImageNetPretrained/"):
 62 |             return ModelCatalog._get_c2_imagenet_pretrained(name)
 63 |         raise RuntimeError("model not present in the catalog: {}".format(name))
 64 | 
 65 |     @staticmethod
 66 |     def _get_c2_imagenet_pretrained(name):
 67 |         prefix = ModelCatalog.S3_C2_DETECTRON_PREFIX
 68 |         name = name[len("ImageNetPretrained/") :]
 69 |         name = ModelCatalog.C2_IMAGENET_MODELS[name]
 70 |         url = "/".join([prefix, name])
 71 |         return url
 72 | 
 73 |     @staticmethod
 74 |     def _get_c2_detectron_baseline(name):
 75 |         name = name[len("Caffe2Detectron/COCO/") :]
 76 |         url = ModelCatalog.C2_DETECTRON_MODELS[name]
 77 |         if "keypoint_rcnn" in name:
 78 |             dataset = ModelCatalog.C2_DATASET_COCO_KEYPOINTS
 79 |         else:
 80 |             dataset = ModelCatalog.C2_DATASET_COCO
 81 | 
 82 |         if "35998355/rpn_R-50-C4_1x" in name:
 83 |             # this one model is somehow different from others ..
 84 |             type = "rpn"
 85 |         else:
 86 |             type = "generalized_rcnn"
 87 | 
 88 |         # Detectron C2 models are stored in the structure defined in `C2_DETECTRON_PATH_FORMAT`.
 89 |         url = ModelCatalog.C2_DETECTRON_PATH_FORMAT.format(
 90 |             prefix=ModelCatalog.S3_C2_DETECTRON_PREFIX, url=url, type=type, dataset=dataset
 91 |         )
 92 |         return url
 93 | 
 94 | 
 95 | class ModelCatalogHandler(PathHandler):
 96 |     """
 97 |     Resolve URL like catalog://.
 98 |     """
 99 | 
100 |     PREFIX = "catalog://"
101 | 
102 |     def _get_supported_prefixes(self):
103 |         return [self.PREFIX]
104 | 
105 |     def _get_local_path(self, path):
106 |         logger = logging.getLogger(__name__)
107 |         catalog_path = ModelCatalog.get(path[len(self.PREFIX) :])
108 |         logger.info("Catalog entry {} points to {}".format(path, catalog_path))
109 |         return PathManager.get_local_path(catalog_path)
110 | 
111 |     def _open(self, path, mode="r", **kwargs):
112 |         return PathManager.open(self._get_local_path(path), mode, **kwargs)
113 | 
114 | 
115 | PathManager.register_handler(ModelCatalogHandler())
116 | 


--------------------------------------------------------------------------------
/projects/point_rend/semantic_seg.py:
--------------------------------------------------------------------------------
  1 | # Copyright (c) Facebook, Inc. and its affiliates.
  2 | import numpy as np
  3 | from typing import Dict
  4 | import torch
  5 | from torch import nn
  6 | from torch.nn import functional as F
  7 | 
  8 | from detectron2.layers import ShapeSpec, cat
  9 | from detectron2.modeling import SEM_SEG_HEADS_REGISTRY
 10 | 
 11 | from .point_features import (
 12 |     get_uncertain_point_coords_on_grid,
 13 |     get_uncertain_point_coords_with_randomness,
 14 |     point_sample,
 15 | )
 16 | from .point_head import build_point_head
 17 | 
 18 | 
 19 | def calculate_uncertainty(sem_seg_logits):
 20 |     """
 21 |     For each location of the prediction `sem_seg_logits` we estimate uncerainty as the
 22 |         difference between top first and top second predicted logits.
 23 | 
 24 |     Args:
 25 |         mask_logits (Tensor): A tensor of shape (N, C, ...), where N is the minibatch size and
 26 |             C is the number of foreground classes. The values are logits.
 27 | 
 28 |     Returns:
 29 |         scores (Tensor): A tensor of shape (N, 1, ...) that contains uncertainty scores with
 30 |             the most uncertain locations having the highest uncertainty score.
 31 |     """
 32 |     top2_scores = torch.topk(sem_seg_logits, k=2, dim=1)[0]
 33 |     return (top2_scores[:, 1] - top2_scores[:, 0]).unsqueeze(1)
 34 | 
 35 | 
 36 | @SEM_SEG_HEADS_REGISTRY.register()
 37 | class PointRendSemSegHead(nn.Module):
 38 |     """
 39 |     A semantic segmentation head that combines a head set in `POINT_HEAD.COARSE_SEM_SEG_HEAD_NAME`
 40 |     and a point head set in `MODEL.POINT_HEAD.NAME`.
 41 |     """
 42 | 
 43 |     def __init__(self, cfg, input_shape: Dict[str, ShapeSpec]):
 44 |         super().__init__()
 45 | 
 46 |         self.ignore_value = cfg.MODEL.SEM_SEG_HEAD.IGNORE_VALUE
 47 | 
 48 |         self.coarse_sem_seg_head = SEM_SEG_HEADS_REGISTRY.get(
 49 |             cfg.MODEL.POINT_HEAD.COARSE_SEM_SEG_HEAD_NAME
 50 |         )(cfg, input_shape)
 51 |         self._init_point_head(cfg, input_shape)
 52 | 
 53 |     def _init_point_head(self, cfg, input_shape: Dict[str, ShapeSpec]):
 54 |         # fmt: off
 55 |         assert cfg.MODEL.SEM_SEG_HEAD.NUM_CLASSES == cfg.MODEL.POINT_HEAD.NUM_CLASSES
 56 |         feature_channels             = {k: v.channels for k, v in input_shape.items()}
 57 |         self.in_features             = cfg.MODEL.POINT_HEAD.IN_FEATURES
 58 |         self.train_num_points        = cfg.MODEL.POINT_HEAD.TRAIN_NUM_POINTS
 59 |         self.oversample_ratio        = cfg.MODEL.POINT_HEAD.OVERSAMPLE_RATIO
 60 |         self.importance_sample_ratio = cfg.MODEL.POINT_HEAD.IMPORTANCE_SAMPLE_RATIO
 61 |         self.subdivision_steps       = cfg.MODEL.POINT_HEAD.SUBDIVISION_STEPS
 62 |         self.subdivision_num_points  = cfg.MODEL.POINT_HEAD.SUBDIVISION_NUM_POINTS
 63 |         # fmt: on
 64 | 
 65 |         in_channels = np.sum([feature_channels[f] for f in self.in_features])
 66 |         self.point_head = build_point_head(cfg, ShapeSpec(channels=in_channels, width=1, height=1))
 67 | 
 68 |     def forward(self, features, targets=None):
 69 |         coarse_sem_seg_logits = self.coarse_sem_seg_head.layers(features)
 70 | 
 71 |         if self.training:
 72 |             losses = self.coarse_sem_seg_head.losses(coarse_sem_seg_logits, targets)
 73 | 
 74 |             with torch.no_grad():
 75 |                 point_coords = get_uncertain_point_coords_with_randomness(
 76 |                     coarse_sem_seg_logits,
 77 |                     calculate_uncertainty,
 78 |                     self.train_num_points,
 79 |                     self.oversample_ratio,
 80 |                     self.importance_sample_ratio,
 81 |                 )
 82 |             coarse_features = point_sample(coarse_sem_seg_logits, point_coords, align_corners=False)
 83 | 
 84 |             fine_grained_features = cat(
 85 |                 [
 86 |                     point_sample(features[in_feature], point_coords, align_corners=False)
 87 |                     for in_feature in self.in_features
 88 |                 ],
 89 |                 dim=1,
 90 |             )
 91 |             point_logits = self.point_head(fine_grained_features, coarse_features)
 92 |             point_targets = (
 93 |                 point_sample(
 94 |                     targets.unsqueeze(1).to(torch.float),
 95 |                     point_coords,
 96 |                     mode="nearest",
 97 |                     align_corners=False,
 98 |                 )
 99 |                 .squeeze(1)
100 |                 .to(torch.long)
101 |             )
102 |             losses["loss_sem_seg_point"] = F.cross_entropy(
103 |                 point_logits, point_targets, reduction="mean", ignore_index=self.ignore_value
104 |             )
105 |             return None, losses
106 |         else:
107 |             sem_seg_logits = coarse_sem_seg_logits.clone()
108 |             for _ in range(self.subdivision_steps):
109 |                 sem_seg_logits = F.interpolate(
110 |                     sem_seg_logits, scale_factor=2, mode="bilinear", align_corners=False
111 |                 )
112 |                 uncertainty_map = calculate_uncertainty(sem_seg_logits)
113 |                 point_indices, point_coords = get_uncertain_point_coords_on_grid(
114 |                     uncertainty_map, self.subdivision_num_points
115 |                 )
116 |                 fine_grained_features = cat(
117 |                     [
118 |                         point_sample(features[in_feature], point_coords, align_corners=False)
119 |                         for in_feature in self.in_features
120 |                     ]
121 |                 )
122 |                 coarse_features = point_sample(
123 |                     coarse_sem_seg_logits, point_coords, align_corners=False
124 |                 )
125 |                 point_logits = self.point_head(fine_grained_features, coarse_features)
126 | 
127 |                 # put sem seg point predictions to the right places on the upsampled grid.
128 |                 N, C, H, W = sem_seg_logits.shape
129 |                 point_indices = point_indices.unsqueeze(1).expand(-1, C, -1)
130 |                 sem_seg_logits = (
131 |                     sem_seg_logits.reshape(N, C, H * W)
132 |                     .scatter_(2, point_indices, point_logits)
133 |                     .view(N, C, H, W)
134 |                 )
135 |             return sem_seg_logits, {}
136 | 


--------------------------------------------------------------------------------
/projects/deeplab/resnet.py:
--------------------------------------------------------------------------------
  1 | import fvcore.nn.weight_init as weight_init
  2 | import torch.nn.functional as F
  3 | 
  4 | from detectron2.layers import CNNBlockBase, Conv2d, get_norm
  5 | from detectron2.modeling import BACKBONE_REGISTRY
  6 | from detectron2.modeling.backbone.resnet import (
  7 |     BasicStem,
  8 |     BottleneckBlock,
  9 |     DeformBottleneckBlock,
 10 |     ResNet,
 11 | )
 12 | 
 13 | 
 14 | class DeepLabStem(CNNBlockBase):
 15 |     """
 16 |     The DeepLab ResNet stem (layers before the first residual block).
 17 |     """
 18 | 
 19 |     def __init__(self, in_channels=3, out_channels=128, norm="BN"):
 20 |         """
 21 |         Args:
 22 |             norm (str or callable): norm after the first conv layer.
 23 |                 See :func:`layers.get_norm` for supported format.
 24 |         """
 25 |         super().__init__(in_channels, out_channels, 4)
 26 |         self.in_channels = in_channels
 27 |         self.conv1 = Conv2d(
 28 |             in_channels,
 29 |             out_channels // 2,
 30 |             kernel_size=3,
 31 |             stride=2,
 32 |             padding=1,
 33 |             bias=False,
 34 |             norm=get_norm(norm, out_channels // 2),
 35 |         )
 36 |         self.conv2 = Conv2d(
 37 |             out_channels // 2,
 38 |             out_channels // 2,
 39 |             kernel_size=3,
 40 |             stride=1,
 41 |             padding=1,
 42 |             bias=False,
 43 |             norm=get_norm(norm, out_channels // 2),
 44 |         )
 45 |         self.conv3 = Conv2d(
 46 |             out_channels // 2,
 47 |             out_channels,
 48 |             kernel_size=3,
 49 |             stride=1,
 50 |             padding=1,
 51 |             bias=False,
 52 |             norm=get_norm(norm, out_channels),
 53 |         )
 54 |         weight_init.c2_msra_fill(self.conv1)
 55 |         weight_init.c2_msra_fill(self.conv2)
 56 |         weight_init.c2_msra_fill(self.conv3)
 57 | 
 58 |     def forward(self, x):
 59 |         x = self.conv1(x)
 60 |         x = F.relu_(x)
 61 |         x = self.conv2(x)
 62 |         x = F.relu_(x)
 63 |         x = self.conv3(x)
 64 |         x = F.relu_(x)
 65 |         x = F.max_pool2d(x, kernel_size=3, stride=2, padding=1)
 66 |         return x
 67 | 
 68 | 
 69 | @BACKBONE_REGISTRY.register()
 70 | def build_resnet_deeplab_backbone(cfg, input_shape):
 71 |     """
 72 |     Create a ResNet instance from config.
 73 |     Returns:
 74 |         ResNet: a :class:`ResNet` instance.
 75 |     """
 76 |     # need registration of new blocks/stems?
 77 |     norm = cfg.MODEL.RESNETS.NORM
 78 |     if cfg.MODEL.RESNETS.STEM_TYPE == "basic":
 79 |         stem = BasicStem(
 80 |             in_channels=input_shape.channels,
 81 |             out_channels=cfg.MODEL.RESNETS.STEM_OUT_CHANNELS,
 82 |             norm=norm,
 83 |         )
 84 |     elif cfg.MODEL.RESNETS.STEM_TYPE == "deeplab":
 85 |         stem = DeepLabStem(
 86 |             in_channels=input_shape.channels,
 87 |             out_channels=cfg.MODEL.RESNETS.STEM_OUT_CHANNELS,
 88 |             norm=norm,
 89 |         )
 90 |     else:
 91 |         raise ValueError("Unknown stem type: {}".format(cfg.MODEL.RESNETS.STEM_TYPE))
 92 | 
 93 |     # fmt: off
 94 |     freeze_at           = cfg.MODEL.BACKBONE.FREEZE_AT
 95 |     out_features        = cfg.MODEL.RESNETS.OUT_FEATURES
 96 |     depth               = cfg.MODEL.RESNETS.DEPTH
 97 |     num_groups          = cfg.MODEL.RESNETS.NUM_GROUPS
 98 |     width_per_group     = cfg.MODEL.RESNETS.WIDTH_PER_GROUP
 99 |     bottleneck_channels = num_groups * width_per_group
100 |     in_channels         = cfg.MODEL.RESNETS.STEM_OUT_CHANNELS
101 |     out_channels        = cfg.MODEL.RESNETS.RES2_OUT_CHANNELS
102 |     stride_in_1x1       = cfg.MODEL.RESNETS.STRIDE_IN_1X1
103 |     res4_dilation       = cfg.MODEL.RESNETS.RES4_DILATION
104 |     res5_dilation       = cfg.MODEL.RESNETS.RES5_DILATION
105 |     deform_on_per_stage = cfg.MODEL.RESNETS.DEFORM_ON_PER_STAGE
106 |     deform_modulated    = cfg.MODEL.RESNETS.DEFORM_MODULATED
107 |     deform_num_groups   = cfg.MODEL.RESNETS.DEFORM_NUM_GROUPS
108 |     res5_multi_grid     = cfg.MODEL.RESNETS.RES5_MULTI_GRID
109 |     # fmt: on
110 |     assert res4_dilation in {1, 2}, "res4_dilation cannot be {}.".format(res4_dilation)
111 |     assert res5_dilation in {1, 2, 4}, "res5_dilation cannot be {}.".format(res5_dilation)
112 |     if res4_dilation == 2:
113 |         # Always dilate res5 if res4 is dilated.
114 |         assert res5_dilation == 4
115 | 
116 |     num_blocks_per_stage = {50: [3, 4, 6, 3], 101: [3, 4, 23, 3], 152: [3, 8, 36, 3]}[depth]
117 | 
118 |     stages = []
119 | 
120 |     # Avoid creating variables without gradients
121 |     # It consumes extra memory and may cause allreduce to fail
122 |     out_stage_idx = [{"res2": 2, "res3": 3, "res4": 4, "res5": 5}[f] for f in out_features]
123 |     max_stage_idx = max(out_stage_idx)
124 |     for idx, stage_idx in enumerate(range(2, max_stage_idx + 1)):
125 |         if stage_idx == 4:
126 |             dilation = res4_dilation
127 |         elif stage_idx == 5:
128 |             dilation = res5_dilation
129 |         else:
130 |             dilation = 1
131 |         first_stride = 1 if idx == 0 or dilation > 1 else 2
132 |         stage_kargs = {
133 |             "num_blocks": num_blocks_per_stage[idx],
134 |             "stride_per_block": [first_stride] + [1] * (num_blocks_per_stage[idx] - 1),
135 |             "in_channels": in_channels,
136 |             "out_channels": out_channels,
137 |             "norm": norm,
138 |         }
139 |         stage_kargs["bottleneck_channels"] = bottleneck_channels
140 |         stage_kargs["stride_in_1x1"] = stride_in_1x1
141 |         stage_kargs["dilation"] = dilation
142 |         stage_kargs["num_groups"] = num_groups
143 |         if deform_on_per_stage[idx]:
144 |             stage_kargs["block_class"] = DeformBottleneckBlock
145 |             stage_kargs["deform_modulated"] = deform_modulated
146 |             stage_kargs["deform_num_groups"] = deform_num_groups
147 |         else:
148 |             stage_kargs["block_class"] = BottleneckBlock
149 |         if stage_idx == 5:
150 |             stage_kargs.pop("dilation")
151 |             stage_kargs["dilation_per_block"] = [dilation * mg for mg in res5_multi_grid]
152 |         blocks = ResNet.make_stage(**stage_kargs)
153 |         in_channels = out_channels
154 |         out_channels *= 2
155 |         bottleneck_channels *= 2
156 |         stages.append(blocks)
157 |     return ResNet(stem, stages, out_features=out_features).freeze(freeze_at)
158 | 


--------------------------------------------------------------------------------
/modeling/matcher.py:
--------------------------------------------------------------------------------
  1 | # Copyright (c) Facebook, Inc. and its affiliates.
  2 | from typing import List
  3 | import torch
  4 | 
  5 | from detectron2.layers import nonzero_tuple
  6 | 
  7 | 
  8 | class Matcher(object):
  9 |     """
 10 |     This class assigns to each predicted "element" (e.g., a box) a ground-truth
 11 |     element. Each predicted element will have exactly zero or one matches; each
 12 |     ground-truth element may be matched to zero or more predicted elements.
 13 | 
 14 |     The matching is determined by the MxN match_quality_matrix, that characterizes
 15 |     how well each (ground-truth, prediction)-pair match each other. For example,
 16 |     if the elements are boxes, this matrix may contain box intersection-over-union
 17 |     overlap values.
 18 | 
 19 |     The matcher returns (a) a vector of length N containing the index of the
 20 |     ground-truth element m in [0, M) that matches to prediction n in [0, N).
 21 |     (b) a vector of length N containing the labels for each prediction.
 22 |     """
 23 | 
 24 |     def __init__(
 25 |         self, thresholds: List[float], labels: List[int], allow_low_quality_matches: bool = False
 26 |     ):
 27 |         """
 28 |         Args:
 29 |             thresholds (list): a list of thresholds used to stratify predictions
 30 |                 into levels.
 31 |             labels (list): a list of values to label predictions belonging at
 32 |                 each level. A label can be one of {-1, 0, 1} signifying
 33 |                 {ignore, negative class, positive class}, respectively.
 34 |             allow_low_quality_matches (bool): if True, produce additional matches
 35 |                 for predictions with maximum match quality lower than high_threshold.
 36 |                 See set_low_quality_matches_ for more details.
 37 | 
 38 |             For example,
 39 |                 thresholds = [0.3, 0.5]
 40 |                 labels = [0, -1, 1]
 41 |                 All predictions with iou < 0.3 will be marked with 0 and
 42 |                 thus will be considered as false positives while training.
 43 |                 All predictions with 0.3 <= iou < 0.5 will be marked with -1 and
 44 |                 thus will be ignored.
 45 |                 All predictions with 0.5 <= iou will be marked with 1 and
 46 |                 thus will be considered as true positives.
 47 |         """
 48 |         # Add -inf and +inf to first and last position in thresholds
 49 |         thresholds = thresholds[:]
 50 |         assert thresholds[0] > 0
 51 |         thresholds.insert(0, -float("inf"))
 52 |         thresholds.append(float("inf"))
 53 |         # Currently torchscript does not support all + generator
 54 |         assert all([low <= high for (low, high) in zip(thresholds[:-1], thresholds[1:])])
 55 |         assert all([l in [-1, 0, 1] for l in labels])
 56 |         assert len(labels) == len(thresholds) - 1
 57 |         self.thresholds = thresholds
 58 |         self.labels = labels
 59 |         self.allow_low_quality_matches = allow_low_quality_matches
 60 | 
 61 |     def __call__(self, match_quality_matrix):
 62 |         """
 63 |         Args:
 64 |             match_quality_matrix (Tensor[float]): an MxN tensor, containing the
 65 |                 pairwise quality between M ground-truth elements and N predicted
 66 |                 elements. All elements must be >= 0 (due to the us of `torch.nonzero`
 67 |                 for selecting indices in :meth:`set_low_quality_matches_`).
 68 | 
 69 |         Returns:
 70 |             matches (Tensor[int64]): a vector of length N, where matches[i] is a matched
 71 |                 ground-truth index in [0, M)
 72 |             match_labels (Tensor[int8]): a vector of length N, where pred_labels[i] indicates
 73 |                 whether a prediction is a true or false positive or ignored
 74 |         """
 75 |         assert match_quality_matrix.dim() == 2
 76 |         if match_quality_matrix.numel() == 0:
 77 |             default_matches = match_quality_matrix.new_full(
 78 |                 (match_quality_matrix.size(1),), 0, dtype=torch.int64
 79 |             )
 80 |             # When no gt boxes exist, we define IOU = 0 and therefore set labels
 81 |             # to `self.labels[0]`, which usually defaults to background class 0
 82 |             # To choose to ignore instead, can make labels=[-1,0,-1,1] + set appropriate thresholds
 83 |             default_match_labels = match_quality_matrix.new_full(
 84 |                 (match_quality_matrix.size(1),), self.labels[0], dtype=torch.int8
 85 |             )
 86 |             return default_matches, default_match_labels
 87 | 
 88 |         assert torch.all(match_quality_matrix >= 0)
 89 | 
 90 |         # match_quality_matrix is M (gt) x N (predicted)
 91 |         # Max over gt elements (dim 0) to find best gt candidate for each prediction
 92 |         matched_vals, matches = match_quality_matrix.max(dim=0)
 93 | 
 94 |         match_labels = matches.new_full(matches.size(), 1, dtype=torch.int8)
 95 | 
 96 |         for (l, low, high) in zip(self.labels, self.thresholds[:-1], self.thresholds[1:]):
 97 |             low_high = (matched_vals >= low) & (matched_vals < high)
 98 |             match_labels[low_high] = l
 99 | 
100 |         if self.allow_low_quality_matches:
101 |             self.set_low_quality_matches_(match_labels, match_quality_matrix)
102 | 
103 |         return matches, match_labels
104 | 
105 |     def set_low_quality_matches_(self, match_labels, match_quality_matrix):
106 |         """
107 |         Produce additional matches for predictions that have only low-quality matches.
108 |         Specifically, for each ground-truth G find the set of predictions that have
109 |         maximum overlap with it (including ties); for each prediction in that set, if
110 |         it is unmatched, then match it to the ground-truth G.
111 | 
112 |         This function implements the RPN assignment case (i) in Sec. 3.1.2 of
113 |         :paper:`Faster R-CNN`.
114 |         """
115 |         # For each gt, find the prediction with which it has highest quality
116 |         highest_quality_foreach_gt, _ = match_quality_matrix.max(dim=1)
117 |         # Find the highest quality match available, even if it is low, including ties.
118 |         # Note that the matches qualities must be positive due to the use of
119 |         # `torch.nonzero`.
120 |         _, pred_inds_with_highest_quality = nonzero_tuple(
121 |             match_quality_matrix == highest_quality_foreach_gt[:, None]
122 |         )
123 |         # If an anchor was labeled positive only due to a low-quality match
124 |         # with gt_A, but it has larger overlap with gt_B, it's matched index will still be gt_B.
125 |         # This follows the implementation in Detectron, and is found to have no significant impact.
126 |         match_labels[pred_inds_with_highest_quality] = 1
127 | 


--------------------------------------------------------------------------------