├── .gitignore ├── utils ├── __init__.py ├── registry.py ├── serialize.py ├── file_io.py ├── memory.py ├── colormap.py ├── env.py └── analysis.py ├── export ├── __init__.py ├── torchscript.py └── caffe2_patch.py ├── model_zoo ├── configs │ ├── COCO-Detection │ │ ├── retinanet_R_50_FPN_1x.yaml │ │ ├── faster_rcnn_R_50_C4_1x.yaml │ │ ├── faster_rcnn_R_50_FPN_1x.yaml │ │ ├── faster_rcnn_R_50_DC5_1x.yaml │ │ ├── retinanet_R_50_FPN_3x.yaml │ │ ├── retinanet_R_101_FPN_3x.yaml │ │ ├── faster_rcnn_R_50_C4_3x.yaml │ │ ├── faster_rcnn_R_101_C4_3x.yaml │ │ ├── faster_rcnn_R_101_FPN_3x.yaml │ │ ├── faster_rcnn_R_50_FPN_3x.yaml │ │ ├── faster_rcnn_R_50_DC5_3x.yaml │ │ ├── faster_rcnn_R_101_DC5_3x.yaml │ │ ├── rpn_R_50_FPN_1x.yaml │ │ ├── rpn_R_50_C4_1x.yaml │ │ ├── faster_rcnn_X_101_32x8d_FPN_3x.yaml │ │ └── fast_rcnn_R_50_FPN_1x.yaml │ ├── COCO-Keypoints │ │ ├── keypoint_rcnn_R_50_FPN_1x.yaml │ │ ├── keypoint_rcnn_R_50_FPN_3x.yaml │ │ ├── keypoint_rcnn_R_101_FPN_3x.yaml │ │ ├── keypoint_rcnn_X_101_32x8d_FPN_3x.yaml │ │ └── Base-Keypoint-RCNN-FPN.yaml │ ├── COCO-PanopticSegmentation │ │ ├── panoptic_fpn_R_50_1x.yaml │ │ ├── panoptic_fpn_R_50_3x.yaml │ │ ├── panoptic_fpn_R_101_3x.yaml │ │ └── Base-Panoptic-FPN.yaml │ ├── COCO-InstanceSegmentation │ │ ├── mask_rcnn_R_50_C4_1x.yaml │ │ ├── mask_rcnn_R_50_FPN_1x.yaml │ │ ├── mask_rcnn_R_50_DC5_1x.yaml │ │ ├── mask_rcnn_R_50_C4_3x.yaml │ │ ├── mask_rcnn_R_50_FPN_3x.yaml │ │ ├── mask_rcnn_R_101_C4_3x.yaml │ │ ├── mask_rcnn_R_101_FPN_3x.yaml │ │ ├── mask_rcnn_R_101_DC5_3x.yaml │ │ ├── mask_rcnn_R_50_DC5_3x.yaml │ │ ├── mask_rcnn_R_50_FPN_1x_giou.yaml │ │ └── mask_rcnn_X_101_32x8d_FPN_3x.yaml │ ├── quick_schedules │ │ ├── mask_rcnn_R_50_FPN_pred_boxes_training_acc_test.yaml │ │ ├── rpn_R_50_FPN_inference_acc_test.yaml │ │ ├── cascade_mask_rcnn_R_50_FPN_instant_test.yaml │ │ ├── fast_rcnn_R_50_FPN_inference_acc_test.yaml │ │ ├── retinanet_R_50_FPN_inference_acc_test.yaml │ │ ├── cascade_mask_rcnn_R_50_FPN_inference_acc_test.yaml │ │ ├── mask_rcnn_R_50_C4_inference_acc_test.yaml │ │ ├── mask_rcnn_R_50_DC5_inference_acc_test.yaml │ │ ├── keypoint_rcnn_R_50_FPN_inference_acc_test.yaml │ │ ├── rpn_R_50_FPN_instant_test.yaml │ │ ├── mask_rcnn_R_50_C4_instant_test.yaml │ │ ├── mask_rcnn_R_50_FPN_instant_test.yaml │ │ ├── retinanet_R_50_FPN_instant_test.yaml │ │ ├── keypoint_rcnn_R_50_FPN_instant_test.yaml │ │ ├── semantic_R_50_FPN_inference_acc_test.yaml │ │ ├── panoptic_fpn_R_50_inference_acc_test.yaml │ │ ├── mask_rcnn_R_50_C4_GCV_instant_test.yaml │ │ ├── mask_rcnn_R_50_FPN_inference_acc_test.yaml │ │ ├── panoptic_fpn_R_50_instant_test.yaml │ │ ├── semantic_R_50_FPN_instant_test.yaml │ │ ├── mask_rcnn_R_50_FPN_training_acc_test.yaml │ │ ├── fast_rcnn_R_50_FPN_instant_test.yaml │ │ ├── semantic_R_50_FPN_training_acc_test.yaml │ │ ├── mask_rcnn_R_50_C4_training_acc_test.yaml │ │ ├── panoptic_fpn_R_50_training_acc_test.yaml │ │ ├── keypoint_rcnn_R_50_FPN_training_acc_test.yaml │ │ └── keypoint_rcnn_R_50_FPN_normalized_training_acc_test.yaml │ ├── Misc │ │ ├── mask_rcnn_R_50_FPN_1x_dconv_c3-c5.yaml │ │ ├── mask_rcnn_R_50_FPN_1x_cls_agnostic.yaml │ │ ├── cascade_mask_rcnn_R_50_FPN_1x.yaml │ │ ├── mask_rcnn_R_50_FPN_3x_dconv_c3-c5.yaml │ │ ├── semantic_R_50_FPN_1x.yaml │ │ ├── cascade_mask_rcnn_R_50_FPN_3x.yaml │ │ ├── mask_rcnn_R_50_FPN_3x_gn.yaml │ │ ├── mask_rcnn_R_50_FPN_3x_syncbn.yaml │ │ ├── scratch_mask_rcnn_R_50_FPN_3x_gn.yaml │ │ ├── scratch_mask_rcnn_R_50_FPN_9x_gn.yaml │ │ ├── scratch_mask_rcnn_R_50_FPN_9x_syncbn.yaml │ │ ├── 
panoptic_fpn_R_101_dconv_cascade_gn_3x.yaml │ │ ├── cascade_mask_rcnn_X_152_32x8d_FPN_IN5k_gn_dconv.yaml │ │ └── cascade_rcnn_X_152_32x8d_FPN_IN5k_gn_dconv.yaml │ ├── Base-RCNN-C4.yaml │ ├── PascalVOC-Detection │ │ ├── faster_rcnn_R_50_C4.yaml │ │ └── faster_rcnn_R_50_FPN.yaml │ ├── Detectron1-Comparisons │ │ ├── faster_rcnn_R_50_FPN_noaug_1x.yaml │ │ ├── mask_rcnn_R_50_FPN_noaug_1x.yaml │ │ └── keypoint_rcnn_R_50_FPN_1x.yaml │ ├── LVISv0.5-InstanceSegmentation │ │ ├── mask_rcnn_R_50_FPN_1x.yaml │ │ ├── mask_rcnn_R_101_FPN_1x.yaml │ │ └── mask_rcnn_X_101_32x8d_FPN_1x.yaml │ ├── LVISv1-InstanceSegmentation │ │ ├── mask_rcnn_R_50_FPN_1x.yaml │ │ ├── mask_rcnn_R_101_FPN_1x.yaml │ │ └── mask_rcnn_X_101_32x8d_FPN_1x.yaml │ ├── Base-RCNN-DilatedC5.yaml │ ├── Base-RetinaNet.yaml │ ├── Cityscapes │ │ └── mask_rcnn_R_50_FPN.yaml │ └── Base-RCNN-FPN.yaml └── __init__.py ├── data ├── datasets │ ├── register_coco.py │ ├── __init__.py │ └── pascal_voc.py ├── transforms │ └── __init__.py ├── samplers │ ├── __init__.py │ └── grouped_batch_sampler.py └── __init__.py ├── modeling ├── proposal_generator │ ├── __init__.py │ └── build.py ├── backbone │ ├── __init__.py │ ├── build.py │ └── backbone.py ├── meta_arch │ ├── __init__.py │ └── build.py ├── roi_heads │ ├── __init__.py │ └── box_head.py ├── __init__.py ├── sampling.py ├── postprocessing.py └── matcher.py ├── projects ├── deeplab │ ├── __init__.py │ ├── build_solver.py │ ├── config.py │ ├── loss.py │ ├── lr_scheduler.py │ └── resnet.py ├── point_rend │ ├── __init__.py │ ├── config.py │ ├── color_augmentation.py │ ├── coarse_mask_head.py │ └── semantic_seg.py ├── panoptic_deeplab │ ├── __init__.py │ ├── config.py │ └── dataset_mapper.py └── __init__.py ├── solver ├── __init__.py └── lr_scheduler.py ├── __init__.py ├── config └── __init__.py ├── checkpoint ├── __init__.py ├── detection_checkpoint.py └── catalog.py ├── engine ├── __init__.py └── launch.py ├── structures ├── __init__.py └── image_list.py ├── evaluation ├── __init__.py ├── testing.py └── fast_eval_api.py └── layers ├── __init__.py ├── rotated_boxes.py ├── shape_spec.py ├── blocks.py ├── roi_align_rotated.py ├── wrappers.py ├── roi_align.py └── aspp.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.so 2 | *.pyc 3 | -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | -------------------------------------------------------------------------------- /export/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from .api import * 4 | 5 | __all__ = [k for k in globals().keys() if not k.startswith("_")] 6 | -------------------------------------------------------------------------------- /model_zoo/configs/COCO-Detection/retinanet_R_50_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RetinaNet.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | RESNETS: 5 | DEPTH: 50 6 | -------------------------------------------------------------------------------- /data/datasets/register_coco.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
2 | from .coco import register_coco_instances # noqa 3 | from .coco_panoptic import register_coco_panoptic_separated # noqa 4 | -------------------------------------------------------------------------------- /model_zoo/configs/COCO-Keypoints/keypoint_rcnn_R_50_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-Keypoint-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | RESNETS: 5 | DEPTH: 50 6 | -------------------------------------------------------------------------------- /model_zoo/configs/COCO-PanopticSegmentation/panoptic_fpn_R_50_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-Panoptic-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | RESNETS: 5 | DEPTH: 50 6 | -------------------------------------------------------------------------------- /utils/registry.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | 3 | # Keep this module for backward compatibility. 4 | from fvcore.common.registry import Registry # noqa 5 | 6 | __all__ = ["Registry"] 7 | -------------------------------------------------------------------------------- /model_zoo/configs/COCO-Detection/faster_rcnn_R_50_C4_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-C4.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: False 5 | RESNETS: 6 | DEPTH: 50 7 | -------------------------------------------------------------------------------- /model_zoo/configs/COCO-Detection/faster_rcnn_R_50_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: False 5 | RESNETS: 6 | DEPTH: 50 7 | -------------------------------------------------------------------------------- /model_zoo/configs/COCO-Detection/faster_rcnn_R_50_DC5_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-DilatedC5.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: False 5 | RESNETS: 6 | DEPTH: 50 7 | -------------------------------------------------------------------------------- /model_zoo/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_C4_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-C4.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: True 5 | RESNETS: 6 | DEPTH: 50 7 | -------------------------------------------------------------------------------- /model_zoo/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: True 5 | RESNETS: 6 | DEPTH: 50 7 | -------------------------------------------------------------------------------- /model_zoo/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_DC5_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-DilatedC5.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | 
MASK_ON: True 5 | RESNETS: 6 | DEPTH: 50 7 | -------------------------------------------------------------------------------- /model_zoo/configs/COCO-Detection/retinanet_R_50_FPN_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RetinaNet.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | RESNETS: 5 | DEPTH: 50 6 | SOLVER: 7 | STEPS: (210000, 250000) 8 | MAX_ITER: 270000 9 | -------------------------------------------------------------------------------- /model_zoo/configs/COCO-Detection/retinanet_R_101_FPN_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RetinaNet.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" 4 | RESNETS: 5 | DEPTH: 101 6 | SOLVER: 7 | STEPS: (210000, 250000) 8 | MAX_ITER: 270000 9 | -------------------------------------------------------------------------------- /modeling/proposal_generator/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | from .build import PROPOSAL_GENERATOR_REGISTRY, build_proposal_generator 3 | from .rpn import RPN_HEAD_REGISTRY, build_rpn_head, RPN 4 | 5 | __all__ = list(globals().keys()) 6 | -------------------------------------------------------------------------------- /model_zoo/configs/COCO-Keypoints/keypoint_rcnn_R_50_FPN_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-Keypoint-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | RESNETS: 5 | DEPTH: 50 6 | SOLVER: 7 | STEPS: (210000, 250000) 8 | MAX_ITER: 270000 9 | -------------------------------------------------------------------------------- /model_zoo/configs/COCO-PanopticSegmentation/panoptic_fpn_R_50_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-Panoptic-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | RESNETS: 5 | DEPTH: 50 6 | SOLVER: 7 | STEPS: (210000, 250000) 8 | MAX_ITER: 270000 9 | -------------------------------------------------------------------------------- /model_zoo/configs/COCO-Keypoints/keypoint_rcnn_R_101_FPN_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-Keypoint-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" 4 | RESNETS: 5 | DEPTH: 101 6 | SOLVER: 7 | STEPS: (210000, 250000) 8 | MAX_ITER: 270000 9 | -------------------------------------------------------------------------------- /model_zoo/configs/COCO-PanopticSegmentation/panoptic_fpn_R_101_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-Panoptic-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" 4 | RESNETS: 5 | DEPTH: 101 6 | SOLVER: 7 | STEPS: (210000, 250000) 8 | MAX_ITER: 270000 9 | -------------------------------------------------------------------------------- /projects/deeplab/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
2 | from .build_solver import build_lr_scheduler 3 | from .config import add_deeplab_config 4 | from .resnet import build_resnet_deeplab_backbone 5 | from .semantic_seg import DeepLabV3Head, DeepLabV3PlusHead 6 | -------------------------------------------------------------------------------- /model_zoo/configs/COCO-Detection/faster_rcnn_R_50_C4_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-C4.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: False 5 | RESNETS: 6 | DEPTH: 50 7 | SOLVER: 8 | STEPS: (210000, 250000) 9 | MAX_ITER: 270000 10 | -------------------------------------------------------------------------------- /model_zoo/configs/quick_schedules/mask_rcnn_R_50_FPN_pred_boxes_training_acc_test.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "./mask_rcnn_R_50_FPN_training_acc_test.yaml" 2 | MODEL: 3 | ROI_BOX_HEAD: 4 | TRAIN_ON_PRED_BOXES: True 5 | TEST: 6 | EXPECTED_RESULTS: [["bbox", "AP", 42.6, 1.0], ["segm", "AP", 35.8, 0.8]] 7 | -------------------------------------------------------------------------------- /model_zoo/configs/COCO-Detection/faster_rcnn_R_101_C4_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-C4.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" 4 | MASK_ON: False 5 | RESNETS: 6 | DEPTH: 101 7 | SOLVER: 8 | STEPS: (210000, 250000) 9 | MAX_ITER: 270000 10 | -------------------------------------------------------------------------------- /model_zoo/configs/COCO-Detection/faster_rcnn_R_101_FPN_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" 4 | MASK_ON: False 5 | RESNETS: 6 | DEPTH: 101 7 | SOLVER: 8 | STEPS: (210000, 250000) 9 | MAX_ITER: 270000 10 | -------------------------------------------------------------------------------- /model_zoo/configs/COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: False 5 | RESNETS: 6 | DEPTH: 50 7 | SOLVER: 8 | STEPS: (210000, 250000) 9 | MAX_ITER: 270000 10 | -------------------------------------------------------------------------------- /solver/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
2 | from .build import build_lr_scheduler, build_optimizer, get_default_optimizer_params 3 | from .lr_scheduler import WarmupCosineLR, WarmupMultiStepLR 4 | 5 | __all__ = [k for k in globals().keys() if not k.startswith("_")] 6 | -------------------------------------------------------------------------------- /model_zoo/configs/COCO-Detection/faster_rcnn_R_50_DC5_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-DilatedC5.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: False 5 | RESNETS: 6 | DEPTH: 50 7 | SOLVER: 8 | STEPS: (210000, 250000) 9 | MAX_ITER: 270000 10 | -------------------------------------------------------------------------------- /model_zoo/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_C4_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-C4.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: True 5 | RESNETS: 6 | DEPTH: 50 7 | SOLVER: 8 | STEPS: (210000, 250000) 9 | MAX_ITER: 270000 10 | -------------------------------------------------------------------------------- /model_zoo/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: True 5 | RESNETS: 6 | DEPTH: 50 7 | SOLVER: 8 | STEPS: (210000, 250000) 9 | MAX_ITER: 270000 10 | -------------------------------------------------------------------------------- /model_zoo/configs/COCO-Detection/faster_rcnn_R_101_DC5_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-DilatedC5.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" 4 | MASK_ON: False 5 | RESNETS: 6 | DEPTH: 101 7 | SOLVER: 8 | STEPS: (210000, 250000) 9 | MAX_ITER: 270000 10 | -------------------------------------------------------------------------------- /model_zoo/configs/COCO-InstanceSegmentation/mask_rcnn_R_101_C4_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-C4.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" 4 | MASK_ON: True 5 | RESNETS: 6 | DEPTH: 101 7 | SOLVER: 8 | STEPS: (210000, 250000) 9 | MAX_ITER: 270000 10 | -------------------------------------------------------------------------------- /model_zoo/configs/COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" 4 | MASK_ON: True 5 | RESNETS: 6 | DEPTH: 101 7 | SOLVER: 8 | STEPS: (210000, 250000) 9 | MAX_ITER: 270000 10 | -------------------------------------------------------------------------------- /model_zoo/configs/COCO-Detection/rpn_R_50_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | META_ARCHITECTURE: "ProposalNetwork" 4 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 5 | MASK_ON: False 6 | RESNETS: 7 | DEPTH: 50 8 | RPN: 9 | POST_NMS_TOPK_TEST: 2000 10 | -------------------------------------------------------------------------------- 
/model_zoo/configs/COCO-InstanceSegmentation/mask_rcnn_R_101_DC5_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-DilatedC5.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" 4 | MASK_ON: True 5 | RESNETS: 6 | DEPTH: 101 7 | SOLVER: 8 | STEPS: (210000, 250000) 9 | MAX_ITER: 270000 10 | -------------------------------------------------------------------------------- /model_zoo/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_DC5_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-DilatedC5.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: True 5 | RESNETS: 6 | DEPTH: 50 7 | SOLVER: 8 | STEPS: (210000, 250000) 9 | MAX_ITER: 270000 10 | -------------------------------------------------------------------------------- /__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | 3 | from .utils.env import setup_environment 4 | 5 | setup_environment() 6 | 7 | 8 | # This line will be programatically read/write by setup.py. 9 | # Leave them at the bottom of this file and don't touch them. 10 | __version__ = "0.3" 11 | -------------------------------------------------------------------------------- /model_zoo/configs/Misc/mask_rcnn_R_50_FPN_1x_dconv_c3-c5.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: True 5 | RESNETS: 6 | DEPTH: 50 7 | DEFORM_ON_PER_STAGE: [False, True, True, True] # on Res3,Res4,Res5 8 | DEFORM_MODULATED: False 9 | -------------------------------------------------------------------------------- /projects/point_rend/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
2 | from .config import add_pointrend_config 3 | from .coarse_mask_head import CoarseMaskHead 4 | from .roi_heads import PointRendROIHeads 5 | from .semantic_seg import PointRendSemSegHead 6 | from .color_augmentation import ColorAugSSDTransform 7 | -------------------------------------------------------------------------------- /model_zoo/configs/COCO-Detection/rpn_R_50_C4_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-C4.yaml" 2 | MODEL: 3 | META_ARCHITECTURE: "ProposalNetwork" 4 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 5 | MASK_ON: False 6 | RESNETS: 7 | DEPTH: 50 8 | RPN: 9 | PRE_NMS_TOPK_TEST: 12000 10 | POST_NMS_TOPK_TEST: 2000 11 | -------------------------------------------------------------------------------- /model_zoo/configs/Misc/mask_rcnn_R_50_FPN_1x_cls_agnostic.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: True 5 | RESNETS: 6 | DEPTH: 50 7 | ROI_BOX_HEAD: 8 | CLS_AGNOSTIC_BBOX_REG: True 9 | ROI_MASK_HEAD: 10 | CLS_AGNOSTIC_MASK: True 11 | -------------------------------------------------------------------------------- /model_zoo/configs/quick_schedules/rpn_R_50_FPN_inference_acc_test.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../COCO-Detection/rpn_R_50_FPN_1x.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://COCO-Detection/rpn_R_50_FPN_1x/137258492/model_final_02ce48.pkl" 4 | DATASETS: 5 | TEST: ("coco_2017_val_100",) 6 | TEST: 7 | EXPECTED_RESULTS: [["box_proposals", "AR@1000", 58.16, 0.02]] 8 | -------------------------------------------------------------------------------- /model_zoo/configs/quick_schedules/cascade_mask_rcnn_R_50_FPN_instant_test.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Misc/cascade_mask_rcnn_R_50_FPN_3x.yaml" 2 | DATASETS: 3 | TRAIN: ("coco_2017_val_100",) 4 | TEST: ("coco_2017_val_100",) 5 | SOLVER: 6 | BASE_LR: 0.005 7 | STEPS: (30,) 8 | MAX_ITER: 40 9 | IMS_PER_BATCH: 4 10 | DATALOADER: 11 | NUM_WORKERS: 2 12 | -------------------------------------------------------------------------------- /model_zoo/configs/quick_schedules/fast_rcnn_R_50_FPN_inference_acc_test.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../COCO-Detection/fast_rcnn_R_50_FPN_1x.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://COCO-Detection/fast_rcnn_R_50_FPN_1x/137635226/model_final_e5f7ce.pkl" 4 | DATASETS: 5 | TEST: ("coco_2017_val_100",) 6 | TEST: 7 | EXPECTED_RESULTS: [["bbox", "AP", 45.70, 0.02]] 8 | -------------------------------------------------------------------------------- /model_zoo/configs/quick_schedules/retinanet_R_50_FPN_inference_acc_test.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../COCO-Detection/retinanet_R_50_FPN_3x.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://COCO-Detection/retinanet_R_50_FPN_3x/190397829/model_final_5bd44e.pkl" 4 | DATASETS: 5 | TEST: ("coco_2017_val_100",) 6 | TEST: 7 | EXPECTED_RESULTS: [["bbox", "AP", 44.45, 0.02]] 8 | -------------------------------------------------------------------------------- /model_zoo/configs/Misc/cascade_mask_rcnn_R_50_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | 
MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: True 5 | RESNETS: 6 | DEPTH: 50 7 | ROI_HEADS: 8 | NAME: CascadeROIHeads 9 | ROI_BOX_HEAD: 10 | CLS_AGNOSTIC_BBOX_REG: True 11 | RPN: 12 | POST_NMS_TOPK_TRAIN: 2000 13 | -------------------------------------------------------------------------------- /data/transforms/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | from fvcore.transforms.transform import Transform, TransformList # order them first 3 | from fvcore.transforms.transform import * 4 | from .transform import * 5 | from .augmentation import * 6 | from .augmentation_impl import * 7 | 8 | __all__ = [k for k in globals().keys() if not k.startswith("_")] 9 | -------------------------------------------------------------------------------- /model_zoo/configs/quick_schedules/cascade_mask_rcnn_R_50_FPN_inference_acc_test.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Misc/cascade_mask_rcnn_R_50_FPN_3x.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://Misc/cascade_mask_rcnn_R_50_FPN_3x/144998488/model_final_480dd8.pkl" 4 | DATASETS: 5 | TEST: ("coco_2017_val_100",) 6 | TEST: 7 | EXPECTED_RESULTS: [["bbox", "AP", 50.18, 0.02], ["segm", "AP", 43.87, 0.02]] 8 | -------------------------------------------------------------------------------- /data/samplers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | from .distributed_sampler import InferenceSampler, RepeatFactorTrainingSampler, TrainingSampler 3 | from .grouped_batch_sampler import GroupedBatchSampler 4 | 5 | __all__ = [ 6 | "GroupedBatchSampler", 7 | "TrainingSampler", 8 | "InferenceSampler", 9 | "RepeatFactorTrainingSampler", 10 | ] 11 | -------------------------------------------------------------------------------- /model_zoo/configs/COCO-PanopticSegmentation/Base-Panoptic-FPN.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | META_ARCHITECTURE: "PanopticFPN" 4 | MASK_ON: True 5 | SEM_SEG_HEAD: 6 | LOSS_WEIGHT: 0.5 7 | DATASETS: 8 | TRAIN: ("coco_2017_train_panoptic_separated",) 9 | TEST: ("coco_2017_val_panoptic_separated",) 10 | DATALOADER: 11 | FILTER_EMPTY_ANNOTATIONS: False 12 | -------------------------------------------------------------------------------- /model_zoo/configs/Misc/mask_rcnn_R_50_FPN_3x_dconv_c3-c5.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: True 5 | RESNETS: 6 | DEPTH: 50 7 | DEFORM_ON_PER_STAGE: [False, True, True, True] # on Res3,Res4,Res5 8 | DEFORM_MODULATED: False 9 | SOLVER: 10 | STEPS: (210000, 250000) 11 | MAX_ITER: 270000 12 | -------------------------------------------------------------------------------- /model_zoo/configs/quick_schedules/mask_rcnn_R_50_C4_inference_acc_test.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../COCO-InstanceSegmentation/mask_rcnn_R_50_C4_3x.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://COCO-InstanceSegmentation/mask_rcnn_R_50_C4_3x/137849525/model_final_4ce675.pkl" 4 | DATASETS: 5 | TEST: ("coco_2017_val_100",) 6 | TEST: 7 | EXPECTED_RESULTS: [["bbox", "AP", 47.37, 0.02], ["segm", "AP", 
40.99, 0.02]] 8 | -------------------------------------------------------------------------------- /model_zoo/configs/quick_schedules/mask_rcnn_R_50_DC5_inference_acc_test.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../COCO-InstanceSegmentation/mask_rcnn_R_50_DC5_3x.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://COCO-InstanceSegmentation/mask_rcnn_R_50_DC5_3x/137849551/model_final_84107b.pkl" 4 | DATASETS: 5 | TEST: ("coco_2017_val_100",) 6 | TEST: 7 | EXPECTED_RESULTS: [["bbox", "AP", 47.44, 0.02], ["segm", "AP", 42.94, 0.02]] 8 | -------------------------------------------------------------------------------- /config/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | from .compat import downgrade_config, upgrade_config 3 | from .config import CfgNode, get_cfg, global_cfg, set_global_cfg, configurable 4 | 5 | __all__ = [ 6 | "CfgNode", 7 | "get_cfg", 8 | "global_cfg", 9 | "set_global_cfg", 10 | "downgrade_config", 11 | "upgrade_config", 12 | "configurable", 13 | ] 14 | -------------------------------------------------------------------------------- /model_zoo/configs/quick_schedules/keypoint_rcnn_R_50_FPN_inference_acc_test.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../COCO-Keypoints/keypoint_rcnn_R_50_FPN_3x.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://COCO-Keypoints/keypoint_rcnn_R_50_FPN_3x/137849621/model_final_a6e10b.pkl" 4 | DATASETS: 5 | TEST: ("keypoints_coco_2017_val_100",) 6 | TEST: 7 | EXPECTED_RESULTS: [["bbox", "AP", 52.47, 0.02], ["keypoints", "AP", 67.36, 0.02]] 8 | -------------------------------------------------------------------------------- /checkpoint/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright (c) Facebook, Inc. and its affiliates. 3 | # File: 4 | 5 | 6 | from . import catalog as _UNUSED # register the handler 7 | from .detection_checkpoint import DetectionCheckpointer 8 | from fvcore.common.checkpoint import Checkpointer, PeriodicCheckpointer 9 | 10 | __all__ = ["Checkpointer", "PeriodicCheckpointer", "DetectionCheckpointer"] 11 | -------------------------------------------------------------------------------- /engine/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
2 | 3 | from .launch import * 4 | from .train_loop import * 5 | 6 | __all__ = [k for k in globals().keys() if not k.startswith("_")] 7 | 8 | 9 | # prefer to let hooks and defaults live in separate namespaces (therefore not in __all__) 10 | # but still make them available here 11 | from .hooks import * 12 | from .defaults import * 13 | -------------------------------------------------------------------------------- /model_zoo/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x_giou.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: True 5 | RESNETS: 6 | DEPTH: 50 7 | RPN: 8 | BBOX_REG_LOSS_TYPE: "giou" 9 | BBOX_REG_LOSS_WEIGHT: 2.0 10 | ROI_BOX_HEAD: 11 | BBOX_REG_LOSS_TYPE: "giou" 12 | BBOX_REG_LOSS_WEIGHT: 10.0 13 | -------------------------------------------------------------------------------- /model_zoo/configs/quick_schedules/rpn_R_50_FPN_instant_test.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../COCO-Detection/rpn_R_50_FPN_1x.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | DATASETS: 5 | TRAIN: ("coco_2017_val_100",) 6 | TEST: ("coco_2017_val_100",) 7 | SOLVER: 8 | STEPS: (30,) 9 | MAX_ITER: 40 10 | BASE_LR: 0.005 11 | IMS_PER_BATCH: 4 12 | DATALOADER: 13 | NUM_WORKERS: 2 14 | -------------------------------------------------------------------------------- /projects/panoptic_deeplab/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | from .config import add_panoptic_deeplab_config 3 | from .dataset_mapper import PanopticDeeplabDatasetMapper 4 | from .panoptic_seg import ( 5 | PanopticDeepLab, 6 | INS_EMBED_BRANCHES_REGISTRY, 7 | build_ins_embed_branch, 8 | PanopticDeepLabSemSegHead, 9 | PanopticDeepLabInsEmbedHead, 10 | ) 11 | -------------------------------------------------------------------------------- /model_zoo/configs/Misc/semantic_R_50_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | META_ARCHITECTURE: "SemanticSegmentor" 4 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 5 | RESNETS: 6 | DEPTH: 50 7 | DATASETS: 8 | TRAIN: ("coco_2017_train_panoptic_stuffonly",) 9 | TEST: ("coco_2017_val_panoptic_stuffonly",) 10 | INPUT: 11 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 12 | -------------------------------------------------------------------------------- /model_zoo/configs/quick_schedules/mask_rcnn_R_50_C4_instant_test.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-C4.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: True 5 | DATASETS: 6 | TRAIN: ("coco_2017_val_100",) 7 | TEST: ("coco_2017_val_100",) 8 | SOLVER: 9 | BASE_LR: 0.001 10 | STEPS: (30,) 11 | MAX_ITER: 40 12 | IMS_PER_BATCH: 4 13 | DATALOADER: 14 | NUM_WORKERS: 2 15 | -------------------------------------------------------------------------------- /model_zoo/configs/quick_schedules/mask_rcnn_R_50_FPN_instant_test.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: True 5 | DATASETS: 6 | TRAIN: 
("coco_2017_val_100",) 7 | TEST: ("coco_2017_val_100",) 8 | SOLVER: 9 | BASE_LR: 0.005 10 | STEPS: (30,) 11 | MAX_ITER: 40 12 | IMS_PER_BATCH: 4 13 | DATALOADER: 14 | NUM_WORKERS: 2 15 | -------------------------------------------------------------------------------- /model_zoo/configs/quick_schedules/retinanet_R_50_FPN_instant_test.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../COCO-Detection/retinanet_R_50_FPN_1x.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | DATASETS: 5 | TRAIN: ("coco_2017_val_100",) 6 | TEST: ("coco_2017_val_100",) 7 | SOLVER: 8 | BASE_LR: 0.005 9 | STEPS: (30,) 10 | MAX_ITER: 40 11 | IMS_PER_BATCH: 4 12 | DATALOADER: 13 | NUM_WORKERS: 2 14 | -------------------------------------------------------------------------------- /model_zoo/configs/COCO-Keypoints/keypoint_rcnn_X_101_32x8d_FPN_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-Keypoint-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/FAIR/X-101-32x8d.pkl" 4 | PIXEL_STD: [57.375, 57.120, 58.395] 5 | RESNETS: 6 | STRIDE_IN_1X1: False # this is a C2 model 7 | NUM_GROUPS: 32 8 | WIDTH_PER_GROUP: 8 9 | DEPTH: 101 10 | SOLVER: 11 | STEPS: (210000, 250000) 12 | MAX_ITER: 270000 13 | -------------------------------------------------------------------------------- /model_zoo/configs/Misc/cascade_mask_rcnn_R_50_FPN_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: True 5 | RESNETS: 6 | DEPTH: 50 7 | ROI_HEADS: 8 | NAME: CascadeROIHeads 9 | ROI_BOX_HEAD: 10 | CLS_AGNOSTIC_BBOX_REG: True 11 | RPN: 12 | POST_NMS_TOPK_TRAIN: 2000 13 | SOLVER: 14 | STEPS: (210000, 250000) 15 | MAX_ITER: 270000 16 | -------------------------------------------------------------------------------- /model_zoo/configs/COCO-Detection/faster_rcnn_X_101_32x8d_FPN_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | MASK_ON: False 4 | WEIGHTS: "detectron2://ImageNetPretrained/FAIR/X-101-32x8d.pkl" 5 | PIXEL_STD: [57.375, 57.120, 58.395] 6 | RESNETS: 7 | STRIDE_IN_1X1: False # this is a C2 model 8 | NUM_GROUPS: 32 9 | WIDTH_PER_GROUP: 8 10 | DEPTH: 101 11 | SOLVER: 12 | STEPS: (210000, 250000) 13 | MAX_ITER: 270000 14 | -------------------------------------------------------------------------------- /model_zoo/configs/quick_schedules/keypoint_rcnn_R_50_FPN_instant_test.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | KEYPOINT_ON: True 5 | DATASETS: 6 | TRAIN: ("keypoints_coco_2017_val_100",) 7 | TEST: ("keypoints_coco_2017_val_100",) 8 | SOLVER: 9 | BASE_LR: 0.005 10 | STEPS: (30,) 11 | MAX_ITER: 40 12 | IMS_PER_BATCH: 4 13 | DATALOADER: 14 | NUM_WORKERS: 2 15 | -------------------------------------------------------------------------------- /modeling/backbone/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
2 | from .build import build_backbone, BACKBONE_REGISTRY # noqa F401 isort:skip 3 | 4 | from .backbone import Backbone 5 | from .fpn import FPN 6 | from .resnet import ResNet, ResNetBlockBase, build_resnet_backbone, make_stage 7 | 8 | __all__ = [k for k in globals().keys() if not k.startswith("_")] 9 | # TODO can expose more resnet blocks after careful consideration 10 | -------------------------------------------------------------------------------- /model_zoo/configs/COCO-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | MASK_ON: True 4 | WEIGHTS: "detectron2://ImageNetPretrained/FAIR/X-101-32x8d.pkl" 5 | PIXEL_STD: [57.375, 57.120, 58.395] 6 | RESNETS: 7 | STRIDE_IN_1X1: False # this is a C2 model 8 | NUM_GROUPS: 32 9 | WIDTH_PER_GROUP: 8 10 | DEPTH: 101 11 | SOLVER: 12 | STEPS: (210000, 250000) 13 | MAX_ITER: 270000 14 | -------------------------------------------------------------------------------- /model_zoo/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | """ 3 | Model Zoo API for Detectron2: a collection of functions to create common model architectures and 4 | optionally load pre-trained weights as released in 5 | `MODEL_ZOO.md `_. 6 | """ 7 | from .model_zoo import get, get_config_file, get_checkpoint_url 8 | 9 | __all__ = ["get_checkpoint_url", "get", "get_config_file"] 10 | -------------------------------------------------------------------------------- /model_zoo/configs/quick_schedules/semantic_R_50_FPN_inference_acc_test.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | META_ARCHITECTURE: "SemanticSegmentor" 4 | WEIGHTS: "detectron2://semantic_R_50_FPN_1x/111802073/model_final_c18079783c55a94968edc28b7101c5f0.pkl" 5 | RESNETS: 6 | DEPTH: 50 7 | DATASETS: 8 | TEST: ("coco_2017_val_100_panoptic_stuffonly",) 9 | TEST: 10 | EXPECTED_RESULTS: [["sem_seg", "mIoU", 39.53, 0.02], ["sem_seg", "mACC", 51.50, 0.02]] 11 | -------------------------------------------------------------------------------- /model_zoo/configs/quick_schedules/panoptic_fpn_R_50_inference_acc_test.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../COCO-PanopticSegmentation/panoptic_fpn_R_50_3x.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://COCO-PanopticSegmentation/panoptic_fpn_R_50_3x/139514569/model_final_c10459.pkl" 4 | DATASETS: 5 | TEST: ("coco_2017_val_100_panoptic_separated",) 6 | TEST: 7 | EXPECTED_RESULTS: [["bbox", "AP", 46.47, 0.02], ["segm", "AP", 43.39, 0.02], ["sem_seg", "mIoU", 42.55, 0.02], ["panoptic_seg", "PQ", 38.99, 0.02]] 8 | -------------------------------------------------------------------------------- /model_zoo/configs/Base-RCNN-C4.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | RPN: 4 | PRE_NMS_TOPK_TEST: 6000 5 | POST_NMS_TOPK_TEST: 1000 6 | ROI_HEADS: 7 | NAME: "Res5ROIHeads" 8 | #NUM_CLASSES: 15 9 | DATASETS: 10 | TRAIN: ("coco_2017_train",) 11 | TEST: ("coco_2017_val",) 12 | SOLVER: 13 | IMS_PER_BATCH: 16 14 | BASE_LR: 0.02 15 | STEPS: (60000, 80000) 16 | MAX_ITER: 90000 17 | INPUT: 18 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 19 | VERSION: 2 20 | 
-------------------------------------------------------------------------------- /model_zoo/configs/quick_schedules/mask_rcnn_R_50_C4_GCV_instant_test.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-C4.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: True 5 | DATASETS: 6 | TRAIN: ("coco_2017_val_100",) 7 | TEST: ("coco_2017_val_100",) 8 | SOLVER: 9 | BASE_LR: 0.001 10 | STEPS: (30,) 11 | MAX_ITER: 40 12 | IMS_PER_BATCH: 4 13 | CLIP_GRADIENTS: 14 | ENABLED: True 15 | CLIP_TYPE: "value" 16 | CLIP_VALUE: 1.0 17 | DATALOADER: 18 | NUM_WORKERS: 2 19 | -------------------------------------------------------------------------------- /model_zoo/configs/Misc/mask_rcnn_R_50_FPN_3x_gn.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "catalog://ImageNetPretrained/FAIR/R-50-GN" 4 | MASK_ON: True 5 | RESNETS: 6 | DEPTH: 50 7 | NORM: "GN" 8 | STRIDE_IN_1X1: False 9 | FPN: 10 | NORM: "GN" 11 | ROI_BOX_HEAD: 12 | NAME: "FastRCNNConvFCHead" 13 | NUM_CONV: 4 14 | NUM_FC: 1 15 | NORM: "GN" 16 | ROI_MASK_HEAD: 17 | NORM: "GN" 18 | SOLVER: 19 | # 3x schedule 20 | STEPS: (210000, 250000) 21 | MAX_ITER: 270000 22 | -------------------------------------------------------------------------------- /modeling/meta_arch/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright (c) Facebook, Inc. and its affiliates. 3 | 4 | from .build import META_ARCH_REGISTRY, build_model # isort:skip 5 | 6 | from .panoptic_fpn import PanopticFPN 7 | 8 | # import all the meta_arch, so they will be registered 9 | from .rcnn import GeneralizedRCNN, ProposalNetwork 10 | from .retinanet import RetinaNet 11 | from .semantic_seg import SEM_SEG_HEADS_REGISTRY, SemanticSegmentor, build_sem_seg_head 12 | 13 | 14 | __all__ = list(globals().keys()) 15 | -------------------------------------------------------------------------------- /data/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | from .coco import load_coco_json, load_sem_seg, register_coco_instances 3 | from .coco_panoptic import register_coco_panoptic, register_coco_panoptic_separated 4 | from .lvis import load_lvis_json, register_lvis_instances, get_lvis_instances_meta 5 | from .pascal_voc import load_voc_instances, register_pascal_voc 6 | from . 
import builtin as _builtin # ensure the builtin datasets are registered 7 | 8 | 9 | __all__ = [k for k in globals().keys() if not k.startswith("_")] 10 | -------------------------------------------------------------------------------- /model_zoo/configs/quick_schedules/mask_rcnn_R_50_FPN_inference_acc_test.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x/137849600/model_final_f10217.pkl" 4 | DATASETS: 5 | TEST: ("coco_2017_val_100",) 6 | TEST: 7 | EXPECTED_RESULTS: [["bbox", "AP", 47.34, 0.02], ["segm", "AP", 42.67, 0.02], ["bbox_TTA", "AP", 49.11, 0.02], ["segm_TTA", "AP", 45.04, 0.02]] 8 | AUG: 9 | ENABLED: True 10 | MIN_SIZES: (700, 800) # to save some time 11 | -------------------------------------------------------------------------------- /structures/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | from .boxes import Boxes, BoxMode, pairwise_iou, pairwise_ioa 3 | from .image_list import ImageList 4 | 5 | from .instances import Instances 6 | from .keypoints import Keypoints, heatmaps_to_keypoints 7 | from .masks import BitMasks, PolygonMasks, rasterize_polygons_within_box, polygons_to_bitmask 8 | from .rotated_boxes import RotatedBoxes 9 | from .rotated_boxes import pairwise_iou as pairwise_iou_rotated 10 | 11 | __all__ = [k for k in globals().keys() if not k.startswith("_")] 12 | -------------------------------------------------------------------------------- /model_zoo/configs/quick_schedules/panoptic_fpn_R_50_instant_test.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | META_ARCHITECTURE: "PanopticFPN" 4 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 5 | MASK_ON: True 6 | RESNETS: 7 | DEPTH: 50 8 | SEM_SEG_HEAD: 9 | LOSS_WEIGHT: 0.5 10 | DATASETS: 11 | TRAIN: ("coco_2017_val_100_panoptic_separated",) 12 | TEST: ("coco_2017_val_100_panoptic_separated",) 13 | SOLVER: 14 | BASE_LR: 0.005 15 | STEPS: (30,) 16 | MAX_ITER: 40 17 | IMS_PER_BATCH: 4 18 | DATALOADER: 19 | NUM_WORKERS: 1 20 | -------------------------------------------------------------------------------- /model_zoo/configs/quick_schedules/semantic_R_50_FPN_instant_test.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | META_ARCHITECTURE: "SemanticSegmentor" 4 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 5 | RESNETS: 6 | DEPTH: 50 7 | DATASETS: 8 | TRAIN: ("coco_2017_val_100_panoptic_stuffonly",) 9 | TEST: ("coco_2017_val_100_panoptic_stuffonly",) 10 | INPUT: 11 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 12 | SOLVER: 13 | BASE_LR: 0.005 14 | STEPS: (30,) 15 | MAX_ITER: 40 16 | IMS_PER_BATCH: 4 17 | DATALOADER: 18 | NUM_WORKERS: 2 19 | -------------------------------------------------------------------------------- /model_zoo/configs/PascalVOC-Detection/faster_rcnn_R_50_C4.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-C4.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: False 5 | RESNETS: 6 | DEPTH: 50 7 | ROI_HEADS: 8 | NUM_CLASSES: 20 9 | INPUT: 10 | MIN_SIZE_TRAIN: (480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800) 
11 | MIN_SIZE_TEST: 800 12 | DATASETS: 13 | TRAIN: ('voc_2007_trainval', 'voc_2012_trainval') 14 | TEST: ('voc_2007_test',) 15 | SOLVER: 16 | STEPS: (12000, 16000) 17 | MAX_ITER: 18000 # 17.4 epochs 18 | WARMUP_ITERS: 100 19 | -------------------------------------------------------------------------------- /model_zoo/configs/PascalVOC-Detection/faster_rcnn_R_50_FPN.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: False 5 | RESNETS: 6 | DEPTH: 50 7 | ROI_HEADS: 8 | NUM_CLASSES: 20 9 | INPUT: 10 | MIN_SIZE_TRAIN: (480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800) 11 | MIN_SIZE_TEST: 800 12 | DATASETS: 13 | TRAIN: ('voc_2007_trainval', 'voc_2012_trainval') 14 | TEST: ('voc_2007_test',) 15 | SOLVER: 16 | STEPS: (12000, 16000) 17 | MAX_ITER: 18000 # 17.4 epochs 18 | WARMUP_ITERS: 100 19 | -------------------------------------------------------------------------------- /model_zoo/configs/Detectron1-Comparisons/faster_rcnn_R_50_FPN_noaug_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: False 5 | RESNETS: 6 | DEPTH: 50 7 | # Detectron1 uses smooth L1 loss with some magic beta values. 8 | # The defaults are changed to L1 loss in Detectron2. 9 | RPN: 10 | SMOOTH_L1_BETA: 0.1111 11 | ROI_BOX_HEAD: 12 | SMOOTH_L1_BETA: 1.0 13 | POOLER_SAMPLING_RATIO: 2 14 | POOLER_TYPE: "ROIAlign" 15 | INPUT: 16 | # no scale augmentation 17 | MIN_SIZE_TRAIN: (800, ) 18 | -------------------------------------------------------------------------------- /model_zoo/configs/Misc/mask_rcnn_R_50_FPN_3x_syncbn.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: True 5 | RESNETS: 6 | DEPTH: 50 7 | NORM: "SyncBN" 8 | STRIDE_IN_1X1: True 9 | FPN: 10 | NORM: "SyncBN" 11 | ROI_BOX_HEAD: 12 | NAME: "FastRCNNConvFCHead" 13 | NUM_CONV: 4 14 | NUM_FC: 1 15 | NORM: "SyncBN" 16 | ROI_MASK_HEAD: 17 | NORM: "SyncBN" 18 | SOLVER: 19 | # 3x schedule 20 | STEPS: (210000, 250000) 21 | MAX_ITER: 270000 22 | TEST: 23 | PRECISE_BN: 24 | ENABLED: True 25 | -------------------------------------------------------------------------------- /model_zoo/configs/LVISv0.5-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: True 5 | RESNETS: 6 | DEPTH: 50 7 | ROI_HEADS: 8 | NUM_CLASSES: 1230 9 | SCORE_THRESH_TEST: 0.0001 10 | INPUT: 11 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 12 | DATASETS: 13 | TRAIN: ("lvis_v0.5_train",) 14 | TEST: ("lvis_v0.5_val",) 15 | TEST: 16 | DETECTIONS_PER_IMAGE: 300 # LVIS allows up to 300 17 | DATALOADER: 18 | SAMPLER_TRAIN: "RepeatFactorTrainingSampler" 19 | REPEAT_THRESHOLD: 0.001 20 | -------------------------------------------------------------------------------- /model_zoo/configs/Misc/scratch_mask_rcnn_R_50_FPN_3x_gn.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "mask_rcnn_R_50_FPN_3x_gn.yaml" 2 | MODEL: 3 | # Train from random initialization. 
4 | WEIGHTS: "" 5 | # It makes sense to divide by STD when training from scratch 6 | # But it seems to make no difference on the results and C2's models didn't do this. 7 | # So we keep things consistent with C2. 8 | # PIXEL_STD: [57.375, 57.12, 58.395] 9 | MASK_ON: True 10 | BACKBONE: 11 | FREEZE_AT: 0 12 | # NOTE: Please refer to Rethinking ImageNet Pre-training https://arxiv.org/abs/1811.08883 13 | # to learn what you need for training from scratch. 14 | -------------------------------------------------------------------------------- /model_zoo/configs/LVISv0.5-InstanceSegmentation/mask_rcnn_R_101_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" 4 | MASK_ON: True 5 | RESNETS: 6 | DEPTH: 101 7 | ROI_HEADS: 8 | NUM_CLASSES: 1230 9 | SCORE_THRESH_TEST: 0.0001 10 | INPUT: 11 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 12 | DATASETS: 13 | TRAIN: ("lvis_v0.5_train",) 14 | TEST: ("lvis_v0.5_val",) 15 | TEST: 16 | DETECTIONS_PER_IMAGE: 300 # LVIS allows up to 300 17 | DATALOADER: 18 | SAMPLER_TRAIN: "RepeatFactorTrainingSampler" 19 | REPEAT_THRESHOLD: 0.001 20 | -------------------------------------------------------------------------------- /model_zoo/configs/COCO-Keypoints/Base-Keypoint-RCNN-FPN.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | KEYPOINT_ON: True 4 | ROI_HEADS: 5 | NUM_CLASSES: 1 6 | ROI_BOX_HEAD: 7 | SMOOTH_L1_BETA: 0.5 # Keypoint AP degrades (though box AP improves) when using plain L1 loss 8 | RPN: 9 | # Detectron1 uses 2000 proposals per-batch, but this option is per-image in detectron2. 10 | # 1000 proposals per-image is found to hurt box AP. 11 | # Therefore we increase it to 1500 per-image. 
12 | POST_NMS_TOPK_TRAIN: 1500 13 | DATASETS: 14 | TRAIN: ("keypoints_coco_2017_train",) 15 | TEST: ("keypoints_coco_2017_val",) 16 | -------------------------------------------------------------------------------- /model_zoo/configs/quick_schedules/mask_rcnn_R_50_FPN_training_acc_test.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | ROI_HEADS: 5 | BATCH_SIZE_PER_IMAGE: 256 6 | MASK_ON: True 7 | DATASETS: 8 | TRAIN: ("coco_2017_val",) 9 | TEST: ("coco_2017_val",) 10 | INPUT: 11 | MIN_SIZE_TRAIN: (600,) 12 | MAX_SIZE_TRAIN: 1000 13 | MIN_SIZE_TEST: 800 14 | MAX_SIZE_TEST: 1000 15 | SOLVER: 16 | WARMUP_FACTOR: 0.3333333 17 | WARMUP_ITERS: 100 18 | STEPS: (5500, 5800) 19 | MAX_ITER: 6000 20 | TEST: 21 | EXPECTED_RESULTS: [["bbox", "AP", 42.5, 1.0], ["segm", "AP", 35.8, 0.8]] 22 | -------------------------------------------------------------------------------- /model_zoo/configs/Misc/scratch_mask_rcnn_R_50_FPN_9x_gn.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "mask_rcnn_R_50_FPN_3x_gn.yaml" 2 | MODEL: 3 | PIXEL_STD: [57.375, 57.12, 58.395] 4 | WEIGHTS: "" 5 | MASK_ON: True 6 | RESNETS: 7 | STRIDE_IN_1X1: False 8 | BACKBONE: 9 | FREEZE_AT: 0 10 | SOLVER: 11 | # 9x schedule 12 | IMS_PER_BATCH: 64 # 4x the standard 13 | STEPS: (187500, 197500) # last 60/4==15k and last 20/4==5k 14 | MAX_ITER: 202500 # 90k * 9 / 4 15 | BASE_LR: 0.08 16 | TEST: 17 | EVAL_PERIOD: 2500 18 | # NOTE: Please refer to Rethinking ImageNet Pre-training https://arxiv.org/abs/1811.08883 19 | # to learn what you need for training from scratch. 20 | -------------------------------------------------------------------------------- /model_zoo/configs/Misc/scratch_mask_rcnn_R_50_FPN_9x_syncbn.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "mask_rcnn_R_50_FPN_3x_syncbn.yaml" 2 | MODEL: 3 | PIXEL_STD: [57.375, 57.12, 58.395] 4 | WEIGHTS: "" 5 | MASK_ON: True 6 | RESNETS: 7 | STRIDE_IN_1X1: False 8 | BACKBONE: 9 | FREEZE_AT: 0 10 | SOLVER: 11 | # 9x schedule 12 | IMS_PER_BATCH: 64 # 4x the standard 13 | STEPS: (187500, 197500) # last 60/4==15k and last 20/4==5k 14 | MAX_ITER: 202500 # 90k * 9 / 4 15 | BASE_LR: 0.08 16 | TEST: 17 | EVAL_PERIOD: 2500 18 | # NOTE: Please refer to Rethinking ImageNet Pre-training https://arxiv.org/abs/1811.08883 19 | # to learn what you need for training from scratch. 
20 | -------------------------------------------------------------------------------- /model_zoo/configs/quick_schedules/fast_rcnn_R_50_FPN_instant_test.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../COCO-Detection/fast_rcnn_R_50_FPN_1x.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | DATASETS: 5 | TRAIN: ("coco_2017_val_100",) 6 | PROPOSAL_FILES_TRAIN: ("detectron2://COCO-Detection/rpn_R_50_FPN_1x/137258492/coco_2017_val_box_proposals_ee0dad.pkl", ) 7 | TEST: ("coco_2017_val_100",) 8 | PROPOSAL_FILES_TEST: ("detectron2://COCO-Detection/rpn_R_50_FPN_1x/137258492/coco_2017_val_box_proposals_ee0dad.pkl", ) 9 | SOLVER: 10 | BASE_LR: 0.005 11 | STEPS: (30,) 12 | MAX_ITER: 40 13 | IMS_PER_BATCH: 4 14 | DATALOADER: 15 | NUM_WORKERS: 2 16 | -------------------------------------------------------------------------------- /model_zoo/configs/Detectron1-Comparisons/mask_rcnn_R_50_FPN_noaug_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: True 5 | RESNETS: 6 | DEPTH: 50 7 | # Detectron1 uses smooth L1 loss with some magic beta values. 8 | # The defaults are changed to L1 loss in Detectron2. 9 | RPN: 10 | SMOOTH_L1_BETA: 0.1111 11 | ROI_BOX_HEAD: 12 | SMOOTH_L1_BETA: 1.0 13 | POOLER_SAMPLING_RATIO: 2 14 | POOLER_TYPE: "ROIAlign" 15 | ROI_MASK_HEAD: 16 | POOLER_SAMPLING_RATIO: 2 17 | POOLER_TYPE: "ROIAlign" 18 | INPUT: 19 | # no scale augmentation 20 | MIN_SIZE_TRAIN: (800, ) 21 | -------------------------------------------------------------------------------- /model_zoo/configs/quick_schedules/semantic_R_50_FPN_training_acc_test.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | META_ARCHITECTURE: "SemanticSegmentor" 4 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 5 | RESNETS: 6 | DEPTH: 50 7 | DATASETS: 8 | TRAIN: ("coco_2017_val_panoptic_stuffonly",) 9 | TEST: ("coco_2017_val_panoptic_stuffonly",) 10 | SOLVER: 11 | BASE_LR: 0.01 12 | WARMUP_FACTOR: 0.001 13 | WARMUP_ITERS: 300 14 | STEPS: (5500,) 15 | MAX_ITER: 7000 16 | TEST: 17 | EXPECTED_RESULTS: [["sem_seg", "mIoU", 76.51, 1.0], ["sem_seg", "mACC", 83.25, 1.0]] 18 | INPUT: 19 | # no scale augmentation 20 | MIN_SIZE_TRAIN: (800, ) 21 | -------------------------------------------------------------------------------- /model_zoo/configs/quick_schedules/mask_rcnn_R_50_C4_training_acc_test.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-C4.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | ROI_HEADS: 5 | BATCH_SIZE_PER_IMAGE: 256 6 | MASK_ON: True 7 | DATASETS: 8 | TRAIN: ("coco_2017_val",) 9 | TEST: ("coco_2017_val",) 10 | INPUT: 11 | MIN_SIZE_TRAIN: (600,) 12 | MAX_SIZE_TRAIN: 1000 13 | MIN_SIZE_TEST: 800 14 | MAX_SIZE_TEST: 1000 15 | SOLVER: 16 | IMS_PER_BATCH: 8 # base uses 16 17 | WARMUP_FACTOR: 0.33333 18 | WARMUP_ITERS: 100 19 | STEPS: (11000, 11600) 20 | MAX_ITER: 12000 21 | TEST: 22 | EXPECTED_RESULTS: [["bbox", "AP", 41.88, 0.7], ["segm", "AP", 33.79, 0.5]] 23 | -------------------------------------------------------------------------------- /data/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | from . 
import transforms # isort:skip 3 | 4 | from .build import ( 5 | build_batch_data_loader, 6 | build_detection_test_loader, 7 | build_detection_train_loader, 8 | get_detection_dataset_dicts, 9 | load_proposals_into_dataset, 10 | print_instances_class_histogram, 11 | ) 12 | from .catalog import DatasetCatalog, MetadataCatalog, Metadata 13 | from .common import DatasetFromList, MapDataset 14 | from .dataset_mapper import DatasetMapper 15 | 16 | # ensure the builtin datasets are registered 17 | from . import datasets, samplers # isort:skip 18 | 19 | __all__ = [k for k in globals().keys() if not k.startswith("_")] 20 | -------------------------------------------------------------------------------- /model_zoo/configs/LVISv1-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: True 5 | RESNETS: 6 | DEPTH: 50 7 | ROI_HEADS: 8 | NUM_CLASSES: 1203 9 | SCORE_THRESH_TEST: 0.0001 10 | INPUT: 11 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 12 | DATASETS: 13 | TRAIN: ("lvis_v1_train",) 14 | TEST: ("lvis_v1_val",) 15 | TEST: 16 | DETECTIONS_PER_IMAGE: 300 # LVIS allows up to 300 17 | SOLVER: 18 | STEPS: (120000, 160000) 19 | MAX_ITER: 180000 # 180000 * 16 / 100000 ~ 28.8 epochs 20 | DATALOADER: 21 | SAMPLER_TRAIN: "RepeatFactorTrainingSampler" 22 | REPEAT_THRESHOLD: 0.001 23 | -------------------------------------------------------------------------------- /model_zoo/configs/quick_schedules/panoptic_fpn_R_50_training_acc_test.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | META_ARCHITECTURE: "PanopticFPN" 4 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 5 | MASK_ON: True 6 | RESNETS: 7 | DEPTH: 50 8 | SEM_SEG_HEAD: 9 | LOSS_WEIGHT: 0.5 10 | DATASETS: 11 | TRAIN: ("coco_2017_val_panoptic_separated",) 12 | TEST: ("coco_2017_val_panoptic_separated",) 13 | SOLVER: 14 | BASE_LR: 0.01 15 | WARMUP_FACTOR: 0.001 16 | WARMUP_ITERS: 500 17 | STEPS: (5500,) 18 | MAX_ITER: 7000 19 | TEST: 20 | EXPECTED_RESULTS: [["bbox", "AP", 46.70, 1.1], ["segm", "AP", 39.0, 0.7], ["sem_seg", "mIoU", 64.73, 1.3], ["panoptic_seg", "PQ", 48.13, 0.8]] 21 | -------------------------------------------------------------------------------- /model_zoo/configs/LVISv1-InstanceSegmentation/mask_rcnn_R_101_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" 4 | MASK_ON: True 5 | RESNETS: 6 | DEPTH: 101 7 | ROI_HEADS: 8 | NUM_CLASSES: 1203 9 | SCORE_THRESH_TEST: 0.0001 10 | INPUT: 11 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 12 | DATASETS: 13 | TRAIN: ("lvis_v1_train",) 14 | TEST: ("lvis_v1_val",) 15 | TEST: 16 | DETECTIONS_PER_IMAGE: 300 # LVIS allows up to 300 17 | SOLVER: 18 | STEPS: (120000, 160000) 19 | MAX_ITER: 180000 # 180000 * 16 / 100000 ~ 28.8 epochs 20 | DATALOADER: 21 | SAMPLER_TRAIN: "RepeatFactorTrainingSampler" 22 | REPEAT_THRESHOLD: 0.001 23 | -------------------------------------------------------------------------------- /evaluation/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
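The LVIS schedules above are given in iterations rather than epochs; the inline comment's conversion works out as iterations × images-per-batch ÷ training-set size. A minimal sketch of that arithmetic (the 100,000-image figure for lvis_v1_train is taken from the comment itself):

# Converting the iteration-based LVIS schedule to approximate epochs,
# as in the "180000 * 16 / 100000 ~ 28.8 epochs" comment above.
max_iter = 180000          # SOLVER.MAX_ITER
ims_per_batch = 16         # SOLVER.IMS_PER_BATCH, inherited from Base-RCNN-FPN.yaml
num_train_images = 100000  # approximate size of lvis_v1_train, per the comment

epochs = max_iter * ims_per_batch / num_train_images
print(epochs)  # 28.8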
2 | from .cityscapes_evaluation import CityscapesInstanceEvaluator, CityscapesSemSegEvaluator 3 | from .coco_evaluation import COCOEvaluator 4 | from .rotated_coco_evaluation import RotatedCOCOEvaluator 5 | from .evaluator import DatasetEvaluator, DatasetEvaluators, inference_context, inference_on_dataset 6 | from .lvis_evaluation import LVISEvaluator 7 | from .panoptic_evaluation import COCOPanopticEvaluator 8 | from .pascal_voc_evaluation import PascalVOCDetectionEvaluator 9 | from .sem_seg_evaluation import SemSegEvaluator 10 | from .testing import print_csv_format, verify_results 11 | 12 | __all__ = [k for k in globals().keys() if not k.startswith("_")] 13 | -------------------------------------------------------------------------------- /layers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | from .batch_norm import FrozenBatchNorm2d, get_norm, NaiveSyncBatchNorm 3 | from .deform_conv import DeformConv, ModulatedDeformConv 4 | from .mask_ops import paste_masks_in_image 5 | from .nms import batched_nms, batched_nms_rotated, nms, nms_rotated 6 | from .roi_align import ROIAlign, roi_align 7 | from .roi_align_rotated import ROIAlignRotated, roi_align_rotated 8 | from .shape_spec import ShapeSpec 9 | from .wrappers import BatchNorm2d, Conv2d, ConvTranspose2d, cat, interpolate, Linear, nonzero_tuple 10 | from .blocks import CNNBlockBase, DepthwiseSeparableConv2d 11 | from .aspp import ASPP 12 | 13 | __all__ = [k for k in globals().keys() if not k.startswith("_")] 14 | -------------------------------------------------------------------------------- /model_zoo/configs/COCO-Detection/fast_rcnn_R_50_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: False 5 | LOAD_PROPOSALS: True 6 | RESNETS: 7 | DEPTH: 50 8 | PROPOSAL_GENERATOR: 9 | NAME: "PrecomputedProposals" 10 | DATASETS: 11 | TRAIN: ("coco_2017_train",) 12 | PROPOSAL_FILES_TRAIN: ("detectron2://COCO-Detection/rpn_R_50_FPN_1x/137258492/coco_2017_train_box_proposals_21bc3a.pkl", ) 13 | TEST: ("coco_2017_val",) 14 | PROPOSAL_FILES_TEST: ("detectron2://COCO-Detection/rpn_R_50_FPN_1x/137258492/coco_2017_val_box_proposals_ee0dad.pkl", ) 15 | DATALOADER: 16 | # proposals are part of the dataset_dicts, and take a lot of RAM 17 | NUM_WORKERS: 2 18 | -------------------------------------------------------------------------------- /layers/rotated_boxes.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | from __future__ import absolute_import, division, print_function, unicode_literals 3 | 4 | from detectron2 import _C 5 | 6 | 7 | def pairwise_iou_rotated(boxes1, boxes2): 8 | """ 9 | Return intersection-over-union (Jaccard index) of boxes. 10 | 11 | Both sets of boxes are expected to be in 12 | (x_center, y_center, width, height, angle) format. 
13 | 14 | Arguments: 15 | boxes1 (Tensor[N, 5]) 16 | boxes2 (Tensor[M, 5]) 17 | 18 | Returns: 19 | iou (Tensor[N, M]): the NxM matrix containing the pairwise 20 | IoU values for every element in boxes1 and boxes2 21 | """ 22 | return _C.box_iou_rotated(boxes1, boxes2) 23 | -------------------------------------------------------------------------------- /layers/shape_spec.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright (c) Facebook, Inc. and its affiliates. 3 | from collections import namedtuple 4 | 5 | 6 | class ShapeSpec(namedtuple("_ShapeSpec", ["channels", "height", "width", "stride"])): 7 | """ 8 | A simple structure that contains basic shape specification about a tensor. 9 | It is often used as the auxiliary inputs/outputs of models, 10 | to complement the lack of shape inference ability among pytorch modules. 11 | 12 | Attributes: 13 | channels: 14 | height: 15 | width: 16 | stride: 17 | """ 18 | 19 | def __new__(cls, *, channels=None, height=None, width=None, stride=None): 20 | return super().__new__(cls, channels, height, width, stride) 21 | -------------------------------------------------------------------------------- /model_zoo/configs/LVISv0.5-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/FAIR/X-101-32x8d.pkl" 4 | PIXEL_STD: [57.375, 57.120, 58.395] 5 | MASK_ON: True 6 | RESNETS: 7 | STRIDE_IN_1X1: False # this is a C2 model 8 | NUM_GROUPS: 32 9 | WIDTH_PER_GROUP: 8 10 | DEPTH: 101 11 | ROI_HEADS: 12 | NUM_CLASSES: 1230 13 | SCORE_THRESH_TEST: 0.0001 14 | INPUT: 15 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 16 | DATASETS: 17 | TRAIN: ("lvis_v0.5_train",) 18 | TEST: ("lvis_v0.5_val",) 19 | TEST: 20 | DETECTIONS_PER_IMAGE: 300 # LVIS allows up to 300 21 | DATALOADER: 22 | SAMPLER_TRAIN: "RepeatFactorTrainingSampler" 23 | REPEAT_THRESHOLD: 0.001 24 | -------------------------------------------------------------------------------- /model_zoo/configs/Misc/panoptic_fpn_R_101_dconv_cascade_gn_3x.yaml: -------------------------------------------------------------------------------- 1 | # A large PanopticFPN for demo purposes. 2 | # Use GN on backbone to support semantic seg. 3 | # Use Cascade + Deform Conv to improve localization. 4 | _BASE_: "../COCO-PanopticSegmentation/Base-Panoptic-FPN.yaml" 5 | MODEL: 6 | WEIGHTS: "catalog://ImageNetPretrained/FAIR/R-101-GN" 7 | RESNETS: 8 | DEPTH: 101 9 | NORM: "GN" 10 | DEFORM_ON_PER_STAGE: [False, True, True, True] 11 | STRIDE_IN_1X1: False 12 | FPN: 13 | NORM: "GN" 14 | ROI_HEADS: 15 | NAME: CascadeROIHeads 16 | ROI_BOX_HEAD: 17 | CLS_AGNOSTIC_BBOX_REG: True 18 | ROI_MASK_HEAD: 19 | NORM: "GN" 20 | RPN: 21 | POST_NMS_TOPK_TRAIN: 2000 22 | SOLVER: 23 | STEPS: (105000, 125000) 24 | MAX_ITER: 135000 25 | IMS_PER_BATCH: 32 26 | BASE_LR: 0.04 27 | -------------------------------------------------------------------------------- /modeling/meta_arch/build.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | import torch 3 | 4 | from detectron2.utils.registry import Registry 5 | 6 | META_ARCH_REGISTRY = Registry("META_ARCH") # noqa F401 isort:skip 7 | META_ARCH_REGISTRY.__doc__ = """ 8 | Registry for meta-architectures, i.e. the whole model. 
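A minimal sketch of how pairwise_iou_rotated above is called (it requires detectron2's compiled _C extension; the tensors here are made-up toy boxes):

import torch

from detectron2.layers.rotated_boxes import pairwise_iou_rotated

# Rotated boxes in (x_center, y_center, width, height, angle) format.
boxes1 = torch.tensor([[10.0, 10.0, 20.0, 20.0, 0.0]])
boxes2 = torch.tensor([[10.0, 10.0, 20.0, 20.0, 0.0],
                       [30.0, 30.0, 20.0, 20.0, 45.0]])

iou = pairwise_iou_rotated(boxes1, boxes2)  # Tensor of shape (1, 2)
# iou[0, 0] is 1.0 (identical boxes); iou[0, 1] is small (little overlap).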
9 | 10 | The registered object will be called with `obj(cfg)` 11 | and expected to return a `nn.Module` object. 12 | """ 13 | 14 | 15 | def build_model(cfg): 16 | """ 17 | Build the whole model architecture, defined by ``cfg.MODEL.META_ARCHITECTURE``. 18 | Note that it does not load any weights from ``cfg``. 19 | """ 20 | meta_arch = cfg.MODEL.META_ARCHITECTURE 21 | model = META_ARCH_REGISTRY.get(meta_arch)(cfg) 22 | model.to(torch.device(cfg.MODEL.DEVICE)) 23 | return model 24 | -------------------------------------------------------------------------------- /model_zoo/configs/Base-RCNN-DilatedC5.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | RESNETS: 4 | OUT_FEATURES: ["res5"] 5 | RES5_DILATION: 2 6 | RPN: 7 | IN_FEATURES: ["res5"] 8 | PRE_NMS_TOPK_TEST: 6000 9 | POST_NMS_TOPK_TEST: 1000 10 | ROI_HEADS: 11 | NAME: "StandardROIHeads" 12 | IN_FEATURES: ["res5"] 13 | ROI_BOX_HEAD: 14 | NAME: "FastRCNNConvFCHead" 15 | NUM_FC: 2 16 | POOLER_RESOLUTION: 7 17 | ROI_MASK_HEAD: 18 | NAME: "MaskRCNNConvUpsampleHead" 19 | NUM_CONV: 4 20 | POOLER_RESOLUTION: 14 21 | DATASETS: 22 | TRAIN: ("coco_2017_train",) 23 | TEST: ("coco_2017_val",) 24 | SOLVER: 25 | IMS_PER_BATCH: 16 26 | BASE_LR: 0.02 27 | STEPS: (60000, 80000) 28 | MAX_ITER: 90000 29 | INPUT: 30 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 31 | VERSION: 2 32 | -------------------------------------------------------------------------------- /model_zoo/configs/Base-RetinaNet.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "RetinaNet" 3 | BACKBONE: 4 | NAME: "build_retinanet_resnet_fpn_backbone" 5 | RESNETS: 6 | OUT_FEATURES: ["res3", "res4", "res5"] 7 | ANCHOR_GENERATOR: 8 | SIZES: !!python/object/apply:eval ["[[x, x * 2**(1.0/3), x * 2**(2.0/3) ] for x in [32, 64, 128, 256, 512 ]]"] 9 | FPN: 10 | IN_FEATURES: ["res3", "res4", "res5"] 11 | RETINANET: 12 | IOU_THRESHOLDS: [0.4, 0.5] 13 | IOU_LABELS: [0, -1, 1] 14 | SMOOTH_L1_LOSS_BETA: 0.0 15 | DATASETS: 16 | TRAIN: ("coco_2017_train",) 17 | TEST: ("coco_2017_val",) 18 | SOLVER: 19 | IMS_PER_BATCH: 16 20 | BASE_LR: 0.01 # Note that RetinaNet uses a different default learning rate 21 | STEPS: (60000, 80000) 22 | MAX_ITER: 90000 23 | INPUT: 24 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 25 | VERSION: 2 26 | -------------------------------------------------------------------------------- /modeling/roi_heads/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | from .box_head import ROI_BOX_HEAD_REGISTRY, build_box_head, FastRCNNConvFCHead 3 | from .keypoint_head import ( 4 | ROI_KEYPOINT_HEAD_REGISTRY, 5 | build_keypoint_head, 6 | BaseKeypointRCNNHead, 7 | KRCNNConvDeconvUpsampleHead, 8 | ) 9 | from .mask_head import ( 10 | ROI_MASK_HEAD_REGISTRY, 11 | build_mask_head, 12 | BaseMaskRCNNHead, 13 | MaskRCNNConvUpsampleHead, 14 | ) 15 | from .roi_heads import ( 16 | ROI_HEADS_REGISTRY, 17 | ROIHeads, 18 | Res5ROIHeads, 19 | StandardROIHeads, 20 | build_roi_heads, 21 | select_foreground_proposals, 22 | ) 23 | from .rotated_fast_rcnn import RROIHeads 24 | from .fast_rcnn import FastRCNNOutputLayers 25 | 26 | from . 
import cascade_rcnn # isort:skip 27 | 28 | __all__ = list(globals().keys()) 29 | -------------------------------------------------------------------------------- /model_zoo/configs/LVISv1-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/FAIR/X-101-32x8d.pkl" 4 | PIXEL_STD: [57.375, 57.120, 58.395] 5 | MASK_ON: True 6 | RESNETS: 7 | STRIDE_IN_1X1: False # this is a C2 model 8 | NUM_GROUPS: 32 9 | WIDTH_PER_GROUP: 8 10 | DEPTH: 101 11 | ROI_HEADS: 12 | NUM_CLASSES: 1203 13 | SCORE_THRESH_TEST: 0.0001 14 | INPUT: 15 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 16 | DATASETS: 17 | TRAIN: ("lvis_v1_train",) 18 | TEST: ("lvis_v1_val",) 19 | SOLVER: 20 | STEPS: (120000, 160000) 21 | MAX_ITER: 180000 # 180000 * 16 / 100000 ~ 28.8 epochs 22 | TEST: 23 | DETECTIONS_PER_IMAGE: 300 # LVIS allows up to 300 24 | DATALOADER: 25 | SAMPLER_TRAIN: "RepeatFactorTrainingSampler" 26 | REPEAT_THRESHOLD: 0.001 27 | -------------------------------------------------------------------------------- /model_zoo/configs/quick_schedules/keypoint_rcnn_R_50_FPN_training_acc_test.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | KEYPOINT_ON: True 5 | RESNETS: 6 | DEPTH: 50 7 | ROI_HEADS: 8 | BATCH_SIZE_PER_IMAGE: 256 9 | NUM_CLASSES: 1 10 | ROI_KEYPOINT_HEAD: 11 | POOLER_RESOLUTION: 14 12 | POOLER_SAMPLING_RATIO: 2 13 | ROI_BOX_HEAD: 14 | SMOOTH_L1_BETA: 1.0 # Keypoint AP degrades when using plain L1 loss 15 | RPN: 16 | SMOOTH_L1_BETA: 0.2 # Keypoint AP degrades when using plain L1 loss 17 | DATASETS: 18 | TRAIN: ("keypoints_coco_2017_val",) 19 | TEST: ("keypoints_coco_2017_val",) 20 | INPUT: 21 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 22 | SOLVER: 23 | WARMUP_FACTOR: 0.33333333 24 | WARMUP_ITERS: 100 25 | STEPS: (5500, 5800) 26 | MAX_ITER: 6000 27 | TEST: 28 | EXPECTED_RESULTS: [["bbox", "AP", 53.5, 1.0], ["keypoints", "AP", 72.4, 1.0]] 29 | -------------------------------------------------------------------------------- /modeling/proposal_generator/build.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | from detectron2.utils.registry import Registry 3 | 4 | PROPOSAL_GENERATOR_REGISTRY = Registry("PROPOSAL_GENERATOR") 5 | PROPOSAL_GENERATOR_REGISTRY.__doc__ = """ 6 | Registry for proposal generator, which produces object proposals from feature maps. 7 | 8 | The registered object will be called with `obj(cfg, input_shape)`. 9 | The call should return a `nn.Module` object. 10 | """ 11 | 12 | from . import rpn, rrpn # noqa F401 isort:skip 13 | 14 | 15 | def build_proposal_generator(cfg, input_shape): 16 | """ 17 | Build a proposal generator from `cfg.MODEL.PROPOSAL_GENERATOR.NAME`. 18 | The name can be "PrecomputedProposals" to use no proposal generator. 
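META_ARCH_REGISTRY above and PROPOSAL_GENERATOR_REGISTRY here follow the same registration pattern: a class is registered under a name and later instantiated from the config. A minimal sketch with a made-up meta-architecture (ToyDetector is hypothetical, for illustration only):

import torch.nn as nn

from detectron2.config import get_cfg
from detectron2.modeling import META_ARCH_REGISTRY, build_model


@META_ARCH_REGISTRY.register()
class ToyDetector(nn.Module):  # hypothetical meta-architecture
    def __init__(self, cfg):
        super().__init__()
        self.conv = nn.Conv2d(3, 8, kernel_size=3)

    def forward(self, batched_inputs):
        return batched_inputs


cfg = get_cfg()
cfg.MODEL.META_ARCHITECTURE = "ToyDetector"  # looked up in META_ARCH_REGISTRY
cfg.MODEL.DEVICE = "cpu"
model = build_model(cfg)  # calls META_ARCH_REGISTRY.get("ToyDetector")(cfg)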
19 | """ 20 | name = cfg.MODEL.PROPOSAL_GENERATOR.NAME 21 | if name == "PrecomputedProposals": 22 | return None 23 | 24 | return PROPOSAL_GENERATOR_REGISTRY.get(name)(cfg, input_shape) 25 | -------------------------------------------------------------------------------- /model_zoo/configs/Misc/cascade_mask_rcnn_X_152_32x8d_FPN_IN5k_gn_dconv.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | MASK_ON: True 4 | WEIGHTS: "catalog://ImageNetPretrained/FAIR/X-152-32x8d-IN5k" 5 | RESNETS: 6 | STRIDE_IN_1X1: False # this is a C2 model 7 | NUM_GROUPS: 32 8 | WIDTH_PER_GROUP: 8 9 | DEPTH: 152 10 | DEFORM_ON_PER_STAGE: [False, True, True, True] 11 | ROI_HEADS: 12 | NAME: "CascadeROIHeads" 13 | ROI_BOX_HEAD: 14 | NAME: "FastRCNNConvFCHead" 15 | NUM_CONV: 4 16 | NUM_FC: 1 17 | NORM: "GN" 18 | CLS_AGNOSTIC_BBOX_REG: True 19 | ROI_MASK_HEAD: 20 | NUM_CONV: 8 21 | NORM: "GN" 22 | RPN: 23 | POST_NMS_TOPK_TRAIN: 2000 24 | SOLVER: 25 | IMS_PER_BATCH: 128 26 | STEPS: (35000, 45000) 27 | MAX_ITER: 50000 28 | BASE_LR: 0.16 29 | INPUT: 30 | MIN_SIZE_TRAIN: (640, 864) 31 | MIN_SIZE_TRAIN_SAMPLING: "range" 32 | MAX_SIZE_TRAIN: 1440 33 | CROP: 34 | ENABLED: True 35 | TEST: 36 | EVAL_PERIOD: 2500 37 | -------------------------------------------------------------------------------- /model_zoo/configs/Misc/cascade_rcnn_X_152_32x8d_FPN_IN5k_gn_dconv.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | MASK_ON: False 4 | WEIGHTS: "catalog://ImageNetPretrained/FAIR/X-152-32x8d-IN5k" 5 | RESNETS: 6 | STRIDE_IN_1X1: False # this is a C2 model 7 | NUM_GROUPS: 32 8 | WIDTH_PER_GROUP: 8 9 | DEPTH: 152 10 | DEFORM_ON_PER_STAGE: [False, True, True, True] 11 | ROI_HEADS: 12 | NAME: "CascadeROIHeads" 13 | NUM_CLASSES: 15 14 | ROI_BOX_HEAD: 15 | NAME: "FastRCNNConvFCHead" 16 | NUM_CONV: 4 17 | NUM_FC: 1 18 | NORM: "GN" 19 | CLS_AGNOSTIC_BBOX_REG: True 20 | ROI_MASK_HEAD: 21 | NUM_CONV: 8 22 | NORM: "GN" 23 | RPN: 24 | POST_NMS_TOPK_TRAIN: 2000 25 | SOLVER: 26 | IMS_PER_BATCH: 128 27 | STEPS: (35000, 45000) 28 | MAX_ITER: 50000 29 | BASE_LR: 0.16 30 | INPUT: 31 | MIN_SIZE_TRAIN: (640, 864) 32 | MIN_SIZE_TRAIN_SAMPLING: "range" 33 | MAX_SIZE_TRAIN: 1440 34 | CROP: 35 | ENABLED: True 36 | TEST: 37 | EVAL_PERIOD: 2500 38 | -------------------------------------------------------------------------------- /model_zoo/configs/Detectron1-Comparisons/keypoint_rcnn_R_50_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | KEYPOINT_ON: True 5 | RESNETS: 6 | DEPTH: 50 7 | ROI_HEADS: 8 | NUM_CLASSES: 1 9 | ROI_KEYPOINT_HEAD: 10 | POOLER_RESOLUTION: 14 11 | POOLER_SAMPLING_RATIO: 2 12 | POOLER_TYPE: "ROIAlign" 13 | # Detectron1 uses smooth L1 loss with some magic beta values. 14 | # The defaults are changed to L1 loss in Detectron2. 15 | ROI_BOX_HEAD: 16 | SMOOTH_L1_BETA: 1.0 17 | POOLER_SAMPLING_RATIO: 2 18 | POOLER_TYPE: "ROIAlign" 19 | RPN: 20 | SMOOTH_L1_BETA: 0.1111 21 | # Detectron1 uses 2000 proposals per-batch, but this option is per-image in detectron2 22 | # 1000 proposals per-image is found to hurt box AP. 23 | # Therefore we increase it to 1500 per-image. 
24 | POST_NMS_TOPK_TRAIN: 1500 25 | DATASETS: 26 | TRAIN: ("keypoints_coco_2017_train",) 27 | TEST: ("keypoints_coco_2017_val",) 28 | -------------------------------------------------------------------------------- /projects/deeplab/build_solver.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | import torch 3 | 4 | from detectron2.config import CfgNode 5 | from detectron2.solver import build_lr_scheduler as build_d2_lr_scheduler 6 | 7 | from .lr_scheduler import WarmupPolyLR 8 | 9 | 10 | def build_lr_scheduler( 11 | cfg: CfgNode, optimizer: torch.optim.Optimizer 12 | ) -> torch.optim.lr_scheduler._LRScheduler: 13 | """ 14 | Build a LR scheduler from config. 15 | """ 16 | name = cfg.SOLVER.LR_SCHEDULER_NAME 17 | if name == "WarmupPolyLR": 18 | return WarmupPolyLR( 19 | optimizer, 20 | cfg.SOLVER.MAX_ITER, 21 | warmup_factor=cfg.SOLVER.WARMUP_FACTOR, 22 | warmup_iters=cfg.SOLVER.WARMUP_ITERS, 23 | warmup_method=cfg.SOLVER.WARMUP_METHOD, 24 | power=cfg.SOLVER.POLY_LR_POWER, 25 | constant_ending=cfg.SOLVER.POLY_LR_CONSTANT_ENDING, 26 | ) 27 | else: 28 | return build_d2_lr_scheduler(cfg, optimizer) 29 | -------------------------------------------------------------------------------- /model_zoo/configs/Cityscapes/mask_rcnn_R_50_FPN.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | # WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | # For better, more stable performance initialize from COCO 5 | WEIGHTS: "detectron2://COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x/137849600/model_final_f10217.pkl" 6 | MASK_ON: True 7 | ROI_HEADS: 8 | NUM_CLASSES: 8 9 | # This is similar to the setting used in Mask R-CNN paper, Appendix A 10 | # But there are some differences, e.g., we did not initialize the output 11 | # layer using the corresponding classes from COCO 12 | INPUT: 13 | MIN_SIZE_TRAIN: (800, 832, 864, 896, 928, 960, 992, 1024) 14 | MIN_SIZE_TRAIN_SAMPLING: "choice" 15 | MIN_SIZE_TEST: 1024 16 | MAX_SIZE_TRAIN: 2048 17 | MAX_SIZE_TEST: 2048 18 | DATASETS: 19 | TRAIN: ("cityscapes_fine_instance_seg_train",) 20 | TEST: ("cityscapes_fine_instance_seg_val",) 21 | SOLVER: 22 | BASE_LR: 0.01 23 | STEPS: (18000,) 24 | MAX_ITER: 24000 25 | IMS_PER_BATCH: 8 26 | TEST: 27 | EVAL_PERIOD: 8000 28 | -------------------------------------------------------------------------------- /model_zoo/configs/quick_schedules/keypoint_rcnn_R_50_FPN_normalized_training_acc_test.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | KEYPOINT_ON: True 5 | RESNETS: 6 | DEPTH: 50 7 | ROI_HEADS: 8 | BATCH_SIZE_PER_IMAGE: 256 9 | NUM_CLASSES: 1 10 | ROI_KEYPOINT_HEAD: 11 | POOLER_RESOLUTION: 14 12 | POOLER_SAMPLING_RATIO: 2 13 | NORMALIZE_LOSS_BY_VISIBLE_KEYPOINTS: False 14 | LOSS_WEIGHT: 4.0 15 | ROI_BOX_HEAD: 16 | SMOOTH_L1_BETA: 1.0 # Keypoint AP degrades when using plain L1 loss 17 | RPN: 18 | SMOOTH_L1_BETA: 0.2 # Keypoint AP degrades when using plain L1 loss 19 | DATASETS: 20 | TRAIN: ("keypoints_coco_2017_val",) 21 | TEST: ("keypoints_coco_2017_val",) 22 | INPUT: 23 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 24 | SOLVER: 25 | WARMUP_FACTOR: 0.33333333 26 | WARMUP_ITERS: 100 27 | STEPS: (5500, 5800) 28 | MAX_ITER: 6000 29 | TEST: 30 | EXPECTED_RESULTS: [["bbox", "AP", 55.35, 1.0], 
["keypoints", "AP", 76.91, 1.0]] 31 | -------------------------------------------------------------------------------- /utils/serialize.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | import cloudpickle 3 | 4 | 5 | class PicklableWrapper(object): 6 | """ 7 | Wrap an object to make it more picklable, note that it uses 8 | heavy weight serialization libraries that are slower than pickle. 9 | It's best to use it only on closures (which are usually not picklable). 10 | 11 | This is a simplified version of 12 | https://github.com/joblib/joblib/blob/master/joblib/externals/loky/cloudpickle_wrapper.py 13 | """ 14 | 15 | def __init__(self, obj): 16 | self._obj = obj 17 | 18 | def __reduce__(self): 19 | s = cloudpickle.dumps(self._obj) 20 | return cloudpickle.loads, (s,) 21 | 22 | def __call__(self, *args, **kwargs): 23 | return self._obj(*args, **kwargs) 24 | 25 | def __getattr__(self, attr): 26 | # Ensure that the wrapped object can be used seamlessly as the previous object. 27 | if attr not in ["_obj"]: 28 | return getattr(self._obj, attr) 29 | return getattr(self, attr) 30 | -------------------------------------------------------------------------------- /modeling/backbone/build.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | from detectron2.layers import ShapeSpec 3 | from detectron2.utils.registry import Registry 4 | 5 | from .backbone import Backbone 6 | 7 | BACKBONE_REGISTRY = Registry("BACKBONE") 8 | BACKBONE_REGISTRY.__doc__ = """ 9 | Registry for backbones, which extract feature maps from images 10 | 11 | The registered object must be a callable that accepts two arguments: 12 | 13 | 1. A :class:`detectron2.config.CfgNode` 14 | 2. A :class:`detectron2.layers.ShapeSpec`, which contains the input shape specification. 15 | 16 | Registered object must return instance of :class:`Backbone`. 17 | """ 18 | 19 | 20 | def build_backbone(cfg, input_shape=None): 21 | """ 22 | Build a backbone from `cfg.MODEL.BACKBONE.NAME`. 23 | 24 | Returns: 25 | an instance of :class:`Backbone` 26 | """ 27 | if input_shape is None: 28 | input_shape = ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN)) 29 | 30 | backbone_name = cfg.MODEL.BACKBONE.NAME 31 | backbone = BACKBONE_REGISTRY.get(backbone_name)(cfg, input_shape) 32 | assert isinstance(backbone, Backbone) 33 | return backbone 34 | -------------------------------------------------------------------------------- /projects/deeplab/config.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright (c) Facebook, Inc. and its affiliates. 3 | 4 | 5 | def add_deeplab_config(cfg): 6 | """ 7 | Add config for DeepLab. 8 | """ 9 | # We retry random cropping until no single category in semantic segmentation GT occupies more 10 | # than `SINGLE_CATEGORY_MAX_AREA` part of the crop. 11 | cfg.INPUT.CROP.SINGLE_CATEGORY_MAX_AREA = 1.0 12 | # Used for `poly` learning rate schedule. 13 | cfg.SOLVER.POLY_LR_POWER = 0.9 14 | cfg.SOLVER.POLY_LR_CONSTANT_ENDING = 0.0 15 | # Loss type, choose from `cross_entropy`, `hard_pixel_mining`. 
16 | cfg.MODEL.SEM_SEG_HEAD.LOSS_TYPE = "hard_pixel_mining" 17 | # DeepLab settings 18 | cfg.MODEL.SEM_SEG_HEAD.PROJECT_FEATURES = ["res2"] 19 | cfg.MODEL.SEM_SEG_HEAD.PROJECT_CHANNELS = [48] 20 | cfg.MODEL.SEM_SEG_HEAD.ASPP_CHANNELS = 256 21 | cfg.MODEL.SEM_SEG_HEAD.ASPP_DILATIONS = [6, 12, 18] 22 | cfg.MODEL.SEM_SEG_HEAD.ASPP_DROPOUT = 0.1 23 | # Backbone new configs 24 | cfg.MODEL.RESNETS.RES4_DILATION = 1 25 | cfg.MODEL.RESNETS.RES5_MULTI_GRID = [1, 2, 4] 26 | # ResNet stem type from: `basic`, `deeplab` 27 | cfg.MODEL.RESNETS.STEM_TYPE = "deeplab" 28 | -------------------------------------------------------------------------------- /utils/file_io.py: -------------------------------------------------------------------------------- 1 | from fvcore.common.file_io import HTTPURLHandler, OneDrivePathHandler, PathHandler, PathManagerBase 2 | 3 | __all__ = ["PathManager", "PathHandler"] 4 | 5 | 6 | PathManager = PathManagerBase() 7 | """ 8 | This is a detectron2 project-specific PathManager. 9 | We try to stay away from global PathManager in fvcore as it 10 | introduces potential conflicts among other libraries. 11 | """ 12 | 13 | 14 | class Detectron2Handler(PathHandler): 15 | """ 16 | Resolve anything that's hosted under detectron2's namespace. 17 | """ 18 | 19 | PREFIX = "detectron2://" 20 | S3_DETECTRON2_PREFIX = "https://dl.fbaipublicfiles.com/detectron2/" 21 | 22 | def _get_supported_prefixes(self): 23 | return [self.PREFIX] 24 | 25 | def _get_local_path(self, path): 26 | name = path[len(self.PREFIX) :] 27 | return PathManager.get_local_path(self.S3_DETECTRON2_PREFIX + name) 28 | 29 | def _open(self, path, mode="r", **kwargs): 30 | return PathManager.open(self._get_local_path(path), mode, **kwargs) 31 | 32 | 33 | PathManager.register_handler(HTTPURLHandler()) 34 | PathManager.register_handler(OneDrivePathHandler()) 35 | PathManager.register_handler(Detectron2Handler()) 36 | -------------------------------------------------------------------------------- /projects/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | import importlib 3 | from pathlib import Path 4 | 5 | _PROJECTS = { 6 | "point_rend": "PointRend", 7 | "deeplab": "DeepLab", 8 | "panoptic_deeplab": "Panoptic-DeepLab", 9 | } 10 | _PROJECT_ROOT = Path(__file__).parent.parent.parent / "projects" 11 | 12 | if _PROJECT_ROOT.is_dir(): 13 | # This is true only for in-place installation (pip install -e, setup.py develop), 14 | # where setup(package_dir=) does not work: https://github.com/pypa/setuptools/issues/230 15 | 16 | class _D2ProjectsFinder(importlib.abc.MetaPathFinder): 17 | def find_spec(self, name, path, target=None): 18 | if not name.startswith("detectron2.projects."): 19 | return 20 | project_name = name.split(".")[-1] 21 | project_dir = _PROJECTS.get(project_name) 22 | if not project_dir: 23 | return 24 | target_file = _PROJECT_ROOT / f"{project_dir}/{project_name}/__init__.py" 25 | if not target_file.is_file(): 26 | return 27 | return importlib.util.spec_from_file_location(name, target_file) 28 | 29 | import sys 30 | 31 | sys.meta_path.append(_D2ProjectsFinder()) 32 | -------------------------------------------------------------------------------- /modeling/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
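Detectron2Handler above only rewrites the "detectron2://" prefix to the public download URL, so the WEIGHTS entries in the configs resolve to ordinary HTTPS files. A minimal sketch (needs network access to actually download):

from detectron2.utils.file_io import PathManager

# "detectron2://ImageNetPretrained/MSRA/R-50.pkl" (as in MODEL.WEIGHTS above) maps to
# "https://dl.fbaipublicfiles.com/detectron2/ImageNetPretrained/MSRA/R-50.pkl"
# and is downloaded to a local cache by the HTTPURLHandler.
local_path = PathManager.get_local_path("detectron2://ImageNetPretrained/MSRA/R-50.pkl")

with PathManager.open("detectron2://ImageNetPretrained/MSRA/R-50.pkl", "rb") as f:
    header = f.read(16)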
2 | from detectron2.layers import ShapeSpec 3 | 4 | from .anchor_generator import build_anchor_generator, ANCHOR_GENERATOR_REGISTRY 5 | from .backbone import ( 6 | BACKBONE_REGISTRY, 7 | FPN, 8 | Backbone, 9 | ResNet, 10 | ResNetBlockBase, 11 | build_backbone, 12 | build_resnet_backbone, 13 | make_stage, 14 | ) 15 | from .meta_arch import ( 16 | META_ARCH_REGISTRY, 17 | SEM_SEG_HEADS_REGISTRY, 18 | GeneralizedRCNN, 19 | PanopticFPN, 20 | ProposalNetwork, 21 | RetinaNet, 22 | SemanticSegmentor, 23 | build_model, 24 | build_sem_seg_head, 25 | ) 26 | from .postprocessing import detector_postprocess 27 | from .proposal_generator import ( 28 | PROPOSAL_GENERATOR_REGISTRY, 29 | build_proposal_generator, 30 | RPN_HEAD_REGISTRY, 31 | build_rpn_head, 32 | ) 33 | from .roi_heads import ( 34 | ROI_BOX_HEAD_REGISTRY, 35 | ROI_HEADS_REGISTRY, 36 | ROI_KEYPOINT_HEAD_REGISTRY, 37 | ROI_MASK_HEAD_REGISTRY, 38 | ROIHeads, 39 | StandardROIHeads, 40 | BaseMaskRCNNHead, 41 | BaseKeypointRCNNHead, 42 | FastRCNNOutputLayers, 43 | build_box_head, 44 | build_keypoint_head, 45 | build_mask_head, 46 | build_roi_heads, 47 | ) 48 | from .test_time_augmentation import DatasetMapperTTA, GeneralizedRCNNWithTTA 49 | 50 | _EXCLUDE = {"ShapeSpec"} 51 | __all__ = [k for k in globals().keys() if k not in _EXCLUDE and not k.startswith("_")] 52 | -------------------------------------------------------------------------------- /model_zoo/configs/Base-RCNN-FPN.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | BACKBONE: 4 | NAME: "build_resnet_fpn_backbone" 5 | RESNETS: 6 | OUT_FEATURES: ["res2", "res3", "res4", "res5"] 7 | FPN: 8 | IN_FEATURES: ["res2", "res3", "res4", "res5"] 9 | ANCHOR_GENERATOR: 10 | SIZES: [[32], [64], [128], [256], [512]] # One size for each in feature map 11 | ASPECT_RATIOS: [[0.5, 1.0, 2.0]] # Three aspect ratios (same for all in feature maps) 12 | RPN: 13 | IN_FEATURES: ["p2", "p3", "p4", "p5", "p6"] 14 | PRE_NMS_TOPK_TRAIN: 2000 # Per FPN level 15 | PRE_NMS_TOPK_TEST: 1000 # Per FPN level 16 | # Detectron1 uses 2000 proposals per-batch, 17 | # (See "modeling/rpn/rpn_outputs.py" for details of this legacy issue) 18 | # which is approximately 1000 proposals per-image since the default batch size for FPN is 2. 19 | POST_NMS_TOPK_TRAIN: 1000 20 | POST_NMS_TOPK_TEST: 1000 21 | ROI_HEADS: 22 | NAME: "StandardROIHeads" 23 | NUM_CLASSES: 15 24 | IN_FEATURES: ["p2", "p3", "p4", "p5"] 25 | ROI_BOX_HEAD: 26 | NAME: "FastRCNNConvFCHead" 27 | NUM_FC: 2 28 | POOLER_RESOLUTION: 7 29 | ROI_MASK_HEAD: 30 | NAME: "MaskRCNNConvUpsampleHead" 31 | NUM_CONV: 4 32 | POOLER_RESOLUTION: 14 33 | DATASETS: 34 | TRAIN: ("coco_2017_train",) 35 | TEST: ("coco_2017_val",) 36 | SOLVER: 37 | IMS_PER_BATCH: 16 38 | BASE_LR: 0.02 39 | STEPS: (60000, 80000) 40 | MAX_ITER: 90000 41 | INPUT: 42 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 43 | VERSION: 2 44 | -------------------------------------------------------------------------------- /modeling/backbone/backbone.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | from abc import ABCMeta, abstractmethod 3 | import torch.nn as nn 4 | 5 | from detectron2.layers import ShapeSpec 6 | 7 | __all__ = ["Backbone"] 8 | 9 | 10 | class Backbone(nn.Module, metaclass=ABCMeta): 11 | """ 12 | Abstract base class for network backbones. 
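Each YAML in this zoo overrides only a few keys of its _BASE_ file, and most chains bottom out in Base-RCNN-FPN.yaml shown just above. A minimal sketch of loading one such config, assuming the configs directory is available at this relative path:

from detectron2.config import get_cfg

cfg = get_cfg()  # detectron2 defaults
# merge_from_file follows the _BASE_ chain: Base-RCNN-FPN.yaml is applied first,
# then the child file's overrides.
cfg.merge_from_file("model_zoo/configs/COCO-Detection/fast_rcnn_R_50_FPN_1x.yaml")

print(cfg.SOLVER.MAX_ITER)       # 90000, inherited from Base-RCNN-FPN.yaml
print(cfg.MODEL.LOAD_PROPOSALS)  # True, set by the child config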
13 | """ 14 | 15 | def __init__(self): 16 | """ 17 | The `__init__` method of any subclass can specify its own set of arguments. 18 | """ 19 | super().__init__() 20 | 21 | @abstractmethod 22 | def forward(self): 23 | """ 24 | Subclasses must override this method, but adhere to the same return type. 25 | 26 | Returns: 27 | dict[str->Tensor]: mapping from feature name (e.g., "res2") to tensor 28 | """ 29 | pass 30 | 31 | @property 32 | def size_divisibility(self) -> int: 33 | """ 34 | Some backbones require the input height and width to be divisible by a 35 | specific integer. This is typically true for encoder / decoder type networks 36 | with lateral connection (e.g., FPN) for which feature maps need to match 37 | dimension in the "bottom up" and "top down" paths. Set to 0 if no specific 38 | input size divisibility is required. 39 | """ 40 | return 0 41 | 42 | def output_shape(self): 43 | """ 44 | Returns: 45 | dict[str->ShapeSpec] 46 | """ 47 | # this is a backward-compatible default 48 | return { 49 | name: ShapeSpec( 50 | channels=self._out_feature_channels[name], stride=self._out_feature_strides[name] 51 | ) 52 | for name in self._out_features 53 | } 54 | -------------------------------------------------------------------------------- /projects/deeplab/loss.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | import torch 3 | import torch.nn as nn 4 | 5 | 6 | class DeepLabCE(nn.Module): 7 | """ 8 | Hard pixel mining with cross entropy loss, for semantic segmentation. 9 | This is used in TensorFlow DeepLab frameworks. 10 | Paper: DeeperLab: Single-Shot Image Parser 11 | Reference: https://github.com/tensorflow/models/blob/bd488858d610e44df69da6f89277e9de8a03722c/research/deeplab/utils/train_utils.py#L33 # noqa 12 | Arguments: 13 | ignore_label: Integer, label to ignore. 14 | top_k_percent_pixels: Float, the value lies in [0.0, 1.0]. When its 15 | value < 1.0, only compute the loss for the top k percent pixels 16 | (e.g., the top 20% pixels). This is useful for hard pixel mining. 17 | weight: Tensor, a manual rescaling weight given to each class. 18 | """ 19 | 20 | def __init__(self, ignore_label=-1, top_k_percent_pixels=1.0, weight=None): 21 | super(DeepLabCE, self).__init__() 22 | self.top_k_percent_pixels = top_k_percent_pixels 23 | self.ignore_label = ignore_label 24 | self.criterion = nn.CrossEntropyLoss( 25 | weight=weight, ignore_index=ignore_label, reduction="none" 26 | ) 27 | 28 | def forward(self, logits, labels, weights=None): 29 | if weights is None: 30 | pixel_losses = self.criterion(logits, labels).contiguous().view(-1) 31 | else: 32 | # Apply per-pixel loss weights. 33 | pixel_losses = self.criterion(logits, labels) * weights 34 | pixel_losses = pixel_losses.contiguous().view(-1) 35 | if self.top_k_percent_pixels == 1.0: 36 | return pixel_losses.mean() 37 | 38 | top_k_pixels = int(self.top_k_percent_pixels * pixel_losses.numel()) 39 | pixel_losses, _ = torch.topk(pixel_losses, top_k_pixels) 40 | return pixel_losses.mean() 41 | -------------------------------------------------------------------------------- /data/samplers/grouped_batch_sampler.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | import numpy as np 3 | from torch.utils.data.sampler import BatchSampler, Sampler 4 | 5 | 6 | class GroupedBatchSampler(BatchSampler): 7 | """ 8 | Wraps another sampler to yield a mini-batch of indices. 
9 | It enforces that the batch only contain elements from the same group. 10 | It also tries to provide mini-batches which follows an ordering which is 11 | as close as possible to the ordering from the original sampler. 12 | """ 13 | 14 | def __init__(self, sampler, group_ids, batch_size): 15 | """ 16 | Args: 17 | sampler (Sampler): Base sampler. 18 | group_ids (list[int]): If the sampler produces indices in range [0, N), 19 | `group_ids` must be a list of `N` ints which contains the group id of each sample. 20 | The group ids must be a set of integers in the range [0, num_groups). 21 | batch_size (int): Size of mini-batch. 22 | """ 23 | if not isinstance(sampler, Sampler): 24 | raise ValueError( 25 | "sampler should be an instance of " 26 | "torch.utils.data.Sampler, but got sampler={}".format(sampler) 27 | ) 28 | self.sampler = sampler 29 | self.group_ids = np.asarray(group_ids) 30 | assert self.group_ids.ndim == 1 31 | self.batch_size = batch_size 32 | groups = np.unique(self.group_ids).tolist() 33 | 34 | # buffer the indices of each group until batch size is reached 35 | self.buffer_per_group = {k: [] for k in groups} 36 | 37 | def __iter__(self): 38 | for idx in self.sampler: 39 | group_id = self.group_ids[idx] 40 | group_buffer = self.buffer_per_group[group_id] 41 | group_buffer.append(idx) 42 | if len(group_buffer) == self.batch_size: 43 | yield group_buffer[:] # yield a copy of the list 44 | del group_buffer[:] 45 | 46 | def __len__(self): 47 | raise NotImplementedError("len() of GroupedBatchSampler is not well-defined.") 48 | -------------------------------------------------------------------------------- /projects/panoptic_deeplab/config.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright (c) Facebook, Inc. and its affiliates. 3 | 4 | from detectron2.config import CfgNode as CN 5 | from detectron2.projects.deeplab import add_deeplab_config 6 | 7 | 8 | def add_panoptic_deeplab_config(cfg): 9 | """ 10 | Add config for Panoptic-DeepLab. 11 | """ 12 | # Reuse DeepLab config. 13 | add_deeplab_config(cfg) 14 | # Target generation parameters. 15 | cfg.INPUT.GAUSSIAN_SIGMA = 10 16 | cfg.INPUT.IGNORE_STUFF_IN_OFFSET = True 17 | cfg.INPUT.SMALL_INSTANCE_AREA = 4096 18 | cfg.INPUT.SMALL_INSTANCE_WEIGHT = 3 19 | cfg.INPUT.IGNORE_CROWD_IN_SEMANTIC = False 20 | # Optimizer type. 21 | cfg.SOLVER.OPTIMIZER = "ADAM" 22 | # Panoptic-DeepLab semantic segmentation head. 23 | # We add an extra convolution before predictor. 24 | cfg.MODEL.SEM_SEG_HEAD.HEAD_CHANNELS = 256 25 | cfg.MODEL.SEM_SEG_HEAD.LOSS_TOP_K = 0.2 26 | # Panoptic-DeepLab instance segmentation head. 27 | cfg.MODEL.INS_EMBED_HEAD = CN() 28 | cfg.MODEL.INS_EMBED_HEAD.NAME = "PanopticDeepLabInsEmbedHead" 29 | cfg.MODEL.INS_EMBED_HEAD.IN_FEATURES = ["res2", "res3", "res5"] 30 | cfg.MODEL.INS_EMBED_HEAD.PROJECT_FEATURES = ["res2", "res3"] 31 | cfg.MODEL.INS_EMBED_HEAD.PROJECT_CHANNELS = [32, 64] 32 | cfg.MODEL.INS_EMBED_HEAD.ASPP_CHANNELS = 256 33 | cfg.MODEL.INS_EMBED_HEAD.ASPP_DILATIONS = [6, 12, 18] 34 | cfg.MODEL.INS_EMBED_HEAD.ASPP_DROPOUT = 0.1 35 | # We add an extra convolution before predictor. 36 | cfg.MODEL.INS_EMBED_HEAD.HEAD_CHANNELS = 32 37 | cfg.MODEL.INS_EMBED_HEAD.CONVS_DIM = 128 38 | cfg.MODEL.INS_EMBED_HEAD.COMMON_STRIDE = 4 39 | cfg.MODEL.INS_EMBED_HEAD.NORM = "SyncBN" 40 | cfg.MODEL.INS_EMBED_HEAD.CENTER_LOSS_WEIGHT = 200.0 41 | cfg.MODEL.INS_EMBED_HEAD.OFFSET_LOSS_WEIGHT = 0.01 42 | # Panoptic-DeepLab post-processing setting. 
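A minimal sketch of GroupedBatchSampler above with toy group ids (in detectron2 the group id is typically derived from image aspect ratio, so a batch contains only wide or only tall images):

from torch.utils.data.sampler import SequentialSampler

from detectron2.data.samplers.grouped_batch_sampler import GroupedBatchSampler

dataset = list(range(8))              # stand-in for 8 images
group_ids = [0, 1, 0, 1, 0, 1, 0, 1]  # e.g. 0 = wide image, 1 = tall image

batch_sampler = GroupedBatchSampler(SequentialSampler(dataset), group_ids, batch_size=2)

print(list(batch_sampler))
# [[0, 2], [1, 3], [4, 6], [5, 7]] -- every batch stays within one group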
43 | cfg.MODEL.PANOPTIC_DEEPLAB = CN() 44 | # Stuff area limit, ignore stuff region below this number. 45 | cfg.MODEL.PANOPTIC_DEEPLAB.STUFF_AREA = 2048 46 | cfg.MODEL.PANOPTIC_DEEPLAB.CENTER_THRESHOLD = 0.1 47 | cfg.MODEL.PANOPTIC_DEEPLAB.NMS_KERNEL = 7 48 | cfg.MODEL.PANOPTIC_DEEPLAB.TOP_K_INSTANCE = 200 49 | # If set to False, Panoptic-DeepLab will not evaluate instance segmentation. 50 | cfg.MODEL.PANOPTIC_DEEPLAB.PREDICT_INSTANCES = True 51 | -------------------------------------------------------------------------------- /projects/point_rend/config.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright (c) Facebook, Inc. and its affiliates. 3 | 4 | from detectron2.config import CfgNode as CN 5 | 6 | 7 | def add_pointrend_config(cfg): 8 | """ 9 | Add config for PointRend. 10 | """ 11 | # We retry random cropping until no single category in semantic segmentation GT occupies more 12 | # than `SINGLE_CATEGORY_MAX_AREA` part of the crop. 13 | cfg.INPUT.CROP.SINGLE_CATEGORY_MAX_AREA = 1.0 14 | # Color augmentation from SSD paper for semantic segmentation model during training. 15 | cfg.INPUT.COLOR_AUG_SSD = False 16 | 17 | # Names of the input feature maps to be used by a coarse mask head. 18 | cfg.MODEL.ROI_MASK_HEAD.IN_FEATURES = ("p2",) 19 | cfg.MODEL.ROI_MASK_HEAD.FC_DIM = 1024 20 | cfg.MODEL.ROI_MASK_HEAD.NUM_FC = 2 21 | # The side size of a coarse mask head prediction. 22 | cfg.MODEL.ROI_MASK_HEAD.OUTPUT_SIDE_RESOLUTION = 7 23 | # True if point head is used. 24 | cfg.MODEL.ROI_MASK_HEAD.POINT_HEAD_ON = False 25 | 26 | cfg.MODEL.POINT_HEAD = CN() 27 | cfg.MODEL.POINT_HEAD.NAME = "StandardPointHead" 28 | cfg.MODEL.POINT_HEAD.NUM_CLASSES = 80 29 | # Names of the input feature maps to be used by a mask point head. 30 | cfg.MODEL.POINT_HEAD.IN_FEATURES = ("p2",) 31 | # Number of points sampled during training for a mask point head. 32 | cfg.MODEL.POINT_HEAD.TRAIN_NUM_POINTS = 14 * 14 33 | # Oversampling parameter for PointRend point sampling during training. Parameter `k` in the 34 | # original paper. 35 | cfg.MODEL.POINT_HEAD.OVERSAMPLE_RATIO = 3 36 | # Importance sampling parameter for PointRend point sampling during training. Parameter `beta` in 37 | # the original paper. 38 | cfg.MODEL.POINT_HEAD.IMPORTANCE_SAMPLE_RATIO = 0.75 39 | # Number of subdivision steps during inference. 40 | cfg.MODEL.POINT_HEAD.SUBDIVISION_STEPS = 5 41 | # Maximum number of points selected at each subdivision step (N). 42 | cfg.MODEL.POINT_HEAD.SUBDIVISION_NUM_POINTS = 28 * 28 43 | cfg.MODEL.POINT_HEAD.FC_DIM = 256 44 | cfg.MODEL.POINT_HEAD.NUM_FC = 3 45 | cfg.MODEL.POINT_HEAD.CLS_AGNOSTIC_MASK = False 46 | # If True, then coarse prediction features are used as input for each layer in PointRend's MLP. 47 | cfg.MODEL.POINT_HEAD.COARSE_PRED_EACH_LAYER = True 48 | cfg.MODEL.POINT_HEAD.COARSE_SEM_SEG_HEAD_NAME = "SemSegFPNHead" 49 | -------------------------------------------------------------------------------- /modeling/sampling.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
2 | import torch 3 | 4 | from detectron2.layers import nonzero_tuple 5 | 6 | __all__ = ["subsample_labels"] 7 | 8 | 9 | def subsample_labels( 10 | labels: torch.Tensor, num_samples: int, positive_fraction: float, bg_label: int 11 | ): 12 | """ 13 | Return `num_samples` (or fewer, if not enough found) 14 | random samples from `labels` which is a mixture of positives & negatives. 15 | It will try to return as many positives as possible without 16 | exceeding `positive_fraction * num_samples`, and then try to 17 | fill the remaining slots with negatives. 18 | 19 | Args: 20 | labels (Tensor): (N, ) label vector with values: 21 | * -1: ignore 22 | * bg_label: background ("negative") class 23 | * otherwise: one or more foreground ("positive") classes 24 | num_samples (int): The total number of labels with value >= 0 to return. 25 | Values that are not sampled will be filled with -1 (ignore). 26 | positive_fraction (float): The number of subsampled labels with values > 0 27 | is `min(num_positives, int(positive_fraction * num_samples))`. The number 28 | of negatives sampled is `min(num_negatives, num_samples - num_positives_sampled)`. 29 | In other words, if there are not enough positives, the sample is filled with 30 | negatives. If there are also not enough negatives, then as many elements are 31 | sampled as is possible. 32 | bg_label (int): label index of background ("negative") class. 33 | 34 | Returns: 35 | pos_idx, neg_idx (Tensor): 36 | 1D vector of indices. The total length of both is `num_samples` or fewer. 37 | """ 38 | positive = nonzero_tuple((labels != -1) & (labels != bg_label))[0] 39 | negative = nonzero_tuple(labels == bg_label)[0] 40 | 41 | num_pos = int(num_samples * positive_fraction) 42 | # protect against not enough positive examples 43 | num_pos = min(positive.numel(), num_pos) 44 | num_neg = num_samples - num_pos 45 | # protect against not enough negative examples 46 | num_neg = min(negative.numel(), num_neg) 47 | 48 | # randomly select positive and negative examples 49 | perm1 = torch.randperm(positive.numel(), device=positive.device)[:num_pos] 50 | perm2 = torch.randperm(negative.numel(), device=negative.device)[:num_neg] 51 | 52 | pos_idx = positive[perm1] 53 | neg_idx = negative[perm2] 54 | return pos_idx, neg_idx 55 | -------------------------------------------------------------------------------- /projects/deeplab/lr_scheduler.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | import math 3 | from typing import List 4 | import torch 5 | 6 | from detectron2.solver.lr_scheduler import _get_warmup_factor_at_iter 7 | 8 | # NOTE: PyTorch's LR scheduler interface uses names that assume the LR changes 9 | # only on epoch boundaries. We typically use iteration based schedules instead. 10 | # As a result, "epoch" (e.g., as in self.last_epoch) should be understood to mean 11 | # "iteration" instead. 12 | 13 | # FIXME: ideally this would be achieved with a CombinedLRScheduler, separating 14 | # MultiStepLR with WarmupLR but the current LRScheduler design doesn't allow it. 15 | 16 | 17 | class WarmupPolyLR(torch.optim.lr_scheduler._LRScheduler): 18 | """ 19 | Poly learning rate schedule used to train DeepLab. 20 | Paper: DeepLab: Semantic Image Segmentation with Deep Convolutional Nets, 21 | Atrous Convolution, and Fully Connected CRFs. 
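subsample_labels above is the balanced-sampling helper used by the RPN and ROI heads; a minimal sketch with a toy label vector:

import torch

from detectron2.modeling.sampling import subsample_labels

# 1 = foreground, 0 = background, -1 = ignore
labels = torch.tensor([1, 0, 0, -1, 1, 0, 0, 0])

pos_idx, neg_idx = subsample_labels(labels, num_samples=4, positive_fraction=0.5, bg_label=0)
# Returns at most int(0.5 * 4) = 2 positive indices (here 0 and 4, in random order)
# and fills the remaining 2 slots with randomly chosen background indices.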
22 | Reference: https://github.com/tensorflow/models/blob/21b73d22f3ed05b650e85ac50849408dd36de32e/research/deeplab/utils/train_utils.py#L337 # noqa 23 | """ 24 | 25 | def __init__( 26 | self, 27 | optimizer: torch.optim.Optimizer, 28 | max_iters: int, 29 | warmup_factor: float = 0.001, 30 | warmup_iters: int = 1000, 31 | warmup_method: str = "linear", 32 | last_epoch: int = -1, 33 | power: float = 0.9, 34 | constant_ending: float = 0.0, 35 | ): 36 | self.max_iters = max_iters 37 | self.warmup_factor = warmup_factor 38 | self.warmup_iters = warmup_iters 39 | self.warmup_method = warmup_method 40 | self.power = power 41 | self.constant_ending = constant_ending 42 | super().__init__(optimizer, last_epoch) 43 | 44 | def get_lr(self) -> List[float]: 45 | warmup_factor = _get_warmup_factor_at_iter( 46 | self.warmup_method, self.last_epoch, self.warmup_iters, self.warmup_factor 47 | ) 48 | if self.constant_ending > 0 and warmup_factor == 1.0: 49 | # Constant ending lr. 50 | if ( 51 | math.pow((1.0 - self.last_epoch / self.max_iters), self.power) 52 | < self.constant_ending 53 | ): 54 | return [base_lr * self.constant_ending for base_lr in self.base_lrs] 55 | return [ 56 | base_lr * warmup_factor * math.pow((1.0 - self.last_epoch / self.max_iters), self.power) 57 | for base_lr in self.base_lrs 58 | ] 59 | 60 | def _compute_values(self) -> List[float]: 61 | # The new interface 62 | return self.get_lr() 63 | -------------------------------------------------------------------------------- /evaluation/testing.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | import logging 3 | import numpy as np 4 | import pprint 5 | import sys 6 | from collections import OrderedDict 7 | from collections.abc import Mapping 8 | 9 | 10 | def print_csv_format(results): 11 | """ 12 | Print main metrics in a format similar to Detectron, 13 | so that they are easy to copypaste into a spreadsheet. 14 | 15 | Args: 16 | results (OrderedDict[dict]): task_name -> {metric -> score} 17 | """ 18 | assert isinstance(results, OrderedDict), results # unordered results cannot be properly printed 19 | logger = logging.getLogger(__name__) 20 | for task, res in results.items(): 21 | # Don't print "AP-category" metrics since they are usually not tracked. 
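Past warmup, WarmupPolyLR above scales each base LR by (1 - iter / max_iters) ** power. A minimal sketch of wiring it to an optimizer, assuming the deeplab project package is importable as detectron2.projects.deeplab:

import torch

from detectron2.projects.deeplab.lr_scheduler import WarmupPolyLR

model = torch.nn.Linear(4, 2)
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
scheduler = WarmupPolyLR(optimizer, max_iters=1000, warmup_factor=0.001, warmup_iters=10, power=0.9)

for it in range(100):
    optimizer.step()
    scheduler.step()  # "epoch" in the base class really means "iteration" here
# After warmup, the LR at iteration t is 0.01 * (1 - t / 1000) ** 0.9.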
22 | important_res = [(k, v) for k, v in res.items() if "-" not in k] 23 | logger.info("copypaste: Task: {}".format(task)) 24 | logger.info("copypaste: " + ",".join([k[0] for k in important_res])) 25 | logger.info("copypaste: " + ",".join(["{0:.4f}".format(k[1]) for k in important_res])) 26 | 27 | 28 | def verify_results(cfg, results): 29 | """ 30 | Args: 31 | results (OrderedDict[dict]): task_name -> {metric -> score} 32 | 33 | Returns: 34 | bool: whether the verification succeeds or not 35 | """ 36 | expected_results = cfg.TEST.EXPECTED_RESULTS 37 | if not len(expected_results): 38 | return True 39 | 40 | ok = True 41 | for task, metric, expected, tolerance in expected_results: 42 | actual = results[task].get(metric, None) 43 | if actual is None: 44 | ok = False 45 | continue 46 | if not np.isfinite(actual): 47 | ok = False 48 | continue 49 | diff = abs(actual - expected) 50 | if diff > tolerance: 51 | ok = False 52 | 53 | logger = logging.getLogger(__name__) 54 | if not ok: 55 | logger.error("Result verification failed!") 56 | logger.error("Expected Results: " + str(expected_results)) 57 | logger.error("Actual Results: " + pprint.pformat(results)) 58 | 59 | sys.exit(1) 60 | else: 61 | logger.info("Results verification passed.") 62 | return ok 63 | 64 | 65 | def flatten_results_dict(results): 66 | """ 67 | Expand a hierarchical dict of scalars into a flat dict of scalars. 68 | If results[k1][k2][k3] = v, the returned dict will have the entry 69 | {"k1/k2/k3": v}. 70 | 71 | Args: 72 | results (dict): 73 | """ 74 | r = {} 75 | for k, v in results.items(): 76 | if isinstance(v, Mapping): 77 | v = flatten_results_dict(v) 78 | for kk, vv in v.items(): 79 | r[k + "/" + kk] = vv 80 | else: 81 | r[k] = v 82 | return r 83 | -------------------------------------------------------------------------------- /utils/memory.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | 3 | import logging 4 | from contextlib import contextmanager 5 | from functools import wraps 6 | import torch 7 | 8 | __all__ = ["retry_if_cuda_oom"] 9 | 10 | 11 | @contextmanager 12 | def _ignore_torch_cuda_oom(): 13 | """ 14 | A context which ignores CUDA OOM exception from pytorch. 15 | """ 16 | try: 17 | yield 18 | except RuntimeError as e: 19 | # NOTE: the string may change? 20 | if "CUDA out of memory. " in str(e): 21 | pass 22 | else: 23 | raise 24 | 25 | 26 | def retry_if_cuda_oom(func): 27 | """ 28 | Makes a function retry itself after encountering 29 | pytorch's CUDA OOM error. 30 | It will first retry after calling `torch.cuda.empty_cache()`. 31 | 32 | If that still fails, it will then retry by trying to convert inputs to CPUs. 33 | In this case, it expects the function to dispatch to CPU implementation. 34 | The return values may become CPU tensors as well and it's user's 35 | responsibility to convert it back to CUDA tensor if needed. 36 | 37 | Args: 38 | func: a stateless callable that takes tensor-like objects as arguments 39 | 40 | Returns: 41 | a callable which retries `func` if OOM is encountered. 42 | 43 | Examples: 44 | :: 45 | output = retry_if_cuda_oom(some_torch_function)(input1, input2) 46 | # output may be on CPU even if inputs are on GPU 47 | 48 | Note: 49 | 1. When converting inputs to CPU, it will only look at each argument and check 50 | if it has `.device` and `.to` for conversion. Nested structures of tensors 51 | are not supported. 52 | 53 | 2. 
Since the function might be called more than once, it has to be 54 | stateless. 55 | """ 56 | 57 | def maybe_to_cpu(x): 58 | try: 59 | like_gpu_tensor = x.device.type == "cuda" and hasattr(x, "to") 60 | except AttributeError: 61 | like_gpu_tensor = False 62 | if like_gpu_tensor: 63 | return x.to(device="cpu") 64 | else: 65 | return x 66 | 67 | @wraps(func) 68 | def wrapped(*args, **kwargs): 69 | with _ignore_torch_cuda_oom(): 70 | return func(*args, **kwargs) 71 | 72 | # Clear cache and retry 73 | torch.cuda.empty_cache() 74 | with _ignore_torch_cuda_oom(): 75 | return func(*args, **kwargs) 76 | 77 | # Try on CPU. This slows down the code significantly, therefore print a notice. 78 | logger = logging.getLogger(__name__) 79 | logger.info("Attempting to copy inputs of {} to CPU due to CUDA OOM".format(str(func))) 80 | new_args = (maybe_to_cpu(x) for x in args) 81 | new_kwargs = {k: maybe_to_cpu(v) for k, v in kwargs.items()} 82 | return func(*new_args, **new_kwargs) 83 | 84 | return wrapped 85 | -------------------------------------------------------------------------------- /projects/point_rend/color_augmentation.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | import numpy as np 3 | import random 4 | import cv2 5 | from fvcore.transforms.transform import Transform 6 | 7 | 8 | class ColorAugSSDTransform(Transform): 9 | """ 10 | A color related data augmentation used in Single Shot Multibox Detector (SSD). 11 | 12 | Wei Liu, Dragomir Anguelov, Dumitru Erhan, Christian Szegedy, 13 | Scott Reed, Cheng-Yang Fu, Alexander C. Berg. 14 | SSD: Single Shot MultiBox Detector. ECCV 2016. 15 | 16 | Implementation based on: 17 | 18 | https://github.com/weiliu89/caffe/blob 19 | /4817bf8b4200b35ada8ed0dc378dceaf38c539e4 20 | /src/caffe/util/im_transforms.cpp 21 | 22 | https://github.com/chainer/chainercv/blob 23 | /7159616642e0be7c5b3ef380b848e16b7e99355b/chainercv 24 | /links/model/ssd/transforms.py 25 | """ 26 | 27 | def __init__( 28 | self, 29 | img_format, 30 | brightness_delta=32, 31 | contrast_low=0.5, 32 | contrast_high=1.5, 33 | saturation_low=0.5, 34 | saturation_high=1.5, 35 | hue_delta=18, 36 | ): 37 | super().__init__() 38 | assert img_format in ["BGR", "RGB"] 39 | self.is_rgb = img_format == "RGB" 40 | del img_format 41 | self._set_attributes(locals()) 42 | 43 | def apply_coords(self, coords): 44 | return coords 45 | 46 | def apply_segmentation(self, segmentation): 47 | return segmentation 48 | 49 | def apply_image(self, img, interp=None): 50 | if self.is_rgb: 51 | img = img[:, :, [2, 1, 0]] 52 | img = self.brightness(img) 53 | if random.randrange(2): 54 | img = self.contrast(img) 55 | img = self.saturation(img) 56 | img = self.hue(img) 57 | else: 58 | img = self.saturation(img) 59 | img = self.hue(img) 60 | img = self.contrast(img) 61 | if self.is_rgb: 62 | img = img[:, :, [2, 1, 0]] 63 | return img 64 | 65 | def convert(self, img, alpha=1, beta=0): 66 | img = img.astype(np.float32) * alpha + beta 67 | img = np.clip(img, 0, 255) 68 | return img.astype(np.uint8) 69 | 70 | def brightness(self, img): 71 | if random.randrange(2): 72 | return self.convert( 73 | img, beta=random.uniform(-self.brightness_delta, self.brightness_delta) 74 | ) 75 | return img 76 | 77 | def contrast(self, img): 78 | if random.randrange(2): 79 | return self.convert(img, alpha=random.uniform(self.contrast_low, self.contrast_high)) 80 | return img 81 | 82 | def saturation(self, img): 83 | if random.randrange(2): 84 | img = 
cv2.cvtColor(img, cv2.COLOR_BGR2HSV) 85 | img[:, :, 1] = self.convert( 86 | img[:, :, 1], alpha=random.uniform(self.saturation_low, self.saturation_high) 87 | ) 88 | return cv2.cvtColor(img, cv2.COLOR_HSV2BGR) 89 | return img 90 | 91 | def hue(self, img): 92 | if random.randrange(2): 93 | img = cv2.cvtColor(img, cv2.COLOR_BGR2HSV) 94 | img[:, :, 0] = ( 95 | img[:, :, 0].astype(int) + random.randint(-self.hue_delta, self.hue_delta) 96 | ) % 180 97 | return cv2.cvtColor(img, cv2.COLOR_HSV2BGR) 98 | return img 99 | -------------------------------------------------------------------------------- /data/datasets/pascal_voc.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright (c) Facebook, Inc. and its affiliates. 3 | 4 | import numpy as np 5 | import os 6 | import xml.etree.ElementTree as ET 7 | from typing import List, Tuple, Union 8 | 9 | from detectron2.data import DatasetCatalog, MetadataCatalog 10 | from detectron2.structures import BoxMode 11 | from detectron2.utils.file_io import PathManager 12 | 13 | __all__ = ["load_voc_instances", "register_pascal_voc"] 14 | 15 | 16 | # fmt: off 17 | CLASS_NAMES = ( 18 | "aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", 19 | "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", 20 | "pottedplant", "sheep", "sofa", "train", "tvmonitor" 21 | ) 22 | # fmt: on 23 | 24 | 25 | def load_voc_instances(dirname: str, split: str, class_names: Union[List[str], Tuple[str, ...]]): 26 | """ 27 | Load Pascal VOC detection annotations to Detectron2 format. 28 | 29 | Args: 30 | dirname: Contain "Annotations", "ImageSets", "JPEGImages" 31 | split (str): one of "train", "test", "val", "trainval" 32 | class_names: list or tuple of class names 33 | """ 34 | with PathManager.open(os.path.join(dirname, "ImageSets", "Main", split + ".txt")) as f: 35 | fileids = np.loadtxt(f, dtype=np.str) 36 | 37 | # Needs to read many small annotation files. Makes sense at local 38 | annotation_dirname = PathManager.get_local_path(os.path.join(dirname, "Annotations/")) 39 | dicts = [] 40 | for fileid in fileids: 41 | anno_file = os.path.join(annotation_dirname, fileid + ".xml") 42 | jpeg_file = os.path.join(dirname, "JPEGImages", fileid + ".jpg") 43 | 44 | with PathManager.open(anno_file) as f: 45 | tree = ET.parse(f) 46 | 47 | r = { 48 | "file_name": jpeg_file, 49 | "image_id": fileid, 50 | "height": int(tree.findall("./size/height")[0].text), 51 | "width": int(tree.findall("./size/width")[0].text), 52 | } 53 | instances = [] 54 | 55 | for obj in tree.findall("object"): 56 | cls = obj.find("name").text 57 | # We include "difficult" samples in training. 58 | # Based on limited experiments, they don't hurt accuracy. 59 | # difficult = int(obj.find("difficult").text) 60 | # if difficult == 1: 61 | # continue 62 | bbox = obj.find("bndbox") 63 | bbox = [float(bbox.find(x).text) for x in ["xmin", "ymin", "xmax", "ymax"]] 64 | # Original annotations are integers in the range [1, W or H] 65 | # Assuming they mean 1-based pixel indices (inclusive), 66 | # a box with annotation (xmin=1, xmax=W) covers the whole image. 
67 | # In coordinate space this is represented by (xmin=0, xmax=W) 68 | bbox[0] -= 1.0 69 | bbox[1] -= 1.0 70 | instances.append( 71 | {"category_id": class_names.index(cls), "bbox": bbox, "bbox_mode": BoxMode.XYXY_ABS} 72 | ) 73 | r["annotations"] = instances 74 | dicts.append(r) 75 | return dicts 76 | 77 | 78 | def register_pascal_voc(name, dirname, split, year, class_names=CLASS_NAMES): 79 | DatasetCatalog.register(name, lambda: load_voc_instances(dirname, split, class_names)) 80 | MetadataCatalog.get(name).set( 81 | thing_classes=list(class_names), dirname=dirname, year=year, split=split 82 | ) 83 | -------------------------------------------------------------------------------- /layers/blocks.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright (c) Facebook, Inc. and its affiliates. 3 | 4 | import fvcore.nn.weight_init as weight_init 5 | from torch import nn 6 | 7 | from .batch_norm import FrozenBatchNorm2d, get_norm 8 | from .wrappers import Conv2d 9 | 10 | 11 | """ 12 | CNN building blocks. 13 | """ 14 | 15 | 16 | class CNNBlockBase(nn.Module): 17 | """ 18 | A CNN block is assumed to have input channels, output channels and a stride. 19 | The input and output of `forward()` method must be NCHW tensors. 20 | The method can perform arbitrary computation but must match the given 21 | channels and stride specification. 22 | 23 | Attribute: 24 | in_channels (int): 25 | out_channels (int): 26 | stride (int): 27 | """ 28 | 29 | def __init__(self, in_channels, out_channels, stride): 30 | """ 31 | The `__init__` method of any subclass should also contain these arguments. 32 | 33 | Args: 34 | in_channels (int): 35 | out_channels (int): 36 | stride (int): 37 | """ 38 | super().__init__() 39 | self.in_channels = in_channels 40 | self.out_channels = out_channels 41 | self.stride = stride 42 | 43 | def freeze(self): 44 | """ 45 | Make this block not trainable. 46 | This method sets all parameters to `requires_grad=False`, 47 | and convert all BatchNorm layers to FrozenBatchNorm 48 | 49 | Returns: 50 | the block itself 51 | """ 52 | for p in self.parameters(): 53 | p.requires_grad = False 54 | FrozenBatchNorm2d.convert_frozen_batchnorm(self) 55 | return self 56 | 57 | 58 | class DepthwiseSeparableConv2d(nn.Module): 59 | """ 60 | A kxk depthwise convolution + a 1x1 convolution. 61 | 62 | In :paper:`xception`, norm & activation are applied on the second conv. 63 | :paper:`mobilenet` uses norm & activation on both convs. 64 | """ 65 | 66 | def __init__( 67 | self, 68 | in_channels, 69 | out_channels, 70 | kernel_size=3, 71 | padding=1, 72 | *, 73 | norm1=None, 74 | activation1=None, 75 | norm2=None, 76 | activation2=None, 77 | ): 78 | """ 79 | Args: 80 | norm1, norm2 (str or callable): normalization for the two conv layers. 81 | activation1, activation2 (callable(Tensor) -> Tensor): activation 82 | function for the two conv layers. 
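        Example:
            A minimal usage sketch (illustrative only; assumes ``torch`` and
            ``torch.nn.functional as F`` are available in the caller's scope)::

                block = DepthwiseSeparableConv2d(
                    64, 128, norm1="BN", activation1=F.relu, norm2="BN", activation2=F.relu
                )
                out = block(torch.rand(2, 64, 32, 32))  # -> shape (2, 128, 32, 32)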
83 | """ 84 | super().__init__() 85 | self.depthwise = Conv2d( 86 | in_channels, 87 | in_channels, 88 | kernel_size=kernel_size, 89 | padding=padding, 90 | groups=in_channels, 91 | bias=not norm1, 92 | norm=get_norm(norm1, in_channels), 93 | activation=activation1, 94 | ) 95 | self.pointwise = Conv2d( 96 | in_channels, 97 | out_channels, 98 | kernel_size=1, 99 | bias=not norm2, 100 | norm=get_norm(norm2, out_channels), 101 | activation=activation2, 102 | ) 103 | 104 | # default initialization 105 | weight_init.c2_msra_fill(self.depthwise) 106 | weight_init.c2_msra_fill(self.pointwise) 107 | 108 | def forward(self, x): 109 | return self.pointwise(self.depthwise(x)) 110 | -------------------------------------------------------------------------------- /layers/roi_align_rotated.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | import torch 3 | from torch import nn 4 | from torch.autograd import Function 5 | from torch.autograd.function import once_differentiable 6 | from torch.nn.modules.utils import _pair 7 | 8 | from detectron2 import _C 9 | 10 | 11 | class _ROIAlignRotated(Function): 12 | @staticmethod 13 | def forward(ctx, input, roi, output_size, spatial_scale, sampling_ratio): 14 | ctx.save_for_backward(roi) 15 | ctx.output_size = _pair(output_size) 16 | ctx.spatial_scale = spatial_scale 17 | ctx.sampling_ratio = sampling_ratio 18 | ctx.input_shape = input.size() 19 | output = _C.roi_align_rotated_forward( 20 | input, roi, spatial_scale, output_size[0], output_size[1], sampling_ratio 21 | ) 22 | return output 23 | 24 | @staticmethod 25 | @once_differentiable 26 | def backward(ctx, grad_output): 27 | (rois,) = ctx.saved_tensors 28 | output_size = ctx.output_size 29 | spatial_scale = ctx.spatial_scale 30 | sampling_ratio = ctx.sampling_ratio 31 | bs, ch, h, w = ctx.input_shape 32 | grad_input = _C.roi_align_rotated_backward( 33 | grad_output, 34 | rois, 35 | spatial_scale, 36 | output_size[0], 37 | output_size[1], 38 | bs, 39 | ch, 40 | h, 41 | w, 42 | sampling_ratio, 43 | ) 44 | return grad_input, None, None, None, None, None 45 | 46 | 47 | roi_align_rotated = _ROIAlignRotated.apply 48 | 49 | 50 | class ROIAlignRotated(nn.Module): 51 | def __init__(self, output_size, spatial_scale, sampling_ratio): 52 | """ 53 | Args: 54 | output_size (tuple): h, w 55 | spatial_scale (float): scale the input boxes by this number 56 | sampling_ratio (int): number of inputs samples to take for each output 57 | sample. 0 to take samples densely. 58 | 59 | Note: 60 | ROIAlignRotated supports continuous coordinate by default: 61 | Given a continuous coordinate c, its two neighboring pixel indices (in our 62 | pixel model) are computed by floor(c - 0.5) and ceil(c - 0.5). For example, 63 | c=1.3 has pixel neighbors with discrete indices [0] and [1] (which are sampled 64 | from the underlying signal at continuous coordinates 0.5 and 1.5). 65 | """ 66 | super(ROIAlignRotated, self).__init__() 67 | self.output_size = output_size 68 | self.spatial_scale = spatial_scale 69 | self.sampling_ratio = sampling_ratio 70 | 71 | def forward(self, input, rois): 72 | """ 73 | Args: 74 | input: NCHW images 75 | rois: Bx6 boxes. First column is the index into N. 76 | The other 5 columns are (x_ctr, y_ctr, width, height, angle_degrees). 
77 | """ 78 | assert rois.dim() == 2 and rois.size(1) == 6 79 | orig_dtype = input.dtype 80 | if orig_dtype == torch.float16: 81 | input = input.float() 82 | rois = rois.float() 83 | return roi_align_rotated( 84 | input, rois, self.output_size, self.spatial_scale, self.sampling_ratio 85 | ).to(dtype=orig_dtype) 86 | 87 | def __repr__(self): 88 | tmpstr = self.__class__.__name__ + "(" 89 | tmpstr += "output_size=" + str(self.output_size) 90 | tmpstr += ", spatial_scale=" + str(self.spatial_scale) 91 | tmpstr += ", sampling_ratio=" + str(self.sampling_ratio) 92 | tmpstr += ")" 93 | return tmpstr 94 | -------------------------------------------------------------------------------- /checkpoint/detection_checkpoint.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | import pickle 3 | from fvcore.common.checkpoint import Checkpointer 4 | 5 | import detectron2.utils.comm as comm 6 | from detectron2.utils.file_io import PathManager 7 | 8 | from .c2_model_loading import align_and_update_state_dicts 9 | 10 | 11 | class DetectionCheckpointer(Checkpointer): 12 | """ 13 | Same as :class:`Checkpointer`, but is able to handle models in detectron & detectron2 14 | model zoo, and apply conversions for legacy models. 15 | """ 16 | 17 | def __init__(self, model, save_dir="", *, save_to_disk=None, **checkpointables): 18 | is_main_process = comm.is_main_process() 19 | super().__init__( 20 | model, 21 | save_dir, 22 | save_to_disk=is_main_process if save_to_disk is None else save_to_disk, 23 | **checkpointables, 24 | ) 25 | if hasattr(self, "path_manager"): 26 | self.path_manager = PathManager 27 | else: 28 | # This could only happen for open source 29 | # TODO remove after upgrading fvcore 30 | from fvcore.common.file_io import PathManager as g_PathManager 31 | 32 | for handler in PathManager._path_handlers.values(): 33 | try: 34 | g_PathManager.register_handler(handler) 35 | except KeyError: 36 | pass 37 | 38 | def _load_file(self, filename): 39 | if filename.endswith(".pkl"): 40 | with PathManager.open(filename, "rb") as f: 41 | data = pickle.load(f, encoding="latin1") 42 | if "model" in data and "__author__" in data: 43 | # file is in Detectron2 model zoo format 44 | self.logger.info("Reading a file from '{}'".format(data["__author__"])) 45 | return data 46 | else: 47 | # assume file is from Caffe2 / Detectron1 model zoo 48 | if "blobs" in data: 49 | # Detection models have "blobs", but ImageNet models don't 50 | data = data["blobs"] 51 | data = {k: v for k, v in data.items() if not k.endswith("_momentum")} 52 | return {"model": data, "__author__": "Caffe2", "matching_heuristics": True} 53 | 54 | loaded = super()._load_file(filename) # load native pth checkpoint 55 | if "model" not in loaded: 56 | loaded = {"model": loaded} 57 | return loaded 58 | 59 | def _load_model(self, checkpoint): 60 | if checkpoint.get("matching_heuristics", False): 61 | self._convert_ndarray_to_tensor(checkpoint["model"]) 62 | # convert weights by name-matching heuristics 63 | model_state_dict = self.model.state_dict() 64 | align_and_update_state_dicts( 65 | model_state_dict, 66 | checkpoint["model"], 67 | c2_conversion=checkpoint.get("__author__", None) == "Caffe2", 68 | ) 69 | checkpoint["model"] = model_state_dict 70 | # for non-caffe2 models, use standard ways to load it 71 | incompatible = super()._load_model(checkpoint) 72 | if incompatible is None: # support older versions of fvcore 73 | return None 74 | 75 | model_buffers = 
dict(self.model.named_buffers(recurse=False)) 76 | for k in ["pixel_mean", "pixel_std"]: 77 | # Ignore missing key message about pixel_mean/std. 78 | # Though they may be missing in old checkpoints, they will be correctly 79 | # initialized from config anyway. 80 | if k in model_buffers: 81 | try: 82 | incompatible.missing_keys.remove(k) 83 | except ValueError: 84 | pass 85 | return incompatible 86 | -------------------------------------------------------------------------------- /engine/launch.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | import logging 3 | import torch 4 | import torch.distributed as dist 5 | import torch.multiprocessing as mp 6 | 7 | from detectron2.utils import comm 8 | 9 | __all__ = ["launch"] 10 | 11 | 12 | def _find_free_port(): 13 | import socket 14 | 15 | sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) 16 | # Binding to port 0 will cause the OS to find an available port for us 17 | sock.bind(("", 0)) 18 | port = sock.getsockname()[1] 19 | sock.close() 20 | # NOTE: there is still a chance the port could be taken by other processes. 21 | return port 22 | 23 | 24 | def launch(main_func, num_gpus_per_machine, num_machines=1, machine_rank=0, dist_url=None, args=()): 25 | """ 26 | Launch multi-gpu or distributed training. 27 | This function must be called on all machines involved in the training. 28 | It will spawn child processes (defined by ``num_gpus_per_machine``) on each machine. 29 | 30 | Args: 31 | main_func: a function that will be called by `main_func(*args)` 32 | num_gpus_per_machine (int): number of GPUs per machine 33 | num_machines (int): the total number of machines 34 | machine_rank (int): the rank of this machine 35 | dist_url (str): url to connect to for distributed jobs, including protocol 36 | e.g. "tcp://127.0.0.1:8686". 37 | Can be set to "auto" to automatically select a free port on localhost 38 | args (tuple): arguments passed to main_func 39 | """ 40 | world_size = num_machines * num_gpus_per_machine 41 | if world_size > 1: 42 | # https://github.com/pytorch/pytorch/pull/14391 43 | # TODO prctl in spawned processes 44 | 45 | if dist_url == "auto": 46 | assert num_machines == 1, "dist_url=auto not supported in multi-machine jobs." 47 | port = _find_free_port() 48 | dist_url = f"tcp://127.0.0.1:{port}" 49 | if num_machines > 1 and dist_url.startswith("file://"): 50 | logger = logging.getLogger(__name__) 51 | logger.warning( 52 | "file:// is not a reliable init_method in multi-machine jobs. Prefer tcp://" 53 | ) 54 | 55 | mp.spawn( 56 | _distributed_worker, 57 | nprocs=num_gpus_per_machine, 58 | args=(main_func, world_size, num_gpus_per_machine, machine_rank, dist_url, args), 59 | daemon=False, 60 | ) 61 | else: 62 | main_func(*args) 63 | 64 | 65 | def _distributed_worker( 66 | local_rank, main_func, world_size, num_gpus_per_machine, machine_rank, dist_url, args 67 | ): 68 | assert torch.cuda.is_available(), "cuda is not available. Please check your installation." 
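    # Illustrative note: ranks are laid out machine-by-machine, GPU-by-GPU. For example,
    # with num_machines=2 and num_gpus_per_machine=4, the worker with machine_rank=1 and
    # local_rank=2 gets global_rank = 1 * 4 + 2 = 6.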
69 | global_rank = machine_rank * num_gpus_per_machine + local_rank 70 | try: 71 | dist.init_process_group( 72 | backend="NCCL", init_method=dist_url, world_size=world_size, rank=global_rank 73 | ) 74 | except Exception as e: 75 | logger = logging.getLogger(__name__) 76 | logger.error("Process group URL: {}".format(dist_url)) 77 | raise e 78 | # synchronize is needed here to prevent a possible timeout after calling init_process_group 79 | # See: https://github.com/facebookresearch/maskrcnn-benchmark/issues/172 80 | comm.synchronize() 81 | 82 | assert num_gpus_per_machine <= torch.cuda.device_count() 83 | torch.cuda.set_device(local_rank) 84 | 85 | # Setup the local process group (which contains ranks within the same machine) 86 | assert comm._LOCAL_PROCESS_GROUP is None 87 | num_machines = world_size // num_gpus_per_machine 88 | for i in range(num_machines): 89 | ranks_on_i = list(range(i * num_gpus_per_machine, (i + 1) * num_gpus_per_machine)) 90 | pg = dist.new_group(ranks_on_i) 91 | if i == machine_rank: 92 | comm._LOCAL_PROCESS_GROUP = pg 93 | 94 | main_func(*args) 95 | -------------------------------------------------------------------------------- /projects/point_rend/coarse_mask_head.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | import fvcore.nn.weight_init as weight_init 3 | import torch 4 | from torch import nn 5 | from torch.nn import functional as F 6 | 7 | from detectron2.layers import Conv2d, ShapeSpec 8 | from detectron2.modeling import ROI_MASK_HEAD_REGISTRY 9 | 10 | 11 | @ROI_MASK_HEAD_REGISTRY.register() 12 | class CoarseMaskHead(nn.Module): 13 | """ 14 | A mask head with fully connected layers. Given pooled features it first reduces channels and 15 | spatial dimensions with conv layers and then uses FC layers to predict coarse masks analogously 16 | to the standard box head. 
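    For example (illustrative shapes, not config defaults): with ``input_shape`` of
    ``ShapeSpec(channels=256, height=14, width=14)``, ``OUTPUT_SIDE_RESOLUTION=7`` and
    ``NUM_CLASSES=80``, ``forward(x)`` on ``x`` of shape ``(N, 256, 14, 14)`` (or the
    flattened ``(N, 256 * 14 * 14)``) returns a tensor of shape ``(N, 80, 7, 7)``.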
17 | """ 18 | 19 | def __init__(self, cfg, input_shape: ShapeSpec): 20 | """ 21 | The following attributes are parsed from config: 22 | conv_dim: the output dimension of the conv layers 23 | fc_dim: the feature dimenstion of the FC layers 24 | num_fc: the number of FC layers 25 | output_side_resolution: side resolution of the output square mask prediction 26 | """ 27 | super(CoarseMaskHead, self).__init__() 28 | 29 | # fmt: off 30 | self.num_classes = cfg.MODEL.ROI_HEADS.NUM_CLASSES 31 | conv_dim = cfg.MODEL.ROI_MASK_HEAD.CONV_DIM 32 | self.fc_dim = cfg.MODEL.ROI_MASK_HEAD.FC_DIM 33 | num_fc = cfg.MODEL.ROI_MASK_HEAD.NUM_FC 34 | self.output_side_resolution = cfg.MODEL.ROI_MASK_HEAD.OUTPUT_SIDE_RESOLUTION 35 | self.input_channels = input_shape.channels 36 | self.input_h = input_shape.height 37 | self.input_w = input_shape.width 38 | # fmt: on 39 | 40 | self.conv_layers = [] 41 | if self.input_channels > conv_dim: 42 | self.reduce_channel_dim_conv = Conv2d( 43 | self.input_channels, 44 | conv_dim, 45 | kernel_size=1, 46 | stride=1, 47 | padding=0, 48 | bias=True, 49 | activation=F.relu, 50 | ) 51 | self.conv_layers.append(self.reduce_channel_dim_conv) 52 | 53 | self.reduce_spatial_dim_conv = Conv2d( 54 | conv_dim, conv_dim, kernel_size=2, stride=2, padding=0, bias=True, activation=F.relu 55 | ) 56 | self.conv_layers.append(self.reduce_spatial_dim_conv) 57 | 58 | input_dim = conv_dim * self.input_h * self.input_w 59 | input_dim //= 4 60 | 61 | self.fcs = [] 62 | for k in range(num_fc): 63 | fc = nn.Linear(input_dim, self.fc_dim) 64 | self.add_module("coarse_mask_fc{}".format(k + 1), fc) 65 | self.fcs.append(fc) 66 | input_dim = self.fc_dim 67 | 68 | output_dim = self.num_classes * self.output_side_resolution * self.output_side_resolution 69 | 70 | self.prediction = nn.Linear(self.fc_dim, output_dim) 71 | # use normal distribution initialization for mask prediction layer 72 | nn.init.normal_(self.prediction.weight, std=0.001) 73 | nn.init.constant_(self.prediction.bias, 0) 74 | 75 | for layer in self.conv_layers: 76 | weight_init.c2_msra_fill(layer) 77 | for layer in self.fcs: 78 | weight_init.c2_xavier_fill(layer) 79 | 80 | def forward(self, x): 81 | # unlike BaseMaskRCNNHead, this head only outputs intermediate 82 | # features, because the features will be used later by PointHead. 83 | N = x.shape[0] 84 | x = x.view(N, self.input_channels, self.input_h, self.input_w) 85 | for layer in self.conv_layers: 86 | x = layer(x) 87 | x = torch.flatten(x, start_dim=1) 88 | for layer in self.fcs: 89 | x = F.relu(layer(x)) 90 | return self.prediction(x).view( 91 | N, self.num_classes, self.output_side_resolution, self.output_side_resolution 92 | ) 93 | -------------------------------------------------------------------------------- /modeling/postprocessing.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | import torch 3 | from torch.nn import functional as F 4 | 5 | from detectron2.layers import paste_masks_in_image 6 | from detectron2.structures import Instances 7 | from detectron2.utils.memory import retry_if_cuda_oom 8 | 9 | 10 | # perhaps should rename to "resize_instance" 11 | def detector_postprocess(results, output_height, output_width, mask_threshold=0.5): 12 | """ 13 | Resize the output instances. 14 | The input images are often resized when entering an object detector. 15 | As a result, we often need the outputs of the detector in a different 16 | resolution from its inputs. 
17 | 18 | This function will resize the raw outputs of an R-CNN detector 19 | to produce outputs according to the desired output resolution. 20 | 21 | Args: 22 | results (Instances): the raw outputs from the detector. 23 | `results.image_size` contains the input image resolution the detector sees. 24 | This object might be modified in-place. 25 | output_height, output_width: the desired output resolution. 26 | 27 | Returns: 28 | Instances: the resized output from the model, based on the output resolution 29 | """ 30 | 31 | # Converts integer tensors to float temporaries 32 | # to ensure true division is performed when 33 | # computing scale_x and scale_y. 34 | if isinstance(output_width, torch.Tensor): 35 | output_width_tmp = output_width.float() 36 | else: 37 | output_width_tmp = output_width 38 | 39 | if isinstance(output_height, torch.Tensor): 40 | output_height_tmp = output_height.float() 41 | else: 42 | output_height_tmp = output_height 43 | 44 | scale_x, scale_y = ( 45 | output_width_tmp / results.image_size[1], 46 | output_height_tmp / results.image_size[0], 47 | ) 48 | results = Instances((output_height, output_width), **results.get_fields()) 49 | 50 | if results.has("pred_boxes"): 51 | output_boxes = results.pred_boxes 52 | elif results.has("proposal_boxes"): 53 | output_boxes = results.proposal_boxes 54 | 55 | output_boxes.scale(scale_x, scale_y) 56 | output_boxes.clip(results.image_size) 57 | 58 | results = results[output_boxes.nonempty()] 59 | 60 | if results.has("pred_masks"): 61 | results.pred_masks = retry_if_cuda_oom(paste_masks_in_image)( 62 | results.pred_masks[:, 0, :, :], # N, 1, M, M 63 | results.pred_boxes, 64 | results.image_size, 65 | threshold=mask_threshold, 66 | ) 67 | 68 | if results.has("pred_keypoints"): 69 | results.pred_keypoints[:, :, 0] *= scale_x 70 | results.pred_keypoints[:, :, 1] *= scale_y 71 | 72 | return results 73 | 74 | 75 | def sem_seg_postprocess(result, img_size, output_height, output_width): 76 | """ 77 | Return semantic segmentation predictions in the original resolution. 78 | 79 | The input images are often resized when entering semantic segmentor. Moreover, in same 80 | cases, they also padded inside segmentor to be divisible by maximum network stride. 81 | As a result, we often need the predictions of the segmentor in a different 82 | resolution from its inputs. 83 | 84 | Args: 85 | result (Tensor): semantic segmentation prediction logits. A tensor of shape (C, H, W), 86 | where C is the number of classes, and H, W are the height and width of the prediction. 87 | img_size (tuple): image size that segmentor is taking as input. 88 | output_height, output_width: the desired output resolution. 89 | 90 | Returns: 91 | semantic segmentation prediction (Tensor): A tensor of the shape 92 | (C, output_height, output_width) that contains per-pixel soft predictions. 93 | """ 94 | result = result[:, : img_size[0], : img_size[1]].expand(1, -1, -1, -1) 95 | result = F.interpolate( 96 | result, size=(output_height, output_width), mode="bilinear", align_corners=False 97 | )[0] 98 | return result 99 | -------------------------------------------------------------------------------- /utils/colormap.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | 3 | """ 4 | An awesome colormap for really neat visualizations. 5 | Copied from Detectron, and removed gray colors. 
6 | """ 7 | 8 | import numpy as np 9 | 10 | __all__ = ["colormap", "random_color"] 11 | 12 | # fmt: off 13 | # RGB: 14 | _COLORS = np.array( 15 | [ 16 | 0.000, 0.447, 0.741, 17 | 0.850, 0.325, 0.098, 18 | 0.929, 0.694, 0.125, 19 | 0.494, 0.184, 0.556, 20 | 0.466, 0.674, 0.188, 21 | 0.301, 0.745, 0.933, 22 | 0.635, 0.078, 0.184, 23 | 0.300, 0.300, 0.300, 24 | 0.600, 0.600, 0.600, 25 | 1.000, 0.000, 0.000, 26 | 1.000, 0.500, 0.000, 27 | 0.749, 0.749, 0.000, 28 | 0.000, 1.000, 0.000, 29 | 0.000, 0.000, 1.000, 30 | 0.667, 0.000, 1.000, 31 | 0.333, 0.333, 0.000, 32 | 0.333, 0.667, 0.000, 33 | 0.333, 1.000, 0.000, 34 | 0.667, 0.333, 0.000, 35 | 0.667, 0.667, 0.000, 36 | 0.667, 1.000, 0.000, 37 | 1.000, 0.333, 0.000, 38 | 1.000, 0.667, 0.000, 39 | 1.000, 1.000, 0.000, 40 | 0.000, 0.333, 0.500, 41 | 0.000, 0.667, 0.500, 42 | 0.000, 1.000, 0.500, 43 | 0.333, 0.000, 0.500, 44 | 0.333, 0.333, 0.500, 45 | 0.333, 0.667, 0.500, 46 | 0.333, 1.000, 0.500, 47 | 0.667, 0.000, 0.500, 48 | 0.667, 0.333, 0.500, 49 | 0.667, 0.667, 0.500, 50 | 0.667, 1.000, 0.500, 51 | 1.000, 0.000, 0.500, 52 | 1.000, 0.333, 0.500, 53 | 1.000, 0.667, 0.500, 54 | 1.000, 1.000, 0.500, 55 | 0.000, 0.333, 1.000, 56 | 0.000, 0.667, 1.000, 57 | 0.000, 1.000, 1.000, 58 | 0.333, 0.000, 1.000, 59 | 0.333, 0.333, 1.000, 60 | 0.333, 0.667, 1.000, 61 | 0.333, 1.000, 1.000, 62 | 0.667, 0.000, 1.000, 63 | 0.667, 0.333, 1.000, 64 | 0.667, 0.667, 1.000, 65 | 0.667, 1.000, 1.000, 66 | 1.000, 0.000, 1.000, 67 | 1.000, 0.333, 1.000, 68 | 1.000, 0.667, 1.000, 69 | 0.333, 0.000, 0.000, 70 | 0.500, 0.000, 0.000, 71 | 0.667, 0.000, 0.000, 72 | 0.833, 0.000, 0.000, 73 | 1.000, 0.000, 0.000, 74 | 0.000, 0.167, 0.000, 75 | 0.000, 0.333, 0.000, 76 | 0.000, 0.500, 0.000, 77 | 0.000, 0.667, 0.000, 78 | 0.000, 0.833, 0.000, 79 | 0.000, 1.000, 0.000, 80 | 0.000, 0.000, 0.167, 81 | 0.000, 0.000, 0.333, 82 | 0.000, 0.000, 0.500, 83 | 0.000, 0.000, 0.667, 84 | 0.000, 0.000, 0.833, 85 | 0.000, 0.000, 1.000, 86 | 0.000, 0.000, 0.000, 87 | 0.143, 0.143, 0.143, 88 | 0.857, 0.857, 0.857, 89 | 1.000, 1.000, 1.000 90 | ] 91 | ).astype(np.float32).reshape(-1, 3) 92 | # fmt: on 93 | 94 | 95 | def colormap(rgb=False, maximum=255): 96 | """ 97 | Args: 98 | rgb (bool): whether to return RGB colors or BGR colors. 99 | maximum (int): either 255 or 1 100 | 101 | Returns: 102 | ndarray: a float32 array of Nx3 colors, in range [0, 255] or [0, 1] 103 | """ 104 | assert maximum in [255, 1], maximum 105 | c = _COLORS * maximum 106 | if not rgb: 107 | c = c[:, ::-1] 108 | return c 109 | 110 | 111 | def random_color(rgb=False, maximum=255): 112 | """ 113 | Args: 114 | rgb (bool): whether to return RGB colors or BGR colors. 
115 | maximum (int): either 255 or 1 116 | 117 | Returns: 118 | ndarray: a vector of 3 numbers 119 | """ 120 | idx = np.random.randint(0, len(_COLORS)) 121 | ret = _COLORS[idx] * maximum 122 | if not rgb: 123 | ret = ret[::-1] 124 | return ret 125 | 126 | 127 | if __name__ == "__main__": 128 | import cv2 129 | 130 | size = 100 131 | H, W = 10, 10 132 | canvas = np.random.rand(H * size, W * size, 3).astype("float32") 133 | for h in range(H): 134 | for w in range(W): 135 | idx = h * W + w 136 | if idx >= len(_COLORS): 137 | break 138 | canvas[h * size : (h + 1) * size, w * size : (w + 1) * size] = _COLORS[idx] 139 | cv2.imshow("a", canvas) 140 | cv2.waitKey(0) 141 | -------------------------------------------------------------------------------- /utils/env.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | import importlib 3 | import importlib.util 4 | import logging 5 | import numpy as np 6 | import os 7 | import random 8 | import sys 9 | from datetime import datetime 10 | import torch 11 | 12 | __all__ = ["seed_all_rng"] 13 | 14 | 15 | TORCH_VERSION = tuple(int(x) for x in torch.__version__.split(".")[:2]) 16 | """ 17 | PyTorch version as a tuple of 2 ints. Useful for comparison. 18 | """ 19 | 20 | 21 | def seed_all_rng(seed=None): 22 | """ 23 | Set the random seed for the RNG in torch, numpy and python. 24 | 25 | Args: 26 | seed (int): if None, will use a strong random seed. 27 | """ 28 | if seed is None: 29 | seed = ( 30 | os.getpid() 31 | + int(datetime.now().strftime("%S%f")) 32 | + int.from_bytes(os.urandom(2), "big") 33 | ) 34 | logger = logging.getLogger(__name__) 35 | logger.info("Using a generated random seed {}".format(seed)) 36 | np.random.seed(seed) 37 | torch.set_rng_state(torch.manual_seed(seed).get_state()) 38 | random.seed(seed) 39 | 40 | 41 | # from https://stackoverflow.com/questions/67631/how-to-import-a-module-given-the-full-path 42 | def _import_file(module_name, file_path, make_importable=False): 43 | spec = importlib.util.spec_from_file_location(module_name, file_path) 44 | module = importlib.util.module_from_spec(spec) 45 | spec.loader.exec_module(module) 46 | if make_importable: 47 | sys.modules[module_name] = module 48 | return module 49 | 50 | 51 | def _configure_libraries(): 52 | """ 53 | Configurations for some libraries. 54 | """ 55 | # An environment option to disable `import cv2` globally, 56 | # in case it leads to negative performance impact 57 | disable_cv2 = int(os.environ.get("DETECTRON2_DISABLE_CV2", False)) 58 | if disable_cv2: 59 | sys.modules["cv2"] = None 60 | else: 61 | # Disable opencl in opencv since its interaction with cuda often has negative effects 62 | # This envvar is supported after OpenCV 3.4.0 63 | os.environ["OPENCV_OPENCL_RUNTIME"] = "disabled" 64 | try: 65 | import cv2 66 | 67 | if int(cv2.__version__.split(".")[0]) >= 3: 68 | cv2.ocl.setUseOpenCL(False) 69 | except ModuleNotFoundError: 70 | # Other types of ImportError, if happened, should not be ignored. 
71 | # Because a failed opencv import could mess up address space 72 | # https://github.com/skvark/opencv-python/issues/381 73 | pass 74 | 75 | def get_version(module, digit=2): 76 | return tuple(map(int, module.__version__.split(".")[:digit])) 77 | 78 | # fmt: off 79 | assert get_version(torch) >= (1, 4), "Requires torch>=1.4" 80 | import fvcore 81 | assert get_version(fvcore, 3) >= (0, 1, 1), "Requires fvcore>=0.1.1" 82 | import yaml 83 | assert get_version(yaml) >= (5, 1), "Requires pyyaml>=5.1" 84 | # fmt: on 85 | 86 | 87 | _ENV_SETUP_DONE = False 88 | 89 | 90 | def setup_environment(): 91 | """Perform environment setup work. The default setup is a no-op, but this 92 | function allows the user to specify a Python source file or a module in 93 | the $DETECTRON2_ENV_MODULE environment variable, that performs 94 | custom setup work that may be necessary to their computing environment. 95 | """ 96 | global _ENV_SETUP_DONE 97 | if _ENV_SETUP_DONE: 98 | return 99 | _ENV_SETUP_DONE = True 100 | 101 | _configure_libraries() 102 | 103 | custom_module_path = os.environ.get("DETECTRON2_ENV_MODULE") 104 | 105 | if custom_module_path: 106 | setup_custom_environment(custom_module_path) 107 | else: 108 | # The default setup is a no-op 109 | pass 110 | 111 | 112 | def setup_custom_environment(custom_module): 113 | """ 114 | Load custom environment setup by importing a Python source file or a 115 | module, and run the setup function. 116 | """ 117 | if custom_module.endswith(".py"): 118 | module = _import_file("detectron2.utils.env.custom_module", custom_module) 119 | else: 120 | module = importlib.import_module(custom_module) 121 | assert hasattr(module, "setup_environment") and callable(module.setup_environment), ( 122 | "Custom environment module defined in {} does not have the " 123 | "required callable attribute 'setup_environment'." 124 | ).format(custom_module) 125 | module.setup_environment() 126 | -------------------------------------------------------------------------------- /modeling/roi_heads/box_head.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | import numpy as np 3 | from typing import List 4 | import fvcore.nn.weight_init as weight_init 5 | import torch 6 | from torch import nn 7 | 8 | from detectron2.config import configurable 9 | from detectron2.layers import Conv2d, Linear, ShapeSpec, get_norm 10 | from detectron2.utils.registry import Registry 11 | 12 | __all__ = ["FastRCNNConvFCHead", "build_box_head", "ROI_BOX_HEAD_REGISTRY"] 13 | 14 | ROI_BOX_HEAD_REGISTRY = Registry("ROI_BOX_HEAD") 15 | ROI_BOX_HEAD_REGISTRY.__doc__ = """ 16 | Registry for box heads, which make box predictions from per-region features. 17 | 18 | The registered object will be called with `obj(cfg, input_shape)`. 19 | """ 20 | 21 | 22 | # To get torchscript support, we make the head a subclass of `nn.Sequential`. 23 | # Therefore, to add new layers in this head class, please make sure they are 24 | # added in the order they will be used in forward(). 25 | @ROI_BOX_HEAD_REGISTRY.register() 26 | class FastRCNNConvFCHead(nn.Sequential): 27 | """ 28 | A head with several 3x3 conv layers (each followed by norm & relu) and then 29 | several fc layers (each followed by relu). 30 | """ 31 | 32 | @configurable 33 | def __init__( 34 | self, input_shape: ShapeSpec, *, conv_dims: List[int], fc_dims: List[int], conv_norm="" 35 | ): 36 | """ 37 | NOTE: this interface is experimental. 
38 | 39 | Args: 40 | input_shape (ShapeSpec): shape of the input feature. 41 | conv_dims (list[int]): the output dimensions of the conv layers 42 | fc_dims (list[int]): the output dimensions of the fc layers 43 | conv_norm (str or callable): normalization for the conv layers. 44 | See :func:`detectron2.layers.get_norm` for supported types. 45 | """ 46 | super().__init__() 47 | assert len(conv_dims) + len(fc_dims) > 0 48 | 49 | self._output_size = (input_shape.channels, input_shape.height, input_shape.width) 50 | 51 | self.conv_norm_relus = [] 52 | for k, conv_dim in enumerate(conv_dims): 53 | conv = Conv2d( 54 | self._output_size[0], 55 | conv_dim, 56 | kernel_size=3, 57 | padding=1, 58 | bias=not conv_norm, 59 | norm=get_norm(conv_norm, conv_dim), 60 | activation=nn.ReLU(), 61 | ) 62 | self.add_module("conv{}".format(k + 1), conv) 63 | self.conv_norm_relus.append(conv) 64 | self._output_size = (conv_dim, self._output_size[1], self._output_size[2]) 65 | 66 | self.fcs = [] 67 | for k, fc_dim in enumerate(fc_dims): 68 | if k == 0: 69 | self.add_module("flatten", nn.Flatten()) 70 | fc = Linear(int(np.prod(self._output_size)), fc_dim) 71 | self.add_module("fc{}".format(k + 1), fc) 72 | self.add_module("fc_relu{}".format(k + 1), nn.ReLU()) 73 | self.fcs.append(fc) 74 | self._output_size = fc_dim 75 | 76 | for layer in self.conv_norm_relus: 77 | weight_init.c2_msra_fill(layer) 78 | for layer in self.fcs: 79 | weight_init.c2_xavier_fill(layer) 80 | 81 | @classmethod 82 | def from_config(cls, cfg, input_shape): 83 | num_conv = cfg.MODEL.ROI_BOX_HEAD.NUM_CONV 84 | conv_dim = cfg.MODEL.ROI_BOX_HEAD.CONV_DIM 85 | num_fc = cfg.MODEL.ROI_BOX_HEAD.NUM_FC 86 | fc_dim = cfg.MODEL.ROI_BOX_HEAD.FC_DIM 87 | return { 88 | "input_shape": input_shape, 89 | "conv_dims": [conv_dim] * num_conv, 90 | "fc_dims": [fc_dim] * num_fc, 91 | "conv_norm": cfg.MODEL.ROI_BOX_HEAD.NORM, 92 | } 93 | 94 | def forward(self, x): 95 | for layer in self: 96 | x = layer(x) 97 | return x 98 | 99 | @property 100 | @torch.jit.unused 101 | def output_shape(self): 102 | """ 103 | Returns: 104 | ShapeSpec: the output feature shape 105 | """ 106 | o = self._output_size 107 | if isinstance(o, int): 108 | return ShapeSpec(channels=o) 109 | else: 110 | return ShapeSpec(channels=o[0], height=o[1], width=o[2]) 111 | 112 | 113 | def build_box_head(cfg, input_shape): 114 | """ 115 | Build a box head defined by `cfg.MODEL.ROI_BOX_HEAD.NAME`. 116 | """ 117 | name = cfg.MODEL.ROI_BOX_HEAD.NAME 118 | return ROI_BOX_HEAD_REGISTRY.get(name)(cfg, input_shape) 119 | -------------------------------------------------------------------------------- /solver/lr_scheduler.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | import math 3 | from bisect import bisect_right 4 | from typing import List 5 | import torch 6 | 7 | # NOTE: PyTorch's LR scheduler interface uses names that assume the LR changes 8 | # only on epoch boundaries. We typically use iteration based schedules instead. 9 | # As a result, "epoch" (e.g., as in self.last_epoch) should be understood to mean 10 | # "iteration" instead. 11 | 12 | # FIXME: ideally this would be achieved with a CombinedLRScheduler, separating 13 | # MultiStepLR with WarmupLR but the current LRScheduler design doesn't allow it. 
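# Illustrative example (values assumed, not defaults): with base_lr=0.02,
# milestones=[60000, 80000], gamma=0.1, warmup_factor=0.001 and warmup_iters=1000,
# WarmupMultiStepLR ramps the LR linearly from ~2e-5 up to 0.02 over the first 1000
# iterations, then drops it to 0.002 at iteration 60000 and to 0.0002 at 80000.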
14 | 15 | 16 | class WarmupMultiStepLR(torch.optim.lr_scheduler._LRScheduler): 17 | def __init__( 18 | self, 19 | optimizer: torch.optim.Optimizer, 20 | milestones: List[int], 21 | gamma: float = 0.1, 22 | warmup_factor: float = 0.001, 23 | warmup_iters: int = 1000, 24 | warmup_method: str = "linear", 25 | last_epoch: int = -1, 26 | ): 27 | if not list(milestones) == sorted(milestones): 28 | raise ValueError( 29 | "Milestones should be a list of" " increasing integers. Got {}", milestones 30 | ) 31 | self.milestones = milestones 32 | self.gamma = gamma 33 | self.warmup_factor = warmup_factor 34 | self.warmup_iters = warmup_iters 35 | self.warmup_method = warmup_method 36 | super().__init__(optimizer, last_epoch) 37 | 38 | def get_lr(self) -> List[float]: 39 | warmup_factor = _get_warmup_factor_at_iter( 40 | self.warmup_method, self.last_epoch, self.warmup_iters, self.warmup_factor 41 | ) 42 | return [ 43 | base_lr * warmup_factor * self.gamma ** bisect_right(self.milestones, self.last_epoch) 44 | for base_lr in self.base_lrs 45 | ] 46 | 47 | def _compute_values(self) -> List[float]: 48 | # The new interface 49 | return self.get_lr() 50 | 51 | 52 | class WarmupCosineLR(torch.optim.lr_scheduler._LRScheduler): 53 | def __init__( 54 | self, 55 | optimizer: torch.optim.Optimizer, 56 | max_iters: int, 57 | warmup_factor: float = 0.001, 58 | warmup_iters: int = 1000, 59 | warmup_method: str = "linear", 60 | last_epoch: int = -1, 61 | ): 62 | self.max_iters = max_iters 63 | self.warmup_factor = warmup_factor 64 | self.warmup_iters = warmup_iters 65 | self.warmup_method = warmup_method 66 | super().__init__(optimizer, last_epoch) 67 | 68 | def get_lr(self) -> List[float]: 69 | warmup_factor = _get_warmup_factor_at_iter( 70 | self.warmup_method, self.last_epoch, self.warmup_iters, self.warmup_factor 71 | ) 72 | # Different definitions of half-cosine with warmup are possible. For 73 | # simplicity we multiply the standard half-cosine schedule by the warmup 74 | # factor. An alternative is to start the period of the cosine at warmup_iters 75 | # instead of at 0. In the case that warmup_iters << max_iters the two are 76 | # very close to each other. 77 | return [ 78 | base_lr 79 | * warmup_factor 80 | * 0.5 81 | * (1.0 + math.cos(math.pi * self.last_epoch / self.max_iters)) 82 | for base_lr in self.base_lrs 83 | ] 84 | 85 | def _compute_values(self) -> List[float]: 86 | # The new interface 87 | return self.get_lr() 88 | 89 | 90 | def _get_warmup_factor_at_iter( 91 | method: str, iter: int, warmup_iters: int, warmup_factor: float 92 | ) -> float: 93 | """ 94 | Return the learning rate warmup factor at a specific iteration. 95 | See :paper:`ImageNet in 1h` for more details. 96 | 97 | Args: 98 | method (str): warmup method; either "constant" or "linear". 99 | iter (int): iteration at which to calculate the warmup factor. 100 | warmup_iters (int): the number of warmup iterations. 101 | warmup_factor (float): the base warmup factor (the meaning changes according 102 | to the method used). 103 | 104 | Returns: 105 | float: the effective warmup factor at the given iteration. 
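    Example:
        With ``method="linear"``, ``warmup_iters=1000`` and ``warmup_factor=0.001``,
        iteration 500 gives ``0.001 * 0.5 + 0.5 = 0.5005``; any iteration >= 1000
        gives ``1.0``.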
106 | """ 107 | if iter >= warmup_iters: 108 | return 1.0 109 | 110 | if method == "constant": 111 | return warmup_factor 112 | elif method == "linear": 113 | alpha = iter / warmup_iters 114 | return warmup_factor * (1 - alpha) + alpha 115 | else: 116 | raise ValueError("Unknown warmup method: {}".format(method)) 117 | -------------------------------------------------------------------------------- /layers/wrappers.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | """ 3 | Wrappers around on some nn functions, mainly to support empty tensors. 4 | 5 | Ideally, add support directly in PyTorch to empty tensors in those functions. 6 | 7 | These can be removed once https://github.com/pytorch/pytorch/issues/12013 8 | is implemented 9 | """ 10 | 11 | from typing import List 12 | import torch 13 | from torch.nn import functional as F 14 | 15 | from detectron2.utils.env import TORCH_VERSION 16 | 17 | 18 | def cat(tensors: List[torch.Tensor], dim: int = 0): 19 | """ 20 | Efficient version of torch.cat that avoids a copy if there is only a single element in a list 21 | """ 22 | assert isinstance(tensors, (list, tuple)) 23 | if len(tensors) == 1: 24 | return tensors[0] 25 | return torch.cat(tensors, dim) 26 | 27 | 28 | class _NewEmptyTensorOp(torch.autograd.Function): 29 | @staticmethod 30 | def forward(ctx, x, new_shape): 31 | ctx.shape = x.shape 32 | return x.new_empty(new_shape) 33 | 34 | @staticmethod 35 | def backward(ctx, grad): 36 | shape = ctx.shape 37 | return _NewEmptyTensorOp.apply(grad, shape), None 38 | 39 | 40 | class Conv2d(torch.nn.Conv2d): 41 | """ 42 | A wrapper around :class:`torch.nn.Conv2d` to support empty inputs and more features. 43 | """ 44 | 45 | def __init__(self, *args, **kwargs): 46 | """ 47 | Extra keyword arguments supported in addition to those in `torch.nn.Conv2d`: 48 | 49 | Args: 50 | norm (nn.Module, optional): a normalization layer 51 | activation (callable(Tensor) -> Tensor): a callable activation function 52 | 53 | It assumes that norm layer is used before activation. 54 | """ 55 | norm = kwargs.pop("norm", None) 56 | activation = kwargs.pop("activation", None) 57 | super().__init__(*args, **kwargs) 58 | 59 | self.norm = norm 60 | self.activation = activation 61 | 62 | def forward(self, x): 63 | # torchscript does not support SyncBatchNorm yet 64 | # https://github.com/pytorch/pytorch/issues/40507 65 | # and we skip these codes in torchscript since: 66 | # 1. currently we only support torchscript in evaluation mode 67 | # 2. features needed by exporting module to torchscript are added in PyTorch 1.6 or 68 | # later version, `Conv2d` in these PyTorch versions has already supported empty inputs. 69 | if not torch.jit.is_scripting(): 70 | if x.numel() == 0 and self.training: 71 | # https://github.com/pytorch/pytorch/issues/12013 72 | assert not isinstance( 73 | self.norm, torch.nn.SyncBatchNorm 74 | ), "SyncBatchNorm does not support empty inputs!" 
75 | 76 | x = F.conv2d( 77 | x, self.weight, self.bias, self.stride, self.padding, self.dilation, self.groups 78 | ) 79 | if self.norm is not None: 80 | x = self.norm(x) 81 | if self.activation is not None: 82 | x = self.activation(x) 83 | return x 84 | 85 | 86 | ConvTranspose2d = torch.nn.ConvTranspose2d 87 | BatchNorm2d = torch.nn.BatchNorm2d 88 | interpolate = torch.nn.functional.interpolate 89 | 90 | 91 | if TORCH_VERSION > (1, 5): 92 | Linear = torch.nn.Linear 93 | else: 94 | 95 | class Linear(torch.nn.Linear): 96 | """ 97 | A wrapper around :class:`torch.nn.Linear` to support empty inputs and more features. 98 | Because of https://github.com/pytorch/pytorch/issues/34202 99 | """ 100 | 101 | def forward(self, x): 102 | if x.numel() == 0: 103 | output_shape = [x.shape[0], self.weight.shape[0]] 104 | 105 | empty = _NewEmptyTensorOp.apply(x, output_shape) 106 | if self.training: 107 | # This is to make DDP happy. 108 | # DDP expects all workers to have gradient w.r.t the same set of parameters. 109 | _dummy = sum(x.view(-1)[0] for x in self.parameters()) * 0.0 110 | return empty + _dummy 111 | else: 112 | return empty 113 | 114 | x = super().forward(x) 115 | return x 116 | 117 | 118 | def nonzero_tuple(x): 119 | """ 120 | A 'as_tuple=True' version of torch.nonzero to support torchscript. 121 | because of https://github.com/pytorch/pytorch/issues/38718 122 | """ 123 | if torch.jit.is_scripting(): 124 | if x.dim() == 0: 125 | return x.unsqueeze(0).nonzero().unbind(1) 126 | return x.nonzero().unbind(1) 127 | else: 128 | return x.nonzero(as_tuple=True) 129 | -------------------------------------------------------------------------------- /projects/panoptic_deeplab/dataset_mapper.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | import copy 3 | import logging 4 | import numpy as np 5 | from typing import Callable, List, Union 6 | import torch 7 | from panopticapi.utils import rgb2id 8 | 9 | from detectron2.config import configurable 10 | from detectron2.data import MetadataCatalog 11 | from detectron2.data import detection_utils as utils 12 | from detectron2.data import transforms as T 13 | 14 | from .target_generator import PanopticDeepLabTargetGenerator 15 | 16 | __all__ = ["PanopticDeeplabDatasetMapper"] 17 | 18 | 19 | class PanopticDeeplabDatasetMapper: 20 | """ 21 | The callable currently does the following: 22 | 23 | 1. Read the image from "file_name" and label from "pan_seg_file_name" 24 | 2. Applies random scale, crop and flip transforms to image and label 25 | 3. Prepare data to Tensor and generate training targets from label 26 | """ 27 | 28 | @configurable 29 | def __init__( 30 | self, 31 | *, 32 | augmentations: List[Union[T.Augmentation, T.Transform]], 33 | image_format: str, 34 | panoptic_target_generator: Callable, 35 | ): 36 | """ 37 | NOTE: this interface is experimental. 38 | 39 | Args: 40 | augmentations: a list of augmentations or deterministic transforms to apply 41 | image_format: an image format supported by :func:`detection_utils.read_image`. 42 | panoptic_target_generator: a callable that takes "panoptic_seg" and 43 | "segments_info" to generate training targets for the model. 
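        Example:
            A typical construction goes through the config machinery (sketch only;
            assumes a fully populated ``cfg``)::

                mapper = PanopticDeeplabDatasetMapper(cfg)
                out = mapper(dataset_dict)  # dict with "image" plus Panoptic-DeepLab targets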
44 | """ 45 | # fmt: off 46 | self.augmentations = T.AugmentationList(augmentations) 47 | self.image_format = image_format 48 | # fmt: on 49 | logger = logging.getLogger(__name__) 50 | logger.info("Augmentations used in training: " + str(augmentations)) 51 | 52 | self.panoptic_target_generator = panoptic_target_generator 53 | 54 | @classmethod 55 | def from_config(cls, cfg): 56 | augs = [ 57 | T.ResizeShortestEdge( 58 | cfg.INPUT.MIN_SIZE_TRAIN, 59 | cfg.INPUT.MAX_SIZE_TRAIN, 60 | cfg.INPUT.MIN_SIZE_TRAIN_SAMPLING, 61 | ) 62 | ] 63 | if cfg.INPUT.CROP.ENABLED: 64 | augs.append(T.RandomCrop(cfg.INPUT.CROP.TYPE, cfg.INPUT.CROP.SIZE)) 65 | augs.append(T.RandomFlip()) 66 | 67 | # Assume always applies to the training set. 68 | dataset_names = cfg.DATASETS.TRAIN 69 | meta = MetadataCatalog.get(dataset_names[0]) 70 | panoptic_target_generator = PanopticDeepLabTargetGenerator( 71 | ignore_label=meta.ignore_label, 72 | thing_ids=list(meta.thing_dataset_id_to_contiguous_id.values()), 73 | sigma=cfg.INPUT.GAUSSIAN_SIGMA, 74 | ignore_stuff_in_offset=cfg.INPUT.IGNORE_STUFF_IN_OFFSET, 75 | small_instance_area=cfg.INPUT.SMALL_INSTANCE_AREA, 76 | small_instance_weight=cfg.INPUT.SMALL_INSTANCE_WEIGHT, 77 | ignore_crowd_in_semantic=cfg.INPUT.IGNORE_CROWD_IN_SEMANTIC, 78 | ) 79 | 80 | ret = { 81 | "augmentations": augs, 82 | "image_format": cfg.INPUT.FORMAT, 83 | "panoptic_target_generator": panoptic_target_generator, 84 | } 85 | return ret 86 | 87 | def __call__(self, dataset_dict): 88 | """ 89 | Args: 90 | dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format. 91 | 92 | Returns: 93 | dict: a format that builtin models in detectron2 accept 94 | """ 95 | dataset_dict = copy.deepcopy(dataset_dict) # it will be modified by code below 96 | # Load image. 97 | image = utils.read_image(dataset_dict["file_name"], format=self.image_format) 98 | utils.check_image_size(dataset_dict, image) 99 | # Panoptic label is encoded in RGB image. 100 | pan_seg_gt = utils.read_image(dataset_dict.pop("pan_seg_file_name"), "RGB") 101 | 102 | # Reuses semantic transform for panoptic labels. 103 | aug_input = T.AugInput(image, sem_seg=pan_seg_gt) 104 | _ = self.augmentations(aug_input) 105 | image, pan_seg_gt = aug_input.image, aug_input.sem_seg 106 | 107 | # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory, 108 | # but not efficient on large generic data structures due to the use of pickle & mp.Queue. 109 | # Therefore it's important to use torch.Tensor. 110 | dataset_dict["image"] = torch.as_tensor(np.ascontiguousarray(image.transpose(2, 0, 1))) 111 | 112 | # Generates training targets for Panoptic-DeepLab. 113 | targets = self.panoptic_target_generator(rgb2id(pan_seg_gt), dataset_dict["segments_info"]) 114 | dataset_dict.update(targets) 115 | 116 | return dataset_dict 117 | -------------------------------------------------------------------------------- /export/torchscript.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | 3 | import os 4 | import torch 5 | 6 | from detectron2.utils.file_io import PathManager 7 | 8 | from .torchscript_patch import patch_instances 9 | 10 | 11 | def export_torchscript_with_instances(model, fields): 12 | """ 13 | Run :func:`torch.jit.script` on a model that uses the :class:`Instances` class. Since 14 | attributes of :class:`Instances` are "dynamically" added in eager mode,it is difficult 15 | for torchscript to support it out of the box. 
This function is made to support scripting 16 | a model that uses :class:`Instances`. It does the following: 17 | 18 | 1. Create a scriptable ``new_Instances`` class which behaves similarly to ``Instances``, 19 | but with all attributes been "static". 20 | The attributes need to be statically declared in the ``fields`` argument. 21 | 2. Register ``new_Instances`` to torchscript, and force torchscript to 22 | use it when trying to compile ``Instances``. 23 | 24 | After this function, the process will be reverted. User should be able to script another model 25 | using different fields. 26 | 27 | Example: 28 | Assume that ``Instances`` in the model consist of two attributes named 29 | ``proposal_boxes`` and ``objectness_logits`` with type :class:`Boxes` and 30 | :class:`Tensor` respectively during inference. You can call this function like: 31 | 32 | :: 33 | fields = {"proposal_boxes": Boxes, "objectness_logits": torch.Tensor} 34 | torchscipt_model = export_torchscript_with_instances(model, fields) 35 | 36 | Note: 37 | Currently we only support models in evaluation mode. 38 | 39 | Args: 40 | model (nn.Module): The input model to be exported to torchscript. 41 | fields (Dict[str, type]): Attribute names and corresponding type that 42 | ``Instances`` will use in the model. Note that all attributes used in ``Instances`` 43 | need to be added, regarldess of whether they are inputs/outputs of the model. 44 | Data type not defined in detectron2 is not supported for now. 45 | 46 | Returns: 47 | torch.jit.ScriptModule: the input model in torchscript format 48 | """ 49 | 50 | assert ( 51 | not model.training 52 | ), "Currently we only support exporting models in evaluation mode to torchscript" 53 | 54 | with patch_instances(fields): 55 | scripted_model = torch.jit.script(model) 56 | return scripted_model 57 | 58 | 59 | def dump_torchscript_IR(model, dir): 60 | """ 61 | Dump IR of a TracedModule/ScriptModule at various levels. 62 | Useful for debugging. 63 | 64 | Args: 65 | model (TracedModule or ScriptModule): traced or scripted module 66 | dir (str): output directory to dump files. 67 | """ 68 | PathManager.mkdirs(dir) 69 | 70 | def _get_script_mod(mod): 71 | if isinstance(mod, torch.jit.TracedModule): 72 | return mod._actual_script_module 73 | return mod 74 | 75 | # Dump pretty-printed code: https://pytorch.org/docs/stable/jit.html#inspecting-code 76 | with PathManager.open(os.path.join(dir, "model_ts_code.txt"), "w") as f: 77 | 78 | def get_code(mod): 79 | # Try a few ways to get code using private attributes. 80 | try: 81 | # This contains more information than just `mod.code` 82 | return _get_script_mod(mod)._c.code 83 | except AttributeError: 84 | pass 85 | try: 86 | return mod.code 87 | except AttributeError: 88 | return None 89 | 90 | def dump_code(prefix, mod): 91 | code = get_code(mod) 92 | name = prefix or "root model" 93 | if code is None: 94 | f.write(f"Could not found code for {name} (type={mod.original_name})\n") 95 | f.write("\n") 96 | else: 97 | f.write(f"\nCode for {name}, type={mod.original_name}:\n") 98 | f.write(code) 99 | f.write("\n") 100 | f.write("-" * 80) 101 | 102 | for name, m in mod.named_children(): 103 | dump_code(prefix + "." 
+ name, m) 104 | 105 | dump_code("", model) 106 | 107 | # Recursively dump IR of all modules 108 | with PathManager.open(os.path.join(dir, "model_ts_IR.txt"), "w") as f: 109 | try: 110 | f.write(_get_script_mod(model)._c.dump_to_str(True, False, False)) 111 | except AttributeError: 112 | pass 113 | 114 | # Dump IR of the entire graph (all submodules inlined) 115 | with PathManager.open(os.path.join(dir, "model_ts_IR_inlined.txt"), "w") as f: 116 | f.write(str(model.inlined_graph)) 117 | 118 | # Dump the model structure in pytorch style 119 | with PathManager.open(os.path.join(dir, "model.txt"), "w") as f: 120 | f.write(str(model)) 121 | -------------------------------------------------------------------------------- /layers/roi_align.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | from torch import nn 3 | from torchvision.ops import roi_align as tv_roi_align 4 | 5 | try: 6 | from torchvision import __version__ 7 | 8 | version = tuple(int(x) for x in __version__.split(".")[:2]) 9 | USE_TORCHVISION = version >= (0, 7) # https://github.com/pytorch/vision/pull/2438 10 | except ImportError: # only open source torchvision has __version__ 11 | USE_TORCHVISION = True 12 | 13 | 14 | if USE_TORCHVISION: 15 | roi_align = tv_roi_align 16 | else: 17 | from torch.nn.modules.utils import _pair 18 | from torch.autograd import Function 19 | from torch.autograd.function import once_differentiable 20 | from detectron2 import _C 21 | 22 | class _ROIAlign(Function): 23 | @staticmethod 24 | def forward(ctx, input, roi, output_size, spatial_scale, sampling_ratio, aligned): 25 | ctx.save_for_backward(roi) 26 | ctx.output_size = _pair(output_size) 27 | ctx.spatial_scale = spatial_scale 28 | ctx.sampling_ratio = sampling_ratio 29 | ctx.input_shape = input.size() 30 | ctx.aligned = aligned 31 | output = _C.roi_align_forward( 32 | input, roi, spatial_scale, output_size[0], output_size[1], sampling_ratio, aligned 33 | ) 34 | return output 35 | 36 | @staticmethod 37 | @once_differentiable 38 | def backward(ctx, grad_output): 39 | (rois,) = ctx.saved_tensors 40 | output_size = ctx.output_size 41 | spatial_scale = ctx.spatial_scale 42 | sampling_ratio = ctx.sampling_ratio 43 | bs, ch, h, w = ctx.input_shape 44 | grad_input = _C.roi_align_backward( 45 | grad_output, 46 | rois, 47 | spatial_scale, 48 | output_size[0], 49 | output_size[1], 50 | bs, 51 | ch, 52 | h, 53 | w, 54 | sampling_ratio, 55 | ctx.aligned, 56 | ) 57 | return grad_input, None, None, None, None, None 58 | 59 | roi_align = _ROIAlign.apply 60 | 61 | 62 | # NOTE: torchvision's RoIAlign has a different default aligned=False 63 | class ROIAlign(nn.Module): 64 | def __init__(self, output_size, spatial_scale, sampling_ratio, aligned=True): 65 | """ 66 | Args: 67 | output_size (tuple): h, w 68 | spatial_scale (float): scale the input boxes by this number 69 | sampling_ratio (int): number of inputs samples to take for each output 70 | sample. 0 to take samples densely. 71 | aligned (bool): if False, use the legacy implementation in 72 | Detectron. If True, align the results more perfectly. 73 | 74 | Note: 75 | The meaning of aligned=True: 76 | 77 | Given a continuous coordinate c, its two neighboring pixel indices (in our 78 | pixel model) are computed by floor(c - 0.5) and ceil(c - 0.5). For example, 79 | c=1.3 has pixel neighbors with discrete indices [0] and [1] (which are sampled 80 | from the underlying signal at continuous coordinates 0.5 and 1.5). 
But the original 81 | roi_align (aligned=False) does not subtract the 0.5 when computing neighboring 82 | pixel indices and therefore it uses pixels with a slightly incorrect alignment 83 | (relative to our pixel model) when performing bilinear interpolation. 84 | 85 | With `aligned=True`, 86 | we first appropriately scale the ROI and then shift it by -0.5 87 | prior to calling roi_align. This produces the correct neighbors; see 88 | detectron2/tests/test_roi_align.py for verification. 89 | 90 | The difference does not make a difference to the model's performance if 91 | ROIAlign is used together with conv layers. 92 | """ 93 | super(ROIAlign, self).__init__() 94 | self.output_size = output_size 95 | self.spatial_scale = spatial_scale 96 | self.sampling_ratio = sampling_ratio 97 | self.aligned = aligned 98 | 99 | def forward(self, input, rois): 100 | """ 101 | Args: 102 | input: NCHW images 103 | rois: Bx5 boxes. First column is the index into N. The other 4 columns are xyxy. 104 | """ 105 | assert rois.dim() == 2 and rois.size(1) == 5 106 | return roi_align( 107 | input, 108 | rois.to(dtype=input.dtype), 109 | self.output_size, 110 | self.spatial_scale, 111 | self.sampling_ratio, 112 | self.aligned, 113 | ) 114 | 115 | def __repr__(self): 116 | tmpstr = self.__class__.__name__ + "(" 117 | tmpstr += "output_size=" + str(self.output_size) 118 | tmpstr += ", spatial_scale=" + str(self.spatial_scale) 119 | tmpstr += ", sampling_ratio=" + str(self.sampling_ratio) 120 | tmpstr += ", aligned=" + str(self.aligned) 121 | tmpstr += ")" 122 | return tmpstr 123 | -------------------------------------------------------------------------------- /layers/aspp.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | 3 | from copy import deepcopy 4 | import fvcore.nn.weight_init as weight_init 5 | import torch 6 | from torch import nn 7 | from torch.nn import functional as F 8 | 9 | from .batch_norm import get_norm 10 | from .wrappers import Conv2d 11 | 12 | 13 | class ASPP(nn.Module): 14 | """ 15 | Atrous Spatial Pyramid Pooling (ASPP). 16 | """ 17 | 18 | def __init__( 19 | self, 20 | in_channels, 21 | out_channels, 22 | dilations, 23 | *, 24 | norm, 25 | activation, 26 | pool_kernel_size=None, 27 | dropout: float = 0.0, 28 | ): 29 | """ 30 | Args: 31 | in_channels (int): number of input channels for ASPP. 32 | out_channels (int): number of output channels. 33 | dilations (list): a list of 3 dilations in ASPP. 34 | norm (str or callable): normalization for all conv layers. 35 | See :func:`layers.get_norm` for supported format. norm is 36 | applied to all conv layers except the conv following 37 | global average pooling. 38 | activation (callable): activation function. 39 | pool_kernel_size (tuple, list): the average pooling size (kh, kw) 40 | for image pooling layer in ASPP. If set to None, it always 41 | performs global average pooling. If not None, it must be 42 | divisible by the shape of inputs in forward(). It is recommended 43 | to use a fixed input feature size in training, and set this 44 | option to match this size, so that it performs global average 45 | pooling in training, and the size of the pooling window stays 46 | consistent in inference. 47 | dropout (float): apply dropout on the output of ASPP. 
It is used in 48 | the official DeepLab implementation with a rate of 0.1: 49 | https://github.com/tensorflow/models/blob/21b73d22f3ed05b650e85ac50849408dd36de32e/research/deeplab/model.py#L532 # noqa 50 | """ 51 | super(ASPP, self).__init__() 52 | assert len(dilations) == 3, "ASPP expects 3 dilations, got {}".format(len(dilations)) 53 | self.pool_kernel_size = pool_kernel_size 54 | self.dropout = dropout 55 | use_bias = norm == "" 56 | self.convs = nn.ModuleList() 57 | # conv 1x1 58 | self.convs.append( 59 | Conv2d( 60 | in_channels, 61 | out_channels, 62 | kernel_size=1, 63 | bias=use_bias, 64 | norm=get_norm(norm, out_channels), 65 | activation=deepcopy(activation), 66 | ) 67 | ) 68 | weight_init.c2_xavier_fill(self.convs[-1]) 69 | # atrous convs 70 | for dilation in dilations: 71 | self.convs.append( 72 | Conv2d( 73 | in_channels, 74 | out_channels, 75 | kernel_size=3, 76 | padding=dilation, 77 | dilation=dilation, 78 | bias=use_bias, 79 | norm=get_norm(norm, out_channels), 80 | activation=deepcopy(activation), 81 | ) 82 | ) 83 | weight_init.c2_xavier_fill(self.convs[-1]) 84 | # image pooling 85 | # We do not add BatchNorm because the spatial resolution is 1x1, 86 | # although the original TF implementation has BatchNorm. 87 | if pool_kernel_size is None: 88 | image_pooling = nn.Sequential( 89 | nn.AdaptiveAvgPool2d(1), 90 | Conv2d(in_channels, out_channels, 1, bias=True, activation=deepcopy(activation)), 91 | ) 92 | else: 93 | image_pooling = nn.Sequential( 94 | nn.AvgPool2d(kernel_size=pool_kernel_size, stride=1), 95 | Conv2d(in_channels, out_channels, 1, bias=True, activation=deepcopy(activation)), 96 | ) 97 | weight_init.c2_xavier_fill(image_pooling[1]) 98 | self.convs.append(image_pooling) 99 | 100 | self.project = Conv2d( 101 | 5 * out_channels, 102 | out_channels, 103 | kernel_size=1, 104 | bias=use_bias, 105 | norm=get_norm(norm, out_channels), 106 | activation=deepcopy(activation), 107 | ) 108 | weight_init.c2_xavier_fill(self.project) 109 | 110 | def forward(self, x): 111 | size = x.shape[-2:] 112 | if self.pool_kernel_size is not None: 113 | if size[0] % self.pool_kernel_size[0] or size[1] % self.pool_kernel_size[1]: 114 | raise ValueError( 115 | "`pool_kernel_size` must be divisible by the shape of inputs. " 116 | "Input size: {} `pool_kernel_size`: {}".format(size, self.pool_kernel_size) 117 | ) 118 | res = [] 119 | for conv in self.convs: 120 | res.append(conv(x)) 121 | res[-1] = F.interpolate(res[-1], size=size, mode="bilinear", align_corners=False) 122 | res = torch.cat(res, dim=1) 123 | res = self.project(res) 124 | res = F.dropout(res, self.dropout, training=self.training) if self.dropout > 0 else res 125 | return res 126 | -------------------------------------------------------------------------------- /evaluation/fast_eval_api.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | import copy 3 | import numpy as np 4 | import time 5 | from pycocotools.cocoeval import COCOeval 6 | 7 | from detectron2 import _C 8 | 9 | 10 | class COCOeval_opt(COCOeval): 11 | """ 12 | This is a slightly modified version of the original COCO API, where the functions evaluateImg() 13 | and accumulate() are implemented in C++ to speed up evaluation. 14 | """ 15 | 16 | def evaluate(self): 17 | """ 18 | Run per image evaluation on given images and store results in self.evalImgs_cpp, a 19 | datastructure that isn't readable from Python but is used by a c++ implementation of 20 | accumulate().
Unlike the original COCO PythonAPI, we don't populate the datastructure 21 | self.evalImgs because this datastructure is a computational bottleneck. 22 | :return: None 23 | """ 24 | tic = time.time() 25 | 26 | print("Running per image evaluation...") 27 | p = self.params 28 | # add backward compatibility if useSegm is specified in params 29 | if p.useSegm is not None: 30 | p.iouType = "segm" if p.useSegm == 1 else "bbox" 31 | print("useSegm (deprecated) is not None. Running {} evaluation".format(p.iouType)) 32 | print("Evaluate annotation type *{}*".format(p.iouType)) 33 | p.imgIds = list(np.unique(p.imgIds)) 34 | if p.useCats: 35 | p.catIds = list(np.unique(p.catIds)) 36 | p.maxDets = sorted(p.maxDets) 37 | self.params = p 38 | 39 | self._prepare() 40 | 41 | # loop through images, area range, max detection number 42 | catIds = p.catIds if p.useCats else [-1] 43 | 44 | if p.iouType == "segm" or p.iouType == "bbox": 45 | computeIoU = self.computeIoU 46 | elif p.iouType == "keypoints": 47 | computeIoU = self.computeOks 48 | self.ious = { 49 | (imgId, catId): computeIoU(imgId, catId) for imgId in p.imgIds for catId in catIds 50 | } 51 | 52 | maxDet = p.maxDets[-1] 53 | 54 | # <<<< Beginning of code differences with original COCO API 55 | def convert_instances_to_cpp(instances, is_det=False): 56 | # Convert annotations for a list of instances in an image to a format that's fast 57 | # to access in C++ 58 | instances_cpp = [] 59 | for instance in instances: 60 | instance_cpp = _C.InstanceAnnotation( 61 | int(instance["id"]), 62 | instance["score"] if is_det else instance.get("score", 0.0), 63 | instance["area"], 64 | bool(instance.get("iscrowd", 0)), 65 | bool(instance.get("ignore", 0)), 66 | ) 67 | instances_cpp.append(instance_cpp) 68 | return instances_cpp 69 | 70 | # Convert GT annotations, detections, and IOUs to a format that's fast to access in C++ 71 | ground_truth_instances = [ 72 | [convert_instances_to_cpp(self._gts[imgId, catId]) for catId in p.catIds] 73 | for imgId in p.imgIds 74 | ] 75 | detected_instances = [ 76 | [convert_instances_to_cpp(self._dts[imgId, catId], is_det=True) for catId in p.catIds] 77 | for imgId in p.imgIds 78 | ] 79 | ious = [[self.ious[imgId, catId] for catId in catIds] for imgId in p.imgIds] 80 | 81 | if not p.useCats: 82 | # For each image, flatten per-category lists into a single list 83 | ground_truth_instances = [[[o for c in i for o in c]] for i in ground_truth_instances] 84 | detected_instances = [[[o for c in i for o in c]] for i in detected_instances] 85 | 86 | # Call C++ implementation of self.evaluateImgs() 87 | self._evalImgs_cpp = _C.COCOevalEvaluateImages( 88 | p.areaRng, maxDet, p.iouThrs, ious, ground_truth_instances, detected_instances 89 | ) 90 | self._evalImgs = None 91 | 92 | self._paramsEval = copy.deepcopy(self.params) 93 | toc = time.time() 94 | print("COCOeval_opt.evaluate() finished in {:0.2f} seconds.".format(toc - tic)) 95 | # >>>> End of code differences with original COCO API 96 | 97 | def accumulate(self): 98 | """ 99 | Accumulate per image evaluation results and store the result in self.eval. 
Does not 100 | support changing parameter settings from those used by self.evaluate(). 101 | """ 102 | print("Accumulating evaluation results...") 103 | tic = time.time() 104 | if not hasattr(self, "_evalImgs_cpp"): 105 | print("Please run evaluate() first") 106 | 107 | self.eval = _C.COCOevalAccumulate(self._paramsEval, self._evalImgs_cpp) 108 | 109 | # recall is num_iou_thresholds X num_categories X num_area_ranges X num_max_detections 110 | self.eval["recall"] = np.array(self.eval["recall"]).reshape( 111 | self.eval["counts"][:1] + self.eval["counts"][2:] 112 | ) 113 | 114 | # precision and scores are num_iou_thresholds X num_recall_thresholds X num_categories X 115 | # num_area_ranges X num_max_detections 116 | self.eval["precision"] = np.array(self.eval["precision"]).reshape(self.eval["counts"]) 117 | self.eval["scores"] = np.array(self.eval["scores"]).reshape(self.eval["counts"]) 118 | toc = time.time() 119 | print("COCOeval_opt.accumulate() finished in {:0.2f} seconds.".format(toc - tic)) 120 | -------------------------------------------------------------------------------- /export/caffe2_patch.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | 3 | import contextlib 4 | from unittest import mock 5 | import torch 6 | 7 | from detectron2.modeling import poolers 8 | from detectron2.modeling.proposal_generator import rpn 9 | from detectron2.modeling.roi_heads import keypoint_head, mask_head 10 | from detectron2.modeling.roi_heads.fast_rcnn import FastRCNNOutputLayers 11 | 12 | from .c10 import ( 13 | Caffe2Compatible, 14 | Caffe2FastRCNNOutputsInference, 15 | Caffe2KeypointRCNNInference, 16 | Caffe2MaskRCNNInference, 17 | Caffe2ROIPooler, 18 | Caffe2RPN, 19 | ) 20 | 21 | 22 | class GenericMixin(object): 23 | pass 24 | 25 | 26 | class Caffe2CompatibleConverter(object): 27 | """ 28 | A GenericUpdater which implements the `create_from` interface by modifying 29 | the module object and assigning it another class, replaceCls. 30 | """ 31 | 32 | def __init__(self, replaceCls): 33 | self.replaceCls = replaceCls 34 | 35 | def create_from(self, module): 36 | # update module's class to the new class 37 | assert isinstance(module, torch.nn.Module) 38 | if issubclass(self.replaceCls, GenericMixin): 39 | # replaceCls should act as a mixin; create a new class on-the-fly 40 | new_class = type( 41 | "{}MixedWith{}".format(self.replaceCls.__name__, module.__class__.__name__), 42 | (self.replaceCls, module.__class__), 43 | {}, # {"new_method": lambda self: ...}, 44 | ) 45 | module.__class__ = new_class 46 | else: 47 | # replaceCls is a complete class; this allows an arbitrary class swap 48 | module.__class__ = self.replaceCls 49 | 50 | # initialize Caffe2Compatible 51 | if isinstance(module, Caffe2Compatible): 52 | module.tensor_mode = False 53 | 54 | return module 55 | 56 | 57 | def patch(model, target, updater, *args, **kwargs): 58 | """ 59 | Recursively (post-order) update all modules with the target type and its 60 | subclasses, making an initialization/composition/inheritance/... via 61 | updater.create_from.
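A usage sketch, mirroring `patch_generalized_rcnn` below: `patch(model, rpn.RPN, Caffe2CompatibleConverter(Caffe2RPN))` swaps every RPN submodule's class for its Caffe2-compatible counterpart.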
62 | """ 63 | for name, module in model.named_children(): 64 | model._modules[name] = patch(module, target, updater, *args, **kwargs) 65 | if isinstance(model, target): 66 | return updater.create_from(model, *args, **kwargs) 67 | return model 68 | 69 | 70 | def patch_generalized_rcnn(model): 71 | ccc = Caffe2CompatibleConverter 72 | model = patch(model, rpn.RPN, ccc(Caffe2RPN)) 73 | model = patch(model, poolers.ROIPooler, ccc(Caffe2ROIPooler)) 74 | 75 | return model 76 | 77 | 78 | @contextlib.contextmanager 79 | def mock_fastrcnn_outputs_inference( 80 | tensor_mode, check=True, box_predictor_type=FastRCNNOutputLayers 81 | ): 82 | with mock.patch.object( 83 | box_predictor_type, 84 | "inference", 85 | autospec=True, 86 | side_effect=Caffe2FastRCNNOutputsInference(tensor_mode), 87 | ) as mocked_func: 88 | yield 89 | if check: 90 | assert mocked_func.call_count > 0 91 | 92 | 93 | @contextlib.contextmanager 94 | def mock_mask_rcnn_inference(tensor_mode, patched_module, check=True): 95 | with mock.patch( 96 | "{}.mask_rcnn_inference".format(patched_module), side_effect=Caffe2MaskRCNNInference() 97 | ) as mocked_func: 98 | yield 99 | if check: 100 | assert mocked_func.call_count > 0 101 | 102 | 103 | @contextlib.contextmanager 104 | def mock_keypoint_rcnn_inference(tensor_mode, patched_module, use_heatmap_max_keypoint, check=True): 105 | with mock.patch( 106 | "{}.keypoint_rcnn_inference".format(patched_module), 107 | side_effect=Caffe2KeypointRCNNInference(use_heatmap_max_keypoint), 108 | ) as mocked_func: 109 | yield 110 | if check: 111 | assert mocked_func.call_count > 0 112 | 113 | 114 | class ROIHeadsPatcher: 115 | def __init__(self, heads, use_heatmap_max_keypoint): 116 | self.heads = heads 117 | self.use_heatmap_max_keypoint = use_heatmap_max_keypoint 118 | 119 | @contextlib.contextmanager 120 | def mock_roi_heads(self, tensor_mode=True): 121 | """ 122 | Patching several inference functions inside ROIHeads and its subclasses 123 | 124 | Args: 125 | tensor_mode (bool): whether the inputs/outputs are caffe2's tensor 126 | format or not. Default to True. 127 | """ 128 | # NOTE: this requries the `keypoint_rcnn_inference` and `mask_rcnn_inference` 129 | # are called inside the same file as BaseXxxHead due to using mock.patch. 130 | kpt_heads_mod = keypoint_head.BaseKeypointRCNNHead.__module__ 131 | mask_head_mod = mask_head.BaseMaskRCNNHead.__module__ 132 | 133 | mock_ctx_managers = [ 134 | mock_fastrcnn_outputs_inference( 135 | tensor_mode=tensor_mode, 136 | check=True, 137 | box_predictor_type=type(self.heads.box_predictor), 138 | ) 139 | ] 140 | if getattr(self.heads, "keypoint_on", False): 141 | mock_ctx_managers += [ 142 | mock_keypoint_rcnn_inference( 143 | tensor_mode, kpt_heads_mod, self.use_heatmap_max_keypoint 144 | ) 145 | ] 146 | if getattr(self.heads, "mask_on", False): 147 | mock_ctx_managers += [mock_mask_rcnn_inference(tensor_mode, mask_head_mod)] 148 | 149 | with contextlib.ExitStack() as stack: # python 3.3+ 150 | for mgr in mock_ctx_managers: 151 | stack.enter_context(mgr) 152 | yield 153 | -------------------------------------------------------------------------------- /structures/image_list.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
2 | from __future__ import division 3 | from typing import Any, List, Tuple 4 | import torch 5 | from torch import device 6 | from torch.nn import functional as F 7 | 8 | from detectron2.utils.env import TORCH_VERSION 9 | 10 | 11 | class ImageList(object): 12 | """ 13 | Structure that holds a list of images (of possibly 14 | varying sizes) as a single tensor. 15 | This works by padding the images to the same size, 16 | and storing in a field the original sizes of each image 17 | 18 | Attributes: 19 | image_sizes (list[tuple[int, int]]): each tuple is (h, w) 20 | """ 21 | 22 | def __init__(self, tensor: torch.Tensor, image_sizes: List[Tuple[int, int]]): 23 | """ 24 | Arguments: 25 | tensor (Tensor): of shape (N, H, W) or (N, C_1, ..., C_K, H, W) where K >= 1 26 | image_sizes (list[tuple[int, int]]): Each tuple is (h, w). It can 27 | be smaller than (H, W) due to padding. 28 | """ 29 | self.tensor = tensor 30 | self.image_sizes = image_sizes 31 | 32 | def __len__(self) -> int: 33 | return len(self.image_sizes) 34 | 35 | def __getitem__(self, idx) -> torch.Tensor: 36 | """ 37 | Access the individual image in its original size. 38 | 39 | Args: 40 | idx: int or slice 41 | 42 | Returns: 43 | Tensor: an image of shape (H, W) or (C_1, ..., C_K, H, W) where K >= 1 44 | """ 45 | size = self.image_sizes[idx] 46 | return self.tensor[idx, ..., : size[0], : size[1]] 47 | 48 | @torch.jit.unused 49 | def to(self, *args: Any, **kwargs: Any) -> "ImageList": 50 | cast_tensor = self.tensor.to(*args, **kwargs) 51 | return ImageList(cast_tensor, self.image_sizes) 52 | 53 | @property 54 | def device(self) -> device: 55 | return self.tensor.device 56 | 57 | @staticmethod 58 | def from_tensors( 59 | tensors: List[torch.Tensor], size_divisibility: int = 0, pad_value: float = 0.0 60 | ) -> "ImageList": 61 | """ 62 | Args: 63 | tensors: a tuple or list of `torch.Tensors`, each of shape (Hi, Wi) or 64 | (C_1, ..., C_K, Hi, Wi) where K >= 1. The Tensors will be padded 65 | to the same shape with `pad_value`. 66 | size_divisibility (int): If `size_divisibility > 0`, add padding to ensure 67 | the common height and width is divisible by `size_divisibility`. 68 | This depends on the model and many models need a divisibility of 32. 69 | pad_value (float): value to pad 70 | 71 | Returns: 72 | an `ImageList`. 73 | """ 74 | assert len(tensors) > 0 75 | assert isinstance(tensors, (tuple, list)) 76 | for t in tensors: 77 | assert isinstance(t, torch.Tensor), type(t) 78 | assert t.shape[1:-2] == tensors[0].shape[1:-2], t.shape 79 | 80 | # Magic code below that handles dynamic shapes for both scripting and tracing ... 81 | 82 | image_sizes = [(im.shape[-2], im.shape[-1]) for im in tensors] 83 | 84 | if torch.jit.is_scripting(): 85 | max_size = torch.stack([torch.as_tensor(x) for x in image_sizes]).max(0).values 86 | if size_divisibility > 1: 87 | stride = size_divisibility 88 | # the last two dims are H,W, both subject to divisibility requirement 89 | max_size = (max_size + (stride - 1)) // stride * stride 90 | 91 | max_size: List[int] = max_size.to(dtype=torch.long).tolist() 92 | else: 93 | # https://github.com/pytorch/pytorch/issues/42448 94 | if TORCH_VERSION >= (1, 7) and torch.jit.is_tracing(): 95 | # In tracing mode, x.shape[i] is a scalar Tensor, and should not be converted 96 | # to int: this will cause the traced graph to have hard-coded shapes. 
97 | # Instead we convert each shape to a vector with a stack() 98 | image_sizes = [torch.stack(x) for x in image_sizes] 99 | 100 | # maximum (H, W) for the last two dims 101 | # find the maximum in a traceable way 102 | max_size = torch.stack(image_sizes).max(0).values 103 | else: 104 | # Original eager logic here -- not scripting, not tracing: 105 | # (can be unified with scripting after 106 | # https://github.com/pytorch/pytorch/issues/47379) 107 | max_size = torch.as_tensor( 108 | [max(s) for s in zip(*[img.shape[-2:] for img in tensors])] 109 | ) 110 | 111 | if size_divisibility > 1: 112 | stride = size_divisibility 113 | # the last two dims are H,W, both subject to divisibility requirement 114 | max_size = (max_size + (stride - 1)) // stride * stride 115 | 116 | if len(tensors) == 1: 117 | # This seems slightly (2%) faster. 118 | # TODO: check whether it's faster for multiple images as well 119 | image_size = image_sizes[0] 120 | padding_size = [0, max_size[-1] - image_size[1], 0, max_size[-2] - image_size[0]] 121 | batched_imgs = F.pad(tensors[0], padding_size, value=pad_value).unsqueeze_(0) 122 | else: 123 | # max_size can be a tensor in tracing mode, therefore convert to list 124 | batch_shape = [len(tensors)] + list(tensors[0].shape[:-2]) + list(max_size) 125 | batched_imgs = tensors[0].new_full(batch_shape, pad_value) 126 | for img, pad_img in zip(tensors, batched_imgs): 127 | pad_img[..., : img.shape[-2], : img.shape[-1]].copy_(img) 128 | 129 | return ImageList(batched_imgs.contiguous(), image_sizes) 130 | -------------------------------------------------------------------------------- /utils/analysis.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # -*- coding: utf-8 -*- 3 | 4 | import logging 5 | import typing 6 | import torch 7 | from fvcore.nn import activation_count, flop_count, parameter_count, parameter_count_table 8 | from torch import nn 9 | 10 | from detectron2.structures import BitMasks, Boxes, ImageList, Instances 11 | 12 | from .logger import log_first_n 13 | 14 | __all__ = [ 15 | "activation_count_operators", 16 | "flop_count_operators", 17 | "parameter_count_table", 18 | "parameter_count", 19 | ] 20 | 21 | FLOPS_MODE = "flops" 22 | ACTIVATIONS_MODE = "activations" 23 | 24 | 25 | # some extra ops to ignore from counting. 26 | _IGNORED_OPS = { 27 | "aten::add", 28 | "aten::add_", 29 | "aten::batch_norm", 30 | "aten::constant_pad_nd", 31 | "aten::div", 32 | "aten::div_", 33 | "aten::exp", 34 | "aten::log2", 35 | "aten::max_pool2d", 36 | "aten::meshgrid", 37 | "aten::mul", 38 | "aten::mul_", 39 | "aten::nonzero_numpy", 40 | "aten::rsub", 41 | "aten::sigmoid", 42 | "aten::sigmoid_", 43 | "aten::softmax", 44 | "aten::sort", 45 | "aten::sqrt", 46 | "aten::sub", 47 | "aten::upsample_nearest2d", 48 | "prim::PythonOp", 49 | "torchvision::nms", # TODO estimate flop for nms 50 | } 51 | 52 | 53 | def flop_count_operators( 54 | model: nn.Module, inputs: list, **kwargs 55 | ) -> typing.DefaultDict[str, float]: 56 | """ 57 | Implement operator-level flops counting using jit. 58 | This is a wrapper of fvcore.nn.flop_count that supports standard detection models 59 | in detectron2. 60 | 61 | Note: 62 | The function runs the input through the model to compute flops. 63 | The flops of a detection model are often input-dependent; for example, 64 | the flops of the box & mask head depend on the number of proposals & 65 | the number of detected objects.
66 | Therefore, the flops counting using a single input may not accurately 67 | reflect the computation cost of a model. 68 | 69 | Args: 70 | model: a detectron2 model that takes `list[dict]` as input. 71 | inputs (list[dict]): inputs to model, in detectron2's standard format. 72 | """ 73 | return _wrapper_count_operators(model=model, inputs=inputs, mode=FLOPS_MODE, **kwargs) 74 | 75 | 76 | def activation_count_operators( 77 | model: nn.Module, inputs: list, **kwargs 78 | ) -> typing.DefaultDict[str, float]: 79 | """ 80 | Implement operator-level activations counting using jit. 81 | This is a wrapper of fvcore.nn.activation_count that supports standard detection models 82 | in detectron2. 83 | 84 | Note: 85 | The function runs the input through the model to compute activations. 86 | The activations of a detection model are often input-dependent; for example, 87 | the activations of the box & mask head depend on the number of proposals & 88 | the number of detected objects. 89 | 90 | Args: 91 | model: a detectron2 model that takes `list[dict]` as input. 92 | inputs (list[dict]): inputs to model, in detectron2's standard format. 93 | """ 94 | return _wrapper_count_operators(model=model, inputs=inputs, mode=ACTIVATIONS_MODE, **kwargs) 95 | 96 | 97 | def _flatten_to_tuple(outputs): 98 | result = [] 99 | if isinstance(outputs, torch.Tensor): 100 | result.append(outputs) 101 | elif isinstance(outputs, (list, tuple)): 102 | for v in outputs: 103 | result.extend(_flatten_to_tuple(v)) 104 | elif isinstance(outputs, dict): 105 | for _, v in outputs.items(): 106 | result.extend(_flatten_to_tuple(v)) 107 | elif isinstance(outputs, Instances): 108 | result.extend(_flatten_to_tuple(outputs.get_fields())) 109 | elif isinstance(outputs, (Boxes, BitMasks, ImageList)): 110 | result.append(outputs.tensor) 111 | else: 112 | log_first_n( 113 | logging.WARN, 114 | f"Output of type {type(outputs)} not included in flops/activations count.", 115 | n=10, 116 | ) 117 | return tuple(result) 118 | 119 | 120 | def _wrapper_count_operators( 121 | model: nn.Module, inputs: list, mode: str, **kwargs 122 | ) -> typing.DefaultDict[str, float]: 123 | 124 | # ignore some ops 125 | supported_ops = {k: lambda *args, **kwargs: {} for k in _IGNORED_OPS} 126 | supported_ops.update(kwargs.pop("supported_ops", {})) 127 | kwargs["supported_ops"] = supported_ops 128 | 129 | assert len(inputs) == 1, "Please use batch size=1" 130 | tensor_input = inputs[0]["image"] 131 | 132 | class WrapModel(nn.Module): 133 | def __init__(self, model): 134 | super().__init__() 135 | if isinstance( 136 | model, (nn.parallel.distributed.DistributedDataParallel, nn.DataParallel) 137 | ): 138 | self.model = model.module 139 | else: 140 | self.model = model 141 | 142 | def forward(self, image): 143 | # jit requires the input/output to be Tensors 144 | inputs = [{"image": image}] 145 | outputs = self.model.forward(inputs) 146 | # Only the subgraph that computes the returned tuple of tensors will be 147 | # counted. So we flatten everything we found into a tuple of tensors.
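# (_flatten_to_tuple, defined above, recursively unpacks lists/tuples/dicts and Instances, keeps the .tensor of Boxes/BitMasks/ImageList, and warns about anything else it cannot flatten.)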
148 | return _flatten_to_tuple(outputs) 149 | 150 | old_train = model.training 151 | with torch.no_grad(): 152 | if mode == FLOPS_MODE: 153 | ret = flop_count(WrapModel(model).train(False), (tensor_input,), **kwargs) 154 | elif mode == ACTIVATIONS_MODE: 155 | ret = activation_count(WrapModel(model).train(False), (tensor_input,), **kwargs) 156 | else: 157 | raise NotImplementedError("Count for mode {} is not supported yet.".format(mode)) 158 | # compatible with change in fvcore 159 | if isinstance(ret, tuple): 160 | ret = ret[0] 161 | model.train(old_train) 162 | return ret 163 | -------------------------------------------------------------------------------- /checkpoint/catalog.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | import logging 3 | 4 | from detectron2.utils.file_io import PathHandler, PathManager 5 | 6 | 7 | class ModelCatalog(object): 8 | """ 9 | Store mappings from names to third-party models. 10 | """ 11 | 12 | S3_C2_DETECTRON_PREFIX = "https://dl.fbaipublicfiles.com/detectron" 13 | 14 | # MSRA models have STRIDE_IN_1X1=True. False otherwise. 15 | # NOTE: all BN models here have fused BN into an affine layer. 16 | # As a result, you should only load them to a model with "FrozenBN". 17 | # Loading them to a model with regular BN or SyncBN is wrong. 18 | # Even when loaded to FrozenBN, it is still different from affine by an epsilon, 19 | # which should be negligible for training. 20 | # NOTE: all models here uses PIXEL_STD=[1,1,1] 21 | # NOTE: Most of the BN models here are no longer used. We use the 22 | # re-converted pre-trained models under detectron2 model zoo instead. 23 | C2_IMAGENET_MODELS = { 24 | "MSRA/R-50": "ImageNetPretrained/MSRA/R-50.pkl", 25 | "MSRA/R-101": "ImageNetPretrained/MSRA/R-101.pkl", 26 | "FAIR/R-50-GN": "ImageNetPretrained/47261647/R-50-GN.pkl", 27 | "FAIR/R-101-GN": "ImageNetPretrained/47592356/R-101-GN.pkl", 28 | "FAIR/X-101-32x8d": "ImageNetPretrained/20171220/X-101-32x8d.pkl", 29 | "FAIR/X-101-64x4d": "ImageNetPretrained/FBResNeXt/X-101-64x4d.pkl", 30 | "FAIR/X-152-32x8d-IN5k": "ImageNetPretrained/25093814/X-152-32x8d-IN5k.pkl", 31 | } 32 | 33 | C2_DETECTRON_PATH_FORMAT = ( 34 | "{prefix}/{url}/output/train/{dataset}/{type}/model_final.pkl" # noqa B950 35 | ) 36 | 37 | C2_DATASET_COCO = "coco_2014_train%3Acoco_2014_valminusminival" 38 | C2_DATASET_COCO_KEYPOINTS = "keypoints_coco_2014_train%3Akeypoints_coco_2014_valminusminival" 39 | 40 | # format: {model_name} -> part of the url 41 | C2_DETECTRON_MODELS = { 42 | "35857197/e2e_faster_rcnn_R-50-C4_1x": "35857197/12_2017_baselines/e2e_faster_rcnn_R-50-C4_1x.yaml.01_33_49.iAX0mXvW", # noqa B950 43 | "35857345/e2e_faster_rcnn_R-50-FPN_1x": "35857345/12_2017_baselines/e2e_faster_rcnn_R-50-FPN_1x.yaml.01_36_30.cUF7QR7I", # noqa B950 44 | "35857890/e2e_faster_rcnn_R-101-FPN_1x": "35857890/12_2017_baselines/e2e_faster_rcnn_R-101-FPN_1x.yaml.01_38_50.sNxI7sX7", # noqa B950 45 | "36761737/e2e_faster_rcnn_X-101-32x8d-FPN_1x": "36761737/12_2017_baselines/e2e_faster_rcnn_X-101-32x8d-FPN_1x.yaml.06_31_39.5MIHi1fZ", # noqa B950 46 | "35858791/e2e_mask_rcnn_R-50-C4_1x": "35858791/12_2017_baselines/e2e_mask_rcnn_R-50-C4_1x.yaml.01_45_57.ZgkA7hPB", # noqa B950 47 | "35858933/e2e_mask_rcnn_R-50-FPN_1x": "35858933/12_2017_baselines/e2e_mask_rcnn_R-50-FPN_1x.yaml.01_48_14.DzEQe4wC", # noqa B950 48 | "35861795/e2e_mask_rcnn_R-101-FPN_1x": "35861795/12_2017_baselines/e2e_mask_rcnn_R-101-FPN_1x.yaml.02_31_37.KqyEK4tT", # 
noqa B950 49 | "36761843/e2e_mask_rcnn_X-101-32x8d-FPN_1x": "36761843/12_2017_baselines/e2e_mask_rcnn_X-101-32x8d-FPN_1x.yaml.06_35_59.RZotkLKI", # noqa B950 50 | "48616381/e2e_mask_rcnn_R-50-FPN_2x_gn": "GN/48616381/04_2018_gn_baselines/e2e_mask_rcnn_R-50-FPN_2x_gn_0416.13_23_38.bTlTI97Q", # noqa B950 51 | "37697547/e2e_keypoint_rcnn_R-50-FPN_1x": "37697547/12_2017_baselines/e2e_keypoint_rcnn_R-50-FPN_1x.yaml.08_42_54.kdzV35ao", # noqa B950 52 | "35998355/rpn_R-50-C4_1x": "35998355/12_2017_baselines/rpn_R-50-C4_1x.yaml.08_00_43.njH5oD9L", # noqa B950 53 | "35998814/rpn_R-50-FPN_1x": "35998814/12_2017_baselines/rpn_R-50-FPN_1x.yaml.08_06_03.Axg0r179", # noqa B950 54 | "36225147/fast_R-50-FPN_1x": "36225147/12_2017_baselines/fast_rcnn_R-50-FPN_1x.yaml.08_39_09.L3obSdQ2", # noqa B950 55 | } 56 | 57 | @staticmethod 58 | def get(name): 59 | if name.startswith("Caffe2Detectron/COCO"): 60 | return ModelCatalog._get_c2_detectron_baseline(name) 61 | if name.startswith("ImageNetPretrained/"): 62 | return ModelCatalog._get_c2_imagenet_pretrained(name) 63 | raise RuntimeError("model not present in the catalog: {}".format(name)) 64 | 65 | @staticmethod 66 | def _get_c2_imagenet_pretrained(name): 67 | prefix = ModelCatalog.S3_C2_DETECTRON_PREFIX 68 | name = name[len("ImageNetPretrained/") :] 69 | name = ModelCatalog.C2_IMAGENET_MODELS[name] 70 | url = "/".join([prefix, name]) 71 | return url 72 | 73 | @staticmethod 74 | def _get_c2_detectron_baseline(name): 75 | name = name[len("Caffe2Detectron/COCO/") :] 76 | url = ModelCatalog.C2_DETECTRON_MODELS[name] 77 | if "keypoint_rcnn" in name: 78 | dataset = ModelCatalog.C2_DATASET_COCO_KEYPOINTS 79 | else: 80 | dataset = ModelCatalog.C2_DATASET_COCO 81 | 82 | if "35998355/rpn_R-50-C4_1x" in name: 83 | # this one model is somehow different from others .. 84 | type = "rpn" 85 | else: 86 | type = "generalized_rcnn" 87 | 88 | # Detectron C2 models are stored in the structure defined in `C2_DETECTRON_PATH_FORMAT`. 89 | url = ModelCatalog.C2_DETECTRON_PATH_FORMAT.format( 90 | prefix=ModelCatalog.S3_C2_DETECTRON_PREFIX, url=url, type=type, dataset=dataset 91 | ) 92 | return url 93 | 94 | 95 | class ModelCatalogHandler(PathHandler): 96 | """ 97 | Resolve URL like catalog://. 98 | """ 99 | 100 | PREFIX = "catalog://" 101 | 102 | def _get_supported_prefixes(self): 103 | return [self.PREFIX] 104 | 105 | def _get_local_path(self, path): 106 | logger = logging.getLogger(__name__) 107 | catalog_path = ModelCatalog.get(path[len(self.PREFIX) :]) 108 | logger.info("Catalog entry {} points to {}".format(path, catalog_path)) 109 | return PathManager.get_local_path(catalog_path) 110 | 111 | def _open(self, path, mode="r", **kwargs): 112 | return PathManager.open(self._get_local_path(path), mode, **kwargs) 113 | 114 | 115 | PathManager.register_handler(ModelCatalogHandler()) 116 | -------------------------------------------------------------------------------- /projects/point_rend/semantic_seg.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
2 | import numpy as np 3 | from typing import Dict 4 | import torch 5 | from torch import nn 6 | from torch.nn import functional as F 7 | 8 | from detectron2.layers import ShapeSpec, cat 9 | from detectron2.modeling import SEM_SEG_HEADS_REGISTRY 10 | 11 | from .point_features import ( 12 | get_uncertain_point_coords_on_grid, 13 | get_uncertain_point_coords_with_randomness, 14 | point_sample, 15 | ) 16 | from .point_head import build_point_head 17 | 18 | 19 | def calculate_uncertainty(sem_seg_logits): 20 | """ 21 | For each location of the prediction `sem_seg_logits` we estimate uncertainty as the 22 | difference between the top first and top second predicted logits. 23 | 24 | Args: 25 | sem_seg_logits (Tensor): A tensor of shape (N, C, ...), where N is the minibatch size and 26 | C is the number of foreground classes. The values are logits. 27 | 28 | Returns: 29 | scores (Tensor): A tensor of shape (N, 1, ...) that contains uncertainty scores with 30 | the most uncertain locations having the highest uncertainty score. 31 | """ 32 | top2_scores = torch.topk(sem_seg_logits, k=2, dim=1)[0] 33 | return (top2_scores[:, 1] - top2_scores[:, 0]).unsqueeze(1) 34 | 35 | 36 | @SEM_SEG_HEADS_REGISTRY.register() 37 | class PointRendSemSegHead(nn.Module): 38 | """ 39 | A semantic segmentation head that combines a head set in `POINT_HEAD.COARSE_SEM_SEG_HEAD_NAME` 40 | and a point head set in `MODEL.POINT_HEAD.NAME`. 41 | """ 42 | 43 | def __init__(self, cfg, input_shape: Dict[str, ShapeSpec]): 44 | super().__init__() 45 | 46 | self.ignore_value = cfg.MODEL.SEM_SEG_HEAD.IGNORE_VALUE 47 | 48 | self.coarse_sem_seg_head = SEM_SEG_HEADS_REGISTRY.get( 49 | cfg.MODEL.POINT_HEAD.COARSE_SEM_SEG_HEAD_NAME 50 | )(cfg, input_shape) 51 | self._init_point_head(cfg, input_shape) 52 | 53 | def _init_point_head(self, cfg, input_shape: Dict[str, ShapeSpec]): 54 | # fmt: off 55 | assert cfg.MODEL.SEM_SEG_HEAD.NUM_CLASSES == cfg.MODEL.POINT_HEAD.NUM_CLASSES 56 | feature_channels = {k: v.channels for k, v in input_shape.items()} 57 | self.in_features = cfg.MODEL.POINT_HEAD.IN_FEATURES 58 | self.train_num_points = cfg.MODEL.POINT_HEAD.TRAIN_NUM_POINTS 59 | self.oversample_ratio = cfg.MODEL.POINT_HEAD.OVERSAMPLE_RATIO 60 | self.importance_sample_ratio = cfg.MODEL.POINT_HEAD.IMPORTANCE_SAMPLE_RATIO 61 | self.subdivision_steps = cfg.MODEL.POINT_HEAD.SUBDIVISION_STEPS 62 | self.subdivision_num_points = cfg.MODEL.POINT_HEAD.SUBDIVISION_NUM_POINTS 63 | # fmt: on 64 | 65 | in_channels = np.sum([feature_channels[f] for f in self.in_features]) 66 | self.point_head = build_point_head(cfg, ShapeSpec(channels=in_channels, width=1, height=1)) 67 | 68 | def forward(self, features, targets=None): 69 | coarse_sem_seg_logits = self.coarse_sem_seg_head.layers(features) 70 | 71 | if self.training: 72 | losses = self.coarse_sem_seg_head.losses(coarse_sem_seg_logits, targets) 73 | 74 | with torch.no_grad(): 75 | point_coords = get_uncertain_point_coords_with_randomness( 76 | coarse_sem_seg_logits, 77 | calculate_uncertainty, 78 | self.train_num_points, 79 | self.oversample_ratio, 80 | self.importance_sample_ratio, 81 | ) 82 | coarse_features = point_sample(coarse_sem_seg_logits, point_coords, align_corners=False) 83 | 84 | fine_grained_features = cat( 85 | [ 86 | point_sample(features[in_feature], point_coords, align_corners=False) 87 | for in_feature in self.in_features 88 | ], 89 | dim=1, 90 | ) 91 | point_logits = self.point_head(fine_grained_features, coarse_features) 92 | point_targets = ( 93 | point_sample( 94 |
targets.unsqueeze(1).to(torch.float), 95 | point_coords, 96 | mode="nearest", 97 | align_corners=False, 98 | ) 99 | .squeeze(1) 100 | .to(torch.long) 101 | ) 102 | losses["loss_sem_seg_point"] = F.cross_entropy( 103 | point_logits, point_targets, reduction="mean", ignore_index=self.ignore_value 104 | ) 105 | return None, losses 106 | else: 107 | sem_seg_logits = coarse_sem_seg_logits.clone() 108 | for _ in range(self.subdivision_steps): 109 | sem_seg_logits = F.interpolate( 110 | sem_seg_logits, scale_factor=2, mode="bilinear", align_corners=False 111 | ) 112 | uncertainty_map = calculate_uncertainty(sem_seg_logits) 113 | point_indices, point_coords = get_uncertain_point_coords_on_grid( 114 | uncertainty_map, self.subdivision_num_points 115 | ) 116 | fine_grained_features = cat( 117 | [ 118 | point_sample(features[in_feature], point_coords, align_corners=False) 119 | for in_feature in self.in_features 120 | ] 121 | ) 122 | coarse_features = point_sample( 123 | coarse_sem_seg_logits, point_coords, align_corners=False 124 | ) 125 | point_logits = self.point_head(fine_grained_features, coarse_features) 126 | 127 | # put sem seg point predictions to the right places on the upsampled grid. 128 | N, C, H, W = sem_seg_logits.shape 129 | point_indices = point_indices.unsqueeze(1).expand(-1, C, -1) 130 | sem_seg_logits = ( 131 | sem_seg_logits.reshape(N, C, H * W) 132 | .scatter_(2, point_indices, point_logits) 133 | .view(N, C, H, W) 134 | ) 135 | return sem_seg_logits, {} 136 | -------------------------------------------------------------------------------- /projects/deeplab/resnet.py: -------------------------------------------------------------------------------- 1 | import fvcore.nn.weight_init as weight_init 2 | import torch.nn.functional as F 3 | 4 | from detectron2.layers import CNNBlockBase, Conv2d, get_norm 5 | from detectron2.modeling import BACKBONE_REGISTRY 6 | from detectron2.modeling.backbone.resnet import ( 7 | BasicStem, 8 | BottleneckBlock, 9 | DeformBottleneckBlock, 10 | ResNet, 11 | ) 12 | 13 | 14 | class DeepLabStem(CNNBlockBase): 15 | """ 16 | The DeepLab ResNet stem (layers before the first residual block). 17 | """ 18 | 19 | def __init__(self, in_channels=3, out_channels=128, norm="BN"): 20 | """ 21 | Args: 22 | norm (str or callable): norm after the first conv layer. 23 | See :func:`layers.get_norm` for supported format. 
24 | """ 25 | super().__init__(in_channels, out_channels, 4) 26 | self.in_channels = in_channels 27 | self.conv1 = Conv2d( 28 | in_channels, 29 | out_channels // 2, 30 | kernel_size=3, 31 | stride=2, 32 | padding=1, 33 | bias=False, 34 | norm=get_norm(norm, out_channels // 2), 35 | ) 36 | self.conv2 = Conv2d( 37 | out_channels // 2, 38 | out_channels // 2, 39 | kernel_size=3, 40 | stride=1, 41 | padding=1, 42 | bias=False, 43 | norm=get_norm(norm, out_channels // 2), 44 | ) 45 | self.conv3 = Conv2d( 46 | out_channels // 2, 47 | out_channels, 48 | kernel_size=3, 49 | stride=1, 50 | padding=1, 51 | bias=False, 52 | norm=get_norm(norm, out_channels), 53 | ) 54 | weight_init.c2_msra_fill(self.conv1) 55 | weight_init.c2_msra_fill(self.conv2) 56 | weight_init.c2_msra_fill(self.conv3) 57 | 58 | def forward(self, x): 59 | x = self.conv1(x) 60 | x = F.relu_(x) 61 | x = self.conv2(x) 62 | x = F.relu_(x) 63 | x = self.conv3(x) 64 | x = F.relu_(x) 65 | x = F.max_pool2d(x, kernel_size=3, stride=2, padding=1) 66 | return x 67 | 68 | 69 | @BACKBONE_REGISTRY.register() 70 | def build_resnet_deeplab_backbone(cfg, input_shape): 71 | """ 72 | Create a ResNet instance from config. 73 | Returns: 74 | ResNet: a :class:`ResNet` instance. 75 | """ 76 | # need registration of new blocks/stems? 77 | norm = cfg.MODEL.RESNETS.NORM 78 | if cfg.MODEL.RESNETS.STEM_TYPE == "basic": 79 | stem = BasicStem( 80 | in_channels=input_shape.channels, 81 | out_channels=cfg.MODEL.RESNETS.STEM_OUT_CHANNELS, 82 | norm=norm, 83 | ) 84 | elif cfg.MODEL.RESNETS.STEM_TYPE == "deeplab": 85 | stem = DeepLabStem( 86 | in_channels=input_shape.channels, 87 | out_channels=cfg.MODEL.RESNETS.STEM_OUT_CHANNELS, 88 | norm=norm, 89 | ) 90 | else: 91 | raise ValueError("Unknown stem type: {}".format(cfg.MODEL.RESNETS.STEM_TYPE)) 92 | 93 | # fmt: off 94 | freeze_at = cfg.MODEL.BACKBONE.FREEZE_AT 95 | out_features = cfg.MODEL.RESNETS.OUT_FEATURES 96 | depth = cfg.MODEL.RESNETS.DEPTH 97 | num_groups = cfg.MODEL.RESNETS.NUM_GROUPS 98 | width_per_group = cfg.MODEL.RESNETS.WIDTH_PER_GROUP 99 | bottleneck_channels = num_groups * width_per_group 100 | in_channels = cfg.MODEL.RESNETS.STEM_OUT_CHANNELS 101 | out_channels = cfg.MODEL.RESNETS.RES2_OUT_CHANNELS 102 | stride_in_1x1 = cfg.MODEL.RESNETS.STRIDE_IN_1X1 103 | res4_dilation = cfg.MODEL.RESNETS.RES4_DILATION 104 | res5_dilation = cfg.MODEL.RESNETS.RES5_DILATION 105 | deform_on_per_stage = cfg.MODEL.RESNETS.DEFORM_ON_PER_STAGE 106 | deform_modulated = cfg.MODEL.RESNETS.DEFORM_MODULATED 107 | deform_num_groups = cfg.MODEL.RESNETS.DEFORM_NUM_GROUPS 108 | res5_multi_grid = cfg.MODEL.RESNETS.RES5_MULTI_GRID 109 | # fmt: on 110 | assert res4_dilation in {1, 2}, "res4_dilation cannot be {}.".format(res4_dilation) 111 | assert res5_dilation in {1, 2, 4}, "res5_dilation cannot be {}.".format(res5_dilation) 112 | if res4_dilation == 2: 113 | # Always dilate res5 if res4 is dilated. 
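# (When res4 keeps dilation 2 the backbone is not strided further at that stage (output stride 8), so res5 must use dilation 4 to compensate; hence the assert below.)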
114 | assert res5_dilation == 4 115 | 116 | num_blocks_per_stage = {50: [3, 4, 6, 3], 101: [3, 4, 23, 3], 152: [3, 8, 36, 3]}[depth] 117 | 118 | stages = [] 119 | 120 | # Avoid creating variables without gradients 121 | # It consumes extra memory and may cause allreduce to fail 122 | out_stage_idx = [{"res2": 2, "res3": 3, "res4": 4, "res5": 5}[f] for f in out_features] 123 | max_stage_idx = max(out_stage_idx) 124 | for idx, stage_idx in enumerate(range(2, max_stage_idx + 1)): 125 | if stage_idx == 4: 126 | dilation = res4_dilation 127 | elif stage_idx == 5: 128 | dilation = res5_dilation 129 | else: 130 | dilation = 1 131 | first_stride = 1 if idx == 0 or dilation > 1 else 2 132 | stage_kargs = { 133 | "num_blocks": num_blocks_per_stage[idx], 134 | "stride_per_block": [first_stride] + [1] * (num_blocks_per_stage[idx] - 1), 135 | "in_channels": in_channels, 136 | "out_channels": out_channels, 137 | "norm": norm, 138 | } 139 | stage_kargs["bottleneck_channels"] = bottleneck_channels 140 | stage_kargs["stride_in_1x1"] = stride_in_1x1 141 | stage_kargs["dilation"] = dilation 142 | stage_kargs["num_groups"] = num_groups 143 | if deform_on_per_stage[idx]: 144 | stage_kargs["block_class"] = DeformBottleneckBlock 145 | stage_kargs["deform_modulated"] = deform_modulated 146 | stage_kargs["deform_num_groups"] = deform_num_groups 147 | else: 148 | stage_kargs["block_class"] = BottleneckBlock 149 | if stage_idx == 5: 150 | stage_kargs.pop("dilation") 151 | stage_kargs["dilation_per_block"] = [dilation * mg for mg in res5_multi_grid] 152 | blocks = ResNet.make_stage(**stage_kargs) 153 | in_channels = out_channels 154 | out_channels *= 2 155 | bottleneck_channels *= 2 156 | stages.append(blocks) 157 | return ResNet(stem, stages, out_features=out_features).freeze(freeze_at) 158 | -------------------------------------------------------------------------------- /modeling/matcher.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | from typing import List 3 | import torch 4 | 5 | from detectron2.layers import nonzero_tuple 6 | 7 | 8 | class Matcher(object): 9 | """ 10 | This class assigns to each predicted "element" (e.g., a box) a ground-truth 11 | element. Each predicted element will have exactly zero or one matches; each 12 | ground-truth element may be matched to zero or more predicted elements. 13 | 14 | The matching is determined by the MxN match_quality_matrix, that characterizes 15 | how well each (ground-truth, prediction)-pair match each other. For example, 16 | if the elements are boxes, this matrix may contain box intersection-over-union 17 | overlap values. 18 | 19 | The matcher returns (a) a vector of length N containing the index of the 20 | ground-truth element m in [0, M) that matches to prediction n in [0, N). 21 | (b) a vector of length N containing the labels for each prediction. 22 | """ 23 | 24 | def __init__( 25 | self, thresholds: List[float], labels: List[int], allow_low_quality_matches: bool = False 26 | ): 27 | """ 28 | Args: 29 | thresholds (list): a list of thresholds used to stratify predictions 30 | into levels. 31 | labels (list): a list of values to label predictions belonging at 32 | each level. A label can be one of {-1, 0, 1} signifying 33 | {ignore, negative class, positive class}, respectively. 34 | allow_low_quality_matches (bool): if True, produce additional matches 35 | for predictions with maximum match quality lower than high_threshold. 
36 | See set_low_quality_matches_ for more details. 37 | 38 | For example, 39 | thresholds = [0.3, 0.5] 40 | labels = [0, -1, 1] 41 | All predictions with iou < 0.3 will be marked with 0 and 42 | thus will be considered as false positives while training. 43 | All predictions with 0.3 <= iou < 0.5 will be marked with -1 and 44 | thus will be ignored. 45 | All predictions with 0.5 <= iou will be marked with 1 and 46 | thus will be considered as true positives. 47 | """ 48 | # Add -inf and +inf to first and last position in thresholds 49 | thresholds = thresholds[:] 50 | assert thresholds[0] > 0 51 | thresholds.insert(0, -float("inf")) 52 | thresholds.append(float("inf")) 53 | # Currently torchscript does not support all + generator 54 | assert all([low <= high for (low, high) in zip(thresholds[:-1], thresholds[1:])]) 55 | assert all([l in [-1, 0, 1] for l in labels]) 56 | assert len(labels) == len(thresholds) - 1 57 | self.thresholds = thresholds 58 | self.labels = labels 59 | self.allow_low_quality_matches = allow_low_quality_matches 60 | 61 | def __call__(self, match_quality_matrix): 62 | """ 63 | Args: 64 | match_quality_matrix (Tensor[float]): an MxN tensor, containing the 65 | pairwise quality between M ground-truth elements and N predicted 66 | elements. All elements must be >= 0 (due to the use of `torch.nonzero` 67 | for selecting indices in :meth:`set_low_quality_matches_`). 68 | 69 | Returns: 70 | matches (Tensor[int64]): a vector of length N, where matches[i] is a matched 71 | ground-truth index in [0, M) 72 | match_labels (Tensor[int8]): a vector of length N, where match_labels[i] indicates 73 | whether a prediction is a true or false positive or ignored 74 | """ 75 | assert match_quality_matrix.dim() == 2 76 | if match_quality_matrix.numel() == 0: 77 | default_matches = match_quality_matrix.new_full( 78 | (match_quality_matrix.size(1),), 0, dtype=torch.int64 79 | ) 80 | # When no gt boxes exist, we define IOU = 0 and therefore set labels 81 | # to `self.labels[0]`, which usually defaults to background class 0 82 | # To choose to ignore instead, one can use labels=[-1,0,-1,1] and set appropriate thresholds 83 | default_match_labels = match_quality_matrix.new_full( 84 | (match_quality_matrix.size(1),), self.labels[0], dtype=torch.int8 85 | ) 86 | return default_matches, default_match_labels 87 | 88 | assert torch.all(match_quality_matrix >= 0) 89 | 90 | # match_quality_matrix is M (gt) x N (predicted) 91 | # Max over gt elements (dim 0) to find best gt candidate for each prediction 92 | matched_vals, matches = match_quality_matrix.max(dim=0) 93 | 94 | match_labels = matches.new_full(matches.size(), 1, dtype=torch.int8) 95 | 96 | for (l, low, high) in zip(self.labels, self.thresholds[:-1], self.thresholds[1:]): 97 | low_high = (matched_vals >= low) & (matched_vals < high) 98 | match_labels[low_high] = l 99 | 100 | if self.allow_low_quality_matches: 101 | self.set_low_quality_matches_(match_labels, match_quality_matrix) 102 | 103 | return matches, match_labels 104 | 105 | def set_low_quality_matches_(self, match_labels, match_quality_matrix): 106 | """ 107 | Produce additional matches for predictions that have only low-quality matches. 108 | Specifically, for each ground-truth G find the set of predictions that have 109 | maximum overlap with it (including ties); for each prediction in that set, if 110 | it is unmatched, then match it to the ground-truth G. 111 | 112 | This function implements the RPN assignment case (i) in Sec. 3.1.2 of 113 | :paper:`Faster R-CNN`.
114 | """ 115 | # For each gt, find the prediction with which it has highest quality 116 | highest_quality_foreach_gt, _ = match_quality_matrix.max(dim=1) 117 | # Find the highest quality match available, even if it is low, including ties. 118 | # Note that the match qualities must be positive due to the use of 119 | # `torch.nonzero`. 120 | _, pred_inds_with_highest_quality = nonzero_tuple( 121 | match_quality_matrix == highest_quality_foreach_gt[:, None] 122 | ) 123 | # If an anchor was labeled positive only due to a low-quality match 124 | # with gt_A, but it has larger overlap with gt_B, its matched index will still be gt_B. 125 | # This follows the implementation in Detectron, and is found to have no significant impact. 126 | match_labels[pred_inds_with_highest_quality] = 1 127 | --------------------------------------------------------------------------------
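A minimal usage sketch (not part of the repository) showing how `Matcher` labels predictions from an IoU matrix; it assumes the module is importable as `detectron2.modeling.matcher`, reuses the thresholds/labels from the docstring example above, and the IoU values are invented for illustration:

import torch
from detectron2.modeling.matcher import Matcher

# iou < 0.3 -> 0 (negative), 0.3 <= iou < 0.5 -> -1 (ignored), iou >= 0.5 -> 1 (positive)
matcher = Matcher(thresholds=[0.3, 0.5], labels=[0, -1, 1], allow_low_quality_matches=True)

# 2 ground-truth boxes x 4 predictions; entries are pairwise IoU values (made up)
iou = torch.tensor(
    [
        [0.9, 0.1, 0.4, 0.00],
        [0.2, 0.6, 0.1, 0.05],
    ]
)
matches, match_labels = matcher(iou)
# matches      -> tensor([0, 1, 0, 1]): index of the best-matching ground-truth box per prediction
# match_labels -> tensor([1, 1, -1, 0], dtype=torch.int8): positive, positive, ignored, negative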