├── .gitignore ├── LICENSE ├── README.md ├── demo ├── config.py ├── demo.py ├── evaluate_pq.ipynb ├── fig.png ├── head_Latency_and_FLOPs.ipynb ├── neck_Latency_and_FLOPs.ipynb └── predictor.py ├── detectron2 ├── __init__.py ├── checkpoint │ ├── __init__.py │ ├── c2_model_loading.py │ ├── catalog.py │ └── detection_checkpoint.py ├── config │ ├── __init__.py │ ├── compat.py │ ├── config.py │ └── defaults.py ├── data │ ├── __init__.py │ ├── build.py │ ├── catalog.py │ ├── common.py │ ├── dataset_mapper.py │ ├── datasets │ │ ├── README.md │ │ ├── __init__.py │ │ ├── builtin.py │ │ ├── builtin_meta.py │ │ ├── cityscapes.py │ │ ├── cityscapes_panoptic.py │ │ ├── coco.py │ │ ├── coco_panoptic.py │ │ ├── crowdhuman.py │ │ ├── lvis.py │ │ ├── lvis_v0_5_categories.py │ │ ├── lvis_v1_categories.py │ │ ├── mot.py │ │ ├── pascal_voc.py │ │ └── register_coco.py │ ├── detection_utils.py │ ├── samplers │ │ ├── __init__.py │ │ ├── distributed_sampler.py │ │ └── grouped_batch_sampler.py │ └── transforms │ │ ├── __init__.py │ │ ├── augmentation.py │ │ ├── augmentation_impl.py │ │ └── transform.py ├── engine │ ├── __init__.py │ ├── defaults.py │ ├── hooks.py │ ├── launch.py │ └── train_loop.py ├── evaluation │ ├── __init__.py │ ├── cityscapes_evaluation.py │ ├── coco_evaluation.py │ ├── evaluator.py │ ├── fast_eval_api.py │ ├── lvis_evaluation.py │ ├── panoptic_evaluation.py │ ├── pascal_voc_evaluation.py │ ├── rotated_coco_evaluation.py │ ├── sem_seg_evaluation.py │ └── testing.py ├── export │ ├── README.md │ ├── __init__.py │ ├── api.py │ ├── c10.py │ ├── caffe2_export.py │ ├── caffe2_inference.py │ ├── caffe2_modeling.py │ ├── caffe2_patch.py │ ├── shared.py │ ├── torchscript.py │ └── torchscript_patch.py ├── layers │ ├── __init__.py │ ├── aspp.py │ ├── batch_norm.py │ ├── blocks.py │ ├── csrc │ │ ├── README.md │ │ ├── ROIAlign │ │ │ ├── ROIAlign.h │ │ │ ├── ROIAlign_cpu.cpp │ │ │ └── ROIAlign_cuda.cu │ │ ├── ROIAlignRotated │ │ │ ├── ROIAlignRotated.h │ │ │ ├── ROIAlignRotated_cpu.cpp │ │ │ └── ROIAlignRotated_cuda.cu │ │ ├── box_iou_rotated │ │ │ ├── box_iou_rotated.h │ │ │ ├── box_iou_rotated_cpu.cpp │ │ │ ├── box_iou_rotated_cuda.cu │ │ │ └── box_iou_rotated_utils.h │ │ ├── cocoeval │ │ │ ├── cocoeval.cpp │ │ │ └── cocoeval.h │ │ ├── cuda_version.cu │ │ ├── deformable │ │ │ ├── deform_conv.h │ │ │ ├── deform_conv_cuda.cu │ │ │ └── deform_conv_cuda_kernel.cu │ │ ├── nms_rotated │ │ │ ├── nms_rotated.h │ │ │ ├── nms_rotated_cpu.cpp │ │ │ └── nms_rotated_cuda.cu │ │ └── vision.cpp │ ├── deform_conv.py │ ├── mask_ops.py │ ├── nms.py │ ├── roi_align.py │ ├── roi_align_rotated.py │ ├── rotated_boxes.py │ ├── shape_spec.py │ └── wrappers.py ├── model_zoo │ ├── __init__.py │ └── model_zoo.py ├── modeling │ ├── __init__.py │ ├── anchor_generator.py │ ├── backbone │ │ ├── __init__.py │ │ ├── backbone.py │ │ ├── build.py │ │ ├── fpn.py │ │ └── resnet.py │ ├── box_regression.py │ ├── matcher.py │ ├── meta_arch │ │ ├── __init__.py │ │ ├── build.py │ │ ├── panoptic_fpn.py │ │ ├── rcnn.py │ │ ├── retinanet.py │ │ └── semantic_seg.py │ ├── poolers.py │ ├── postprocessing.py │ ├── proposal_generator │ │ ├── __init__.py │ │ ├── build.py │ │ ├── proposal_utils.py │ │ ├── rpn.py │ │ └── rrpn.py │ ├── roi_heads │ │ ├── __init__.py │ │ ├── box_head.py │ │ ├── cascade_rcnn.py │ │ ├── fast_rcnn.py │ │ ├── keypoint_head.py │ │ ├── mask_head.py │ │ ├── roi_heads.py │ │ └── rotated_fast_rcnn.py │ ├── sampling.py │ └── test_time_augmentation.py ├── projects │ ├── README.md │ └── __init__.py ├── solver │ ├── __init__.py │ ├── 
build.py │ └── lr_scheduler.py ├── structures │ ├── __init__.py │ ├── boxes.py │ ├── image_list.py │ ├── instances.py │ ├── keypoints.py │ ├── masks.py │ └── rotated_boxes.py └── utils │ ├── README.md │ ├── __init__.py │ ├── analysis.py │ ├── collect_env.py │ ├── colormap.py │ ├── comm.py │ ├── env.py │ ├── events.py │ ├── file_io.py │ ├── logger.py │ ├── memory.py │ ├── registry.py │ ├── serialize.py │ ├── testing.py │ ├── video_visualizer.py │ └── visualizer.py ├── projects └── YOSO │ ├── configs │ ├── ade20k │ │ └── panoptic-segmentation │ │ │ ├── Base-ADE20K-PanopticSegmentation.yaml │ │ │ └── YOSO-R50.yaml │ ├── cityscapes │ │ └── panoptic-segmentation │ │ │ ├── Base-Cityscapes-PanopticSegmentation.yaml │ │ │ └── YOSO-R50.yaml │ ├── coco │ │ └── panoptic-segmentation │ │ │ ├── Base-COCO-PanopticSegmentation.yaml │ │ │ └── YOSO-R50.yaml │ └── mapillary-vistas │ │ └── panoptic-segmentation │ │ ├── Base-MapillaryVistas-PanopticSegmentation.yaml │ │ └── YOSO-R50.yaml │ ├── train_net.py │ └── yoso │ ├── __init__.py │ ├── config.py │ ├── data │ ├── __init__.py │ ├── dataset_mappers │ │ ├── yoso_instance_dataset_mapper.py │ │ ├── yoso_instance_lsj_dataset_mapper.py │ │ ├── yoso_panoptic_dataset_mapper.py │ │ ├── yoso_panoptic_lsj_dataset_mapper.py │ │ └── yoso_semantic_dataset_mapper.py │ └── datasets │ │ ├── __init__.py │ │ ├── register_ade20k_full.py │ │ ├── register_ade20k_instance.py │ │ ├── register_ade20k_panoptic.py │ │ ├── register_coco_panoptic_annos_semseg.py │ │ ├── register_coco_stuff_10k.py │ │ ├── register_mapillary_vistas.py │ │ └── register_mapillary_vistas_panoptic.py │ ├── head.py │ ├── loss.py │ ├── neck.py │ ├── segmentator.py │ └── utils.py ├── setup.cfg └── setup.py /.gitignore: -------------------------------------------------------------------------------- 1 | # output dir 2 | output* 3 | 4 | 5 | *.png 6 | *.json 7 | *.diff 8 | *.jpg 9 | !/projects/DensePose/doc/images/*.jpg 10 | 11 | # compilation and distribution 12 | __pycache__ 13 | _ext 14 | *.pyc 15 | *.pyd 16 | *.so 17 | *.dll 18 | *.egg-info/ 19 | build/ 20 | dist/ 21 | wheels/ 22 | 23 | # pytorch/python/numpy formats 24 | *.pth 25 | *.pkl 26 | *.npy 27 | *.ts 28 | model_ts*.txt 29 | 30 | # ipython/jupyter notebooks 31 | # *.ipynb 32 | **/.ipynb_checkpoints/ 33 | 34 | # Editor temporaries 35 | *.swn 36 | *.swo 37 | *.swp 38 | *~ 39 | 40 | # editor settings 41 | .idea 42 | .vscode 43 | _darcs 44 | 45 | # project dirs 46 | # /detectron2/model_zoo/configs 47 | /datasets/* 48 | !/datasets/*.* 49 | /projects/*/datasets 50 | /models 51 | /snippet 52 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Jie Hu 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | This is the project page for paper: 2 | >[**You Only Segment Once: Towards Real-Time Panoptic Segmentation**](https://arxiv.org/abs/2303.14651), In CVPR 2023. 3 | 4 | 5 | 6 | ## Model Zoo 7 | 8 | On COCO validation set: 9 | | Backbone | Scale | PQ | FPS| GPU | Model 10 | |:---:|:---:|:---:|:---:|:---:|:---:| 11 | |R50|800,1333|48.4|23.6|V100| [model](https://github.com/hujiecpp/YOSO/releases/download/v0.1/yoso_res50_coco.pth) | 12 | |R50|512,800|46.4|45.6|V100|[model](https://github.com/hujiecpp/YOSO/releases/download/v0.1/yoso_res50_coco.pth)| 13 | 14 | On Cityscapes validation set: 15 | | Backbone | Scale | PQ | FPS| GPU | Model 16 | |:---:|:---:|:---:|:---:|:---:|:---:| 17 | |R50|1024,2048|59.7|11.1|V100|[model](https://github.com/hujiecpp/YOSO/releases/download/v0.1/yoso_res50_cityscapes.pth)| 18 | |R50|512,1024|52.5|22.6|V100|[model](https://github.com/hujiecpp/YOSO/releases/download/v0.1/yoso_res50_cityscapes.pth)| 19 | 20 | On ADE20k validation set: 21 | | Backbone | Scale | PQ | FPS| GPU | Model 22 | |:---:|:---:|:---:|:---:|:---:|:---:| 23 | |R50|640,2560|38.0|35.4|V100|[model](https://github.com/hujiecpp/YOSO/releases/download/v0.1/yoso_res50_ade20k.pth)| 24 | 25 | On Mapillary Vistas validation set: 26 | | Backbone | Scale | PQ | FPS| GPU | Model 27 | |:---:|:---:|:---:|:---:|:---:|:---:| 28 | |R50|2048,2048|34.1|7.1|A100|[model](https://github.com/hujiecpp/YOSO/releases/download/v0.1/yoso_res50_mapillary.pth)| 29 | 30 | ## Getting Started 31 | ### Installation 32 | We recommend to use [Anaconda](https://www.anaconda.com/) for installation. 33 | ```bash 34 | conda create -n YOSO python=3.8 -y 35 | conda activate YOSO 36 | conda install pytorch==1.10.1 torchvision==0.11.2 cudatoolkit=11.3 -c pytorch 37 | pip install pycocotools -i https://pypi.douban.com/simple 38 | pip install git+https://github.com/cocodataset/panopticapi.git 39 | git clone https://github.com/hujiecpp/YOSO.git 40 | cd YOSO 41 | python setup.py develop 42 | ``` 43 | 44 | ### Datasets Preparation 45 | See [Preparing Datasets for Mask2Former](https://github.com/facebookresearch/Mask2Former/tree/main/datasets). 46 | 47 | ### Training & Evaluation 48 | 49 | - Train YOSO (e.g., on COCO dataset with R50 backbone). 50 | ```bash 51 | python projects/YOSO/train_net.py --num-gpus 4 --config-file projects/YOSO/configs/coco/panoptic-segmentation/YOSO-R50.yaml 52 | ``` 53 | 54 | - Evaluate YOSO (e.g., on COCO dataset with R50 backbone). 55 | ```bash 56 | python projects/YOSO/train_net.py --num-gpus 4 --config-file projects/YOSO/configs/coco/panoptic-segmentation/YOSO-R50.yaml --eval-only MODEL.WEIGHTS ./model_zoo/yoso_res50_coco.pth 57 | ``` 58 | 59 | ### Inference on Custom Image or Video 60 | 61 | - Run YOSO demo (e.g., on video with R50 backbone). 
62 | ```bash 63 | python demo/demo.py --config-file projects/YOSO/configs/coco/panoptic-segmentation/YOSO-R50.yaml --video-input input_video.mp4 --output output_video.mp4 --opts MODEL.WEIGHTS ./model_zoo/yoso_res50_coco.pth 64 | ``` 65 | 66 | ## Acknowledgements 67 | 68 | - [Mask2Former](https://github.com/facebookresearch/Mask2Former) 69 | - [K-Net](https://github.com/ZwwWayne/K-Net) 70 | 71 | ## Citing YOSO 72 | 73 | If YOSO helps your research, please cite it in your publications: 74 | 75 | ```BibTeX 76 | @inproceedings{hu2023you, 77 | title={You Only Segment Once: Towards Real-Time Panoptic Segmentation}, 78 | author={Hu, Jie and Huang, Linyan and Ren, Tianhe and Zhang, Shengchuan and Ji, Rongrong and Cao, Liujuan}, 79 | booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition}, 80 | pages={17819--17829}, 81 | year={2023} 82 | } 83 | ``` 84 | -------------------------------------------------------------------------------- /demo/config.py: -------------------------------------------------------------------------------- 1 | from detectron2.config import CfgNode as CN 2 | 3 | def add_yoso_config(cfg): 4 | cfg.MODEL.YOSO = CN() 5 | cfg.MODEL.YOSO.SIZE_DIVISIBILITY = 32 6 | cfg.MODEL.YOSO.NUM_CLASSES = 133 7 | cfg.MODEL.YOSO.NUM_STAGES = 2 8 | 9 | cfg.MODEL.YOSO.IN_FEATURES = ["res2", "res3", "res4", "res5"] 10 | cfg.MODEL.YOSO.HIDDEN_DIM = 256 11 | cfg.MODEL.YOSO.AGG_DIM = 128 12 | cfg.MODEL.YOSO.NUM_PROPOSALS = 100 13 | cfg.MODEL.YOSO.CONV_KERNEL_SIZE_2D = 1 14 | cfg.MODEL.YOSO.CONV_KERNEL_SIZE_1D = 3 15 | cfg.MODEL.YOSO.NUM_CLS_FCS = 1 16 | cfg.MODEL.YOSO.NUM_MASK_FCS = 1 17 | 18 | cfg.MODEL.YOSO.NO_OBJECT_WEIGHT = 0.1 19 | cfg.MODEL.YOSO.CLASS_WEIGHT = 2.0 20 | cfg.MODEL.YOSO.MASK_WEIGHT = 5.0 21 | cfg.MODEL.YOSO.DICE_WEIGHT = 5.0 22 | cfg.MODEL.YOSO.TRAIN_NUM_POINTS = 112 * 112 23 | cfg.MODEL.YOSO.OVERSAMPLE_RATIO = 3.0 24 | cfg.MODEL.YOSO.IMPORTANCE_SAMPLE_RATIO = 0.75 25 | cfg.MODEL.YOSO.TEMPERATIRE = 0.1 26 | 27 | cfg.MODEL.YOSO.TEST = CN() 28 | cfg.MODEL.YOSO.TEST.SEMANTIC_ON = False 29 | cfg.MODEL.YOSO.TEST.INSTANCE_ON = False 30 | cfg.MODEL.YOSO.TEST.PANOPTIC_ON = False 31 | cfg.MODEL.YOSO.TEST.OBJECT_MASK_THRESHOLD = 0.0 32 | cfg.MODEL.YOSO.TEST.OVERLAP_THRESHOLD = 0.0 33 | cfg.MODEL.YOSO.TEST.SEM_SEG_POSTPROCESSING_BEFORE_INFERENCE = False 34 | 35 | cfg.SOLVER.OPTIMIZER = "ADAMW" 36 | cfg.SOLVER.BACKBONE_MULTIPLIER = 0.1 37 | cfg.SOLVER.WEIGHT_DECAY_EMBED = 0.0 38 | cfg.SOLVER.WEIGHT_DECAY_BIAS = None 39 | 40 | cfg.SOLVER.POLY_LR_POWER = 0.9 41 | cfg.SOLVER.POLY_LR_CONSTANT_ENDING = 0.0 42 | 43 | cfg.INPUT.DATASET_MAPPER_NAME = "yoso_panoptic_lsj" 44 | cfg.INPUT.SIZE_DIVISIBILITY = -1 45 | cfg.INPUT.COLOR_AUG_SSD = False 46 | cfg.INPUT.CROP.SINGLE_CATEGORY_MAX_AREA = 1.0 47 | 48 | cfg.INPUT.IMAGE_SIZE = 1024 49 | cfg.INPUT.MIN_SCALE = 0.1 50 | cfg.INPUT.MAX_SCALE = 2.0 51 | 52 | -------------------------------------------------------------------------------- /demo/fig.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hujiecpp/YOSO/04b898d395ffd8318aa3761b0b2b6d20b3514f26/demo/fig.png -------------------------------------------------------------------------------- /detectron2/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | 3 | from .utils.env import setup_environment 4 | 5 | setup_environment() 6 | 7 | 8 | # This line will be programatically read/write by setup.py. 
9 | # Leave them at the bottom of this file and don't touch them. 10 | __version__ = "0.3" 11 | -------------------------------------------------------------------------------- /detectron2/checkpoint/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright (c) Facebook, Inc. and its affiliates. 3 | # File: 4 | 5 | 6 | from . import catalog as _UNUSED # register the handler 7 | from .detection_checkpoint import DetectionCheckpointer 8 | from fvcore.common.checkpoint import Checkpointer, PeriodicCheckpointer 9 | 10 | __all__ = ["Checkpointer", "PeriodicCheckpointer", "DetectionCheckpointer"] 11 | -------------------------------------------------------------------------------- /detectron2/checkpoint/catalog.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | import logging 3 | 4 | from detectron2.utils.file_io import PathHandler, PathManager 5 | 6 | 7 | class ModelCatalog(object): 8 | """ 9 | Store mappings from names to third-party models. 10 | """ 11 | 12 | S3_C2_DETECTRON_PREFIX = "https://dl.fbaipublicfiles.com/detectron" 13 | 14 | # MSRA models have STRIDE_IN_1X1=True. False otherwise. 15 | # NOTE: all BN models here have fused BN into an affine layer. 16 | # As a result, you should only load them to a model with "FrozenBN". 17 | # Loading them to a model with regular BN or SyncBN is wrong. 18 | # Even when loaded to FrozenBN, it is still different from affine by an epsilon, 19 | # which should be negligible for training. 20 | # NOTE: all models here uses PIXEL_STD=[1,1,1] 21 | # NOTE: Most of the BN models here are no longer used. We use the 22 | # re-converted pre-trained models under detectron2 model zoo instead. 
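    # Example (for illustration only; it follows from _get_c2_imagenet_pretrained below):
    # ModelCatalog.get("ImageNetPretrained/MSRA/R-50") resolves to
    # "https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-50.pkl".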
23 | C2_IMAGENET_MODELS = { 24 | "MSRA/R-50": "ImageNetPretrained/MSRA/R-50.pkl", 25 | "MSRA/R-101": "ImageNetPretrained/MSRA/R-101.pkl", 26 | "FAIR/R-50-GN": "ImageNetPretrained/47261647/R-50-GN.pkl", 27 | "FAIR/R-101-GN": "ImageNetPretrained/47592356/R-101-GN.pkl", 28 | "FAIR/X-101-32x8d": "ImageNetPretrained/20171220/X-101-32x8d.pkl", 29 | "FAIR/X-101-64x4d": "ImageNetPretrained/FBResNeXt/X-101-64x4d.pkl", 30 | "FAIR/X-152-32x8d-IN5k": "ImageNetPretrained/25093814/X-152-32x8d-IN5k.pkl", 31 | } 32 | 33 | C2_DETECTRON_PATH_FORMAT = ( 34 | "{prefix}/{url}/output/train/{dataset}/{type}/model_final.pkl" # noqa B950 35 | ) 36 | 37 | C2_DATASET_COCO = "coco_2014_train%3Acoco_2014_valminusminival" 38 | C2_DATASET_COCO_KEYPOINTS = "keypoints_coco_2014_train%3Akeypoints_coco_2014_valminusminival" 39 | 40 | # format: {model_name} -> part of the url 41 | C2_DETECTRON_MODELS = { 42 | "35857197/e2e_faster_rcnn_R-50-C4_1x": "35857197/12_2017_baselines/e2e_faster_rcnn_R-50-C4_1x.yaml.01_33_49.iAX0mXvW", # noqa B950 43 | "35857345/e2e_faster_rcnn_R-50-FPN_1x": "35857345/12_2017_baselines/e2e_faster_rcnn_R-50-FPN_1x.yaml.01_36_30.cUF7QR7I", # noqa B950 44 | "35857890/e2e_faster_rcnn_R-101-FPN_1x": "35857890/12_2017_baselines/e2e_faster_rcnn_R-101-FPN_1x.yaml.01_38_50.sNxI7sX7", # noqa B950 45 | "36761737/e2e_faster_rcnn_X-101-32x8d-FPN_1x": "36761737/12_2017_baselines/e2e_faster_rcnn_X-101-32x8d-FPN_1x.yaml.06_31_39.5MIHi1fZ", # noqa B950 46 | "35858791/e2e_mask_rcnn_R-50-C4_1x": "35858791/12_2017_baselines/e2e_mask_rcnn_R-50-C4_1x.yaml.01_45_57.ZgkA7hPB", # noqa B950 47 | "35858933/e2e_mask_rcnn_R-50-FPN_1x": "35858933/12_2017_baselines/e2e_mask_rcnn_R-50-FPN_1x.yaml.01_48_14.DzEQe4wC", # noqa B950 48 | "35861795/e2e_mask_rcnn_R-101-FPN_1x": "35861795/12_2017_baselines/e2e_mask_rcnn_R-101-FPN_1x.yaml.02_31_37.KqyEK4tT", # noqa B950 49 | "36761843/e2e_mask_rcnn_X-101-32x8d-FPN_1x": "36761843/12_2017_baselines/e2e_mask_rcnn_X-101-32x8d-FPN_1x.yaml.06_35_59.RZotkLKI", # noqa B950 50 | "48616381/e2e_mask_rcnn_R-50-FPN_2x_gn": "GN/48616381/04_2018_gn_baselines/e2e_mask_rcnn_R-50-FPN_2x_gn_0416.13_23_38.bTlTI97Q", # noqa B950 51 | "37697547/e2e_keypoint_rcnn_R-50-FPN_1x": "37697547/12_2017_baselines/e2e_keypoint_rcnn_R-50-FPN_1x.yaml.08_42_54.kdzV35ao", # noqa B950 52 | "35998355/rpn_R-50-C4_1x": "35998355/12_2017_baselines/rpn_R-50-C4_1x.yaml.08_00_43.njH5oD9L", # noqa B950 53 | "35998814/rpn_R-50-FPN_1x": "35998814/12_2017_baselines/rpn_R-50-FPN_1x.yaml.08_06_03.Axg0r179", # noqa B950 54 | "36225147/fast_R-50-FPN_1x": "36225147/12_2017_baselines/fast_rcnn_R-50-FPN_1x.yaml.08_39_09.L3obSdQ2", # noqa B950 55 | } 56 | 57 | @staticmethod 58 | def get(name): 59 | if name.startswith("Caffe2Detectron/COCO"): 60 | return ModelCatalog._get_c2_detectron_baseline(name) 61 | if name.startswith("ImageNetPretrained/"): 62 | return ModelCatalog._get_c2_imagenet_pretrained(name) 63 | raise RuntimeError("model not present in the catalog: {}".format(name)) 64 | 65 | @staticmethod 66 | def _get_c2_imagenet_pretrained(name): 67 | prefix = ModelCatalog.S3_C2_DETECTRON_PREFIX 68 | name = name[len("ImageNetPretrained/") :] 69 | name = ModelCatalog.C2_IMAGENET_MODELS[name] 70 | url = "/".join([prefix, name]) 71 | return url 72 | 73 | @staticmethod 74 | def _get_c2_detectron_baseline(name): 75 | name = name[len("Caffe2Detectron/COCO/") :] 76 | url = ModelCatalog.C2_DETECTRON_MODELS[name] 77 | if "keypoint_rcnn" in name: 78 | dataset = ModelCatalog.C2_DATASET_COCO_KEYPOINTS 79 | else: 80 | dataset = ModelCatalog.C2_DATASET_COCO 81 | 
82 | if "35998355/rpn_R-50-C4_1x" in name: 83 | # this one model is somehow different from others .. 84 | type = "rpn" 85 | else: 86 | type = "generalized_rcnn" 87 | 88 | # Detectron C2 models are stored in the structure defined in `C2_DETECTRON_PATH_FORMAT`. 89 | url = ModelCatalog.C2_DETECTRON_PATH_FORMAT.format( 90 | prefix=ModelCatalog.S3_C2_DETECTRON_PREFIX, url=url, type=type, dataset=dataset 91 | ) 92 | return url 93 | 94 | 95 | class ModelCatalogHandler(PathHandler): 96 | """ 97 | Resolve URL like catalog://. 98 | """ 99 | 100 | PREFIX = "catalog://" 101 | 102 | def _get_supported_prefixes(self): 103 | return [self.PREFIX] 104 | 105 | def _get_local_path(self, path): 106 | logger = logging.getLogger(__name__) 107 | catalog_path = ModelCatalog.get(path[len(self.PREFIX) :]) 108 | logger.info("Catalog entry {} points to {}".format(path, catalog_path)) 109 | return PathManager.get_local_path(catalog_path) 110 | 111 | def _open(self, path, mode="r", **kwargs): 112 | return PathManager.open(self._get_local_path(path), mode, **kwargs) 113 | 114 | 115 | PathManager.register_handler(ModelCatalogHandler()) 116 | -------------------------------------------------------------------------------- /detectron2/checkpoint/detection_checkpoint.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | import pickle 3 | from fvcore.common.checkpoint import Checkpointer 4 | 5 | import detectron2.utils.comm as comm 6 | from detectron2.utils.file_io import PathManager 7 | 8 | from .c2_model_loading import align_and_update_state_dicts 9 | 10 | 11 | class DetectionCheckpointer(Checkpointer): 12 | """ 13 | Same as :class:`Checkpointer`, but is able to handle models in detectron & detectron2 14 | model zoo, and apply conversions for legacy models. 
15 | """ 16 | 17 | def __init__(self, model, save_dir="", *, save_to_disk=None, **checkpointables): 18 | is_main_process = comm.is_main_process() 19 | super().__init__( 20 | model, 21 | save_dir, 22 | save_to_disk=is_main_process if save_to_disk is None else save_to_disk, 23 | **checkpointables, 24 | ) 25 | if hasattr(self, "path_manager"): 26 | self.path_manager = PathManager 27 | else: 28 | # This could only happen for open source 29 | # TODO remove after upgrading fvcore 30 | from fvcore.common.file_io import PathManager as g_PathManager 31 | 32 | for handler in PathManager._path_handlers.values(): 33 | try: 34 | g_PathManager.register_handler(handler) 35 | except KeyError: 36 | pass 37 | 38 | def _load_file(self, filename): 39 | if filename.endswith(".pkl"): 40 | with PathManager.open(filename, "rb") as f: 41 | data = pickle.load(f, encoding="latin1") 42 | if "model" in data and "__author__" in data: 43 | # file is in Detectron2 model zoo format 44 | self.logger.info("Reading a file from '{}'".format(data["__author__"])) 45 | return data 46 | else: 47 | # assume file is from Caffe2 / Detectron1 model zoo 48 | if "blobs" in data: 49 | # Detection models have "blobs", but ImageNet models don't 50 | data = data["blobs"] 51 | data = {k: v for k, v in data.items() if not k.endswith("_momentum")} 52 | return {"model": data, "__author__": "Caffe2", "matching_heuristics": True} 53 | 54 | loaded = super()._load_file(filename) # load native pth checkpoint 55 | if "model" not in loaded: 56 | loaded = {"model": loaded} 57 | return loaded 58 | 59 | def _load_model(self, checkpoint): 60 | if checkpoint.get("matching_heuristics", False): 61 | self._convert_ndarray_to_tensor(checkpoint["model"]) 62 | # convert weights by name-matching heuristics 63 | model_state_dict = self.model.state_dict() 64 | align_and_update_state_dicts( 65 | model_state_dict, 66 | checkpoint["model"], 67 | c2_conversion=checkpoint.get("__author__", None) == "Caffe2", 68 | ) 69 | checkpoint["model"] = model_state_dict 70 | # for non-caffe2 models, use standard ways to load it 71 | incompatible = super()._load_model(checkpoint) 72 | if incompatible is None: # support older versions of fvcore 73 | return None 74 | 75 | model_buffers = dict(self.model.named_buffers(recurse=False)) 76 | for k in ["pixel_mean", "pixel_std"]: 77 | # Ignore missing key message about pixel_mean/std. 78 | # Though they may be missing in old checkpoints, they will be correctly 79 | # initialized from config anyway. 80 | if k in model_buffers: 81 | try: 82 | incompatible.missing_keys.remove(k) 83 | except ValueError: 84 | pass 85 | return incompatible 86 | -------------------------------------------------------------------------------- /detectron2/config/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | from .compat import downgrade_config, upgrade_config 3 | from .config import CfgNode, get_cfg, global_cfg, set_global_cfg, configurable 4 | 5 | __all__ = [ 6 | "CfgNode", 7 | "get_cfg", 8 | "global_cfg", 9 | "set_global_cfg", 10 | "downgrade_config", 11 | "upgrade_config", 12 | "configurable", 13 | ] 14 | -------------------------------------------------------------------------------- /detectron2/data/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | from . 
import transforms # isort:skip 3 | 4 | from .build import ( 5 | build_batch_data_loader, 6 | build_detection_test_loader, 7 | build_detection_train_loader, 8 | get_detection_dataset_dicts, 9 | load_proposals_into_dataset, 10 | print_instances_class_histogram, 11 | ) 12 | from .catalog import DatasetCatalog, MetadataCatalog, Metadata 13 | from .common import DatasetFromList, MapDataset 14 | from .dataset_mapper import DatasetMapper 15 | 16 | # ensure the builtin datasets are registered 17 | from . import datasets, samplers # isort:skip 18 | 19 | __all__ = [k for k in globals().keys() if not k.startswith("_")] 20 | -------------------------------------------------------------------------------- /detectron2/data/datasets/README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | ### Common Datasets 4 | 5 | The dataset implemented here do not need to load the data into the final format. 6 | It should provide the minimal data structure needed to use the dataset, so it can be very efficient. 7 | 8 | For example, for an image dataset, just provide the file names and labels, but don't read the images. 9 | Let the downstream decide how to read. 10 | -------------------------------------------------------------------------------- /detectron2/data/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | from .coco import load_coco_json, load_sem_seg, register_coco_instances 3 | from .coco_panoptic import register_coco_panoptic, register_coco_panoptic_separated 4 | from .lvis import load_lvis_json, register_lvis_instances, get_lvis_instances_meta 5 | from .pascal_voc import load_voc_instances, register_pascal_voc 6 | from . import builtin as _builtin # ensure the builtin datasets are registered 7 | 8 | 9 | __all__ = [k for k in globals().keys() if not k.startswith("_")] 10 | -------------------------------------------------------------------------------- /detectron2/data/datasets/pascal_voc.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright (c) Facebook, Inc. and its affiliates. 3 | 4 | import numpy as np 5 | import os 6 | import xml.etree.ElementTree as ET 7 | from typing import List, Tuple, Union 8 | 9 | from detectron2.data import DatasetCatalog, MetadataCatalog 10 | from detectron2.structures import BoxMode 11 | from detectron2.utils.file_io import PathManager 12 | 13 | __all__ = ["load_voc_instances", "register_pascal_voc"] 14 | 15 | 16 | # fmt: off 17 | CLASS_NAMES = ( 18 | "aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", 19 | "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", 20 | "pottedplant", "sheep", "sofa", "train", "tvmonitor" 21 | ) 22 | # fmt: on 23 | 24 | 25 | def load_voc_instances(dirname: str, split: str, class_names: Union[List[str], Tuple[str, ...]]): 26 | """ 27 | Load Pascal VOC detection annotations to Detectron2 format. 28 | 29 | Args: 30 | dirname: Contain "Annotations", "ImageSets", "JPEGImages" 31 | split (str): one of "train", "test", "val", "trainval" 32 | class_names: list or tuple of class names 33 | """ 34 | with PathManager.open(os.path.join(dirname, "ImageSets", "Main", split + ".txt")) as f: 35 | fileids = np.loadtxt(f, dtype=np.str) 36 | 37 | # Needs to read many small annotation files. 
Makes sense at local 38 | annotation_dirname = PathManager.get_local_path(os.path.join(dirname, "Annotations/")) 39 | dicts = [] 40 | for fileid in fileids: 41 | anno_file = os.path.join(annotation_dirname, fileid + ".xml") 42 | jpeg_file = os.path.join(dirname, "JPEGImages", fileid + ".jpg") 43 | 44 | with PathManager.open(anno_file) as f: 45 | tree = ET.parse(f) 46 | 47 | r = { 48 | "file_name": jpeg_file, 49 | "image_id": fileid, 50 | "height": int(tree.findall("./size/height")[0].text), 51 | "width": int(tree.findall("./size/width")[0].text), 52 | } 53 | instances = [] 54 | 55 | for obj in tree.findall("object"): 56 | cls = obj.find("name").text 57 | # We include "difficult" samples in training. 58 | # Based on limited experiments, they don't hurt accuracy. 59 | # difficult = int(obj.find("difficult").text) 60 | # if difficult == 1: 61 | # continue 62 | bbox = obj.find("bndbox") 63 | bbox = [float(bbox.find(x).text) for x in ["xmin", "ymin", "xmax", "ymax"]] 64 | # Original annotations are integers in the range [1, W or H] 65 | # Assuming they mean 1-based pixel indices (inclusive), 66 | # a box with annotation (xmin=1, xmax=W) covers the whole image. 67 | # In coordinate space this is represented by (xmin=0, xmax=W) 68 | bbox[0] -= 1.0 69 | bbox[1] -= 1.0 70 | instances.append( 71 | {"category_id": class_names.index(cls), "bbox": bbox, "bbox_mode": BoxMode.XYXY_ABS} 72 | ) 73 | r["annotations"] = instances 74 | dicts.append(r) 75 | return dicts 76 | 77 | 78 | def register_pascal_voc(name, dirname, split, year, class_names=CLASS_NAMES): 79 | DatasetCatalog.register(name, lambda: load_voc_instances(dirname, split, class_names)) 80 | MetadataCatalog.get(name).set( 81 | thing_classes=list(class_names), dirname=dirname, year=year, split=split 82 | ) 83 | -------------------------------------------------------------------------------- /detectron2/data/datasets/register_coco.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | from .coco import register_coco_instances # noqa 3 | from .coco_panoptic import register_coco_panoptic_separated # noqa 4 | -------------------------------------------------------------------------------- /detectron2/data/samplers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | from .distributed_sampler import InferenceSampler, RepeatFactorTrainingSampler, TrainingSampler, RandomSubsetTrainingSampler 3 | from .grouped_batch_sampler import GroupedBatchSampler 4 | 5 | __all__ = [ 6 | "GroupedBatchSampler", 7 | "TrainingSampler", 8 | "InferenceSampler", 9 | "RepeatFactorTrainingSampler", 10 | "RandomSubsetTrainingSampler" 11 | ] 12 | -------------------------------------------------------------------------------- /detectron2/data/samplers/grouped_batch_sampler.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | import numpy as np 3 | from torch.utils.data.sampler import BatchSampler, Sampler 4 | 5 | 6 | class GroupedBatchSampler(BatchSampler): 7 | """ 8 | Wraps another sampler to yield a mini-batch of indices. 9 | It enforces that the batch only contain elements from the same group. 10 | It also tries to provide mini-batches which follows an ordering which is 11 | as close as possible to the ordering from the original sampler. 
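
    Example (illustrative; the group ids below are made up, e.g. aspect-ratio buckets)::

        batch_sampler = GroupedBatchSampler(sampler, group_ids=[0, 1, 0, 1], batch_size=2)
        # every yielded mini-batch contains indices from a single group only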
12 | """ 13 | 14 | def __init__(self, sampler, group_ids, batch_size): 15 | """ 16 | Args: 17 | sampler (Sampler): Base sampler. 18 | group_ids (list[int]): If the sampler produces indices in range [0, N), 19 | `group_ids` must be a list of `N` ints which contains the group id of each sample. 20 | The group ids must be a set of integers in the range [0, num_groups). 21 | batch_size (int): Size of mini-batch. 22 | """ 23 | if not isinstance(sampler, Sampler): 24 | raise ValueError( 25 | "sampler should be an instance of " 26 | "torch.utils.data.Sampler, but got sampler={}".format(sampler) 27 | ) 28 | self.sampler = sampler 29 | self.group_ids = np.asarray(group_ids) 30 | assert self.group_ids.ndim == 1 31 | self.batch_size = batch_size 32 | groups = np.unique(self.group_ids).tolist() 33 | 34 | # buffer the indices of each group until batch size is reached 35 | self.buffer_per_group = {k: [] for k in groups} 36 | 37 | def __iter__(self): 38 | for idx in self.sampler: 39 | group_id = self.group_ids[idx] 40 | group_buffer = self.buffer_per_group[group_id] 41 | group_buffer.append(idx) 42 | if len(group_buffer) == self.batch_size: 43 | yield group_buffer[:] # yield a copy of the list 44 | del group_buffer[:] 45 | 46 | def __len__(self): 47 | raise NotImplementedError("len() of GroupedBatchSampler is not well-defined.") 48 | -------------------------------------------------------------------------------- /detectron2/data/transforms/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | from fvcore.transforms.transform import Transform, TransformList # order them first 3 | from fvcore.transforms.transform import * 4 | from .transform import * 5 | from .augmentation import * 6 | from .augmentation_impl import * 7 | 8 | __all__ = [k for k in globals().keys() if not k.startswith("_")] 9 | -------------------------------------------------------------------------------- /detectron2/engine/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | 3 | from .launch import * 4 | from .train_loop import * 5 | 6 | __all__ = [k for k in globals().keys() if not k.startswith("_")] 7 | 8 | 9 | # prefer to let hooks and defaults live in separate namespaces (therefore not in __all__) 10 | # but still make them available here 11 | from .hooks import * 12 | from .defaults import * 13 | -------------------------------------------------------------------------------- /detectron2/engine/launch.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | import logging 3 | import torch 4 | import torch.distributed as dist 5 | import torch.multiprocessing as mp 6 | 7 | from detectron2.utils import comm 8 | 9 | __all__ = ["launch"] 10 | 11 | 12 | def _find_free_port(): 13 | import socket 14 | 15 | sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) 16 | # Binding to port 0 will cause the OS to find an available port for us 17 | sock.bind(("", 0)) 18 | port = sock.getsockname()[1] 19 | sock.close() 20 | # NOTE: there is still a chance the port could be taken by other processes. 21 | return port 22 | 23 | 24 | def launch(main_func, num_gpus_per_machine, num_machines=1, machine_rank=0, dist_url=None, args=()): 25 | """ 26 | Launch multi-gpu or distributed training. 27 | This function must be called on all machines involved in the training. 
28 | It will spawn child processes (defined by ``num_gpus_per_machine``) on each machine. 29 | 30 | Args: 31 | main_func: a function that will be called by `main_func(*args)` 32 | num_gpus_per_machine (int): number of GPUs per machine 33 | num_machines (int): the total number of machines 34 | machine_rank (int): the rank of this machine 35 | dist_url (str): url to connect to for distributed jobs, including protocol 36 | e.g. "tcp://127.0.0.1:8686". 37 | Can be set to "auto" to automatically select a free port on localhost 38 | args (tuple): arguments passed to main_func 39 | """ 40 | world_size = num_machines * num_gpus_per_machine 41 | if world_size > 1: 42 | # https://github.com/pytorch/pytorch/pull/14391 43 | # TODO prctl in spawned processes 44 | 45 | if dist_url == "auto": 46 | assert num_machines == 1, "dist_url=auto not supported in multi-machine jobs." 47 | port = _find_free_port() 48 | dist_url = f"tcp://127.0.0.1:{port}" 49 | if num_machines > 1 and dist_url.startswith("file://"): 50 | logger = logging.getLogger(__name__) 51 | logger.warning( 52 | "file:// is not a reliable init_method in multi-machine jobs. Prefer tcp://" 53 | ) 54 | 55 | mp.spawn( 56 | _distributed_worker, 57 | nprocs=num_gpus_per_machine, 58 | args=(main_func, world_size, num_gpus_per_machine, machine_rank, dist_url, args), 59 | daemon=False, 60 | ) 61 | else: 62 | main_func(*args) 63 | 64 | 65 | def _distributed_worker( 66 | local_rank, main_func, world_size, num_gpus_per_machine, machine_rank, dist_url, args 67 | ): 68 | assert torch.cuda.is_available(), "cuda is not available. Please check your installation." 69 | global_rank = machine_rank * num_gpus_per_machine + local_rank 70 | try: 71 | dist.init_process_group( 72 | backend="NCCL", init_method=dist_url, world_size=world_size, rank=global_rank 73 | ) 74 | except Exception as e: 75 | logger = logging.getLogger(__name__) 76 | logger.error("Process group URL: {}".format(dist_url)) 77 | raise e 78 | # synchronize is needed here to prevent a possible timeout after calling init_process_group 79 | # See: https://github.com/facebookresearch/maskrcnn-benchmark/issues/172 80 | comm.synchronize() 81 | 82 | assert num_gpus_per_machine <= torch.cuda.device_count() 83 | torch.cuda.set_device(local_rank) 84 | 85 | # Setup the local process group (which contains ranks within the same machine) 86 | assert comm._LOCAL_PROCESS_GROUP is None 87 | num_machines = world_size // num_gpus_per_machine 88 | for i in range(num_machines): 89 | ranks_on_i = list(range(i * num_gpus_per_machine, (i + 1) * num_gpus_per_machine)) 90 | pg = dist.new_group(ranks_on_i) 91 | if i == machine_rank: 92 | comm._LOCAL_PROCESS_GROUP = pg 93 | 94 | main_func(*args) 95 | -------------------------------------------------------------------------------- /detectron2/evaluation/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
2 | from .cityscapes_evaluation import CityscapesInstanceEvaluator, CityscapesSemSegEvaluator 3 | from .coco_evaluation import COCOEvaluator 4 | from .rotated_coco_evaluation import RotatedCOCOEvaluator 5 | from .evaluator import DatasetEvaluator, DatasetEvaluators, inference_context, inference_on_dataset 6 | from .lvis_evaluation import LVISEvaluator 7 | from .panoptic_evaluation import COCOPanopticEvaluator 8 | from .pascal_voc_evaluation import PascalVOCDetectionEvaluator 9 | from .sem_seg_evaluation import SemSegEvaluator 10 | from .testing import print_csv_format, verify_results 11 | 12 | __all__ = [k for k in globals().keys() if not k.startswith("_")] 13 | -------------------------------------------------------------------------------- /detectron2/evaluation/fast_eval_api.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | import copy 3 | import numpy as np 4 | import time 5 | from pycocotools.cocoeval import COCOeval 6 | 7 | from detectron2 import _C 8 | 9 | 10 | class COCOeval_opt(COCOeval): 11 | """ 12 | This is a slightly modified version of the original COCO API, where the functions evaluateImg() 13 | and accumulate() are implemented in C++ to speedup evaluation 14 | """ 15 | 16 | def evaluate(self): 17 | """ 18 | Run per image evaluation on given images and store results in self.evalImgs_cpp, a 19 | datastructure that isn't readable from Python but is used by a c++ implementation of 20 | accumulate(). Unlike the original COCO PythonAPI, we don't populate the datastructure 21 | self.evalImgs because this datastructure is a computational bottleneck. 22 | :return: None 23 | """ 24 | tic = time.time() 25 | 26 | print("Running per image evaluation...") 27 | p = self.params 28 | # add backward compatibility if useSegm is specified in params 29 | if p.useSegm is not None: 30 | p.iouType = "segm" if p.useSegm == 1 else "bbox" 31 | print("useSegm (deprecated) is not None. 
Running {} evaluation".format(p.iouType)) 32 | print("Evaluate annotation type *{}*".format(p.iouType)) 33 | p.imgIds = list(np.unique(p.imgIds)) 34 | if p.useCats: 35 | p.catIds = list(np.unique(p.catIds)) 36 | p.maxDets = sorted(p.maxDets) 37 | self.params = p 38 | 39 | self._prepare() 40 | 41 | # loop through images, area range, max detection number 42 | catIds = p.catIds if p.useCats else [-1] 43 | 44 | if p.iouType == "segm" or p.iouType == "bbox": 45 | computeIoU = self.computeIoU 46 | elif p.iouType == "keypoints": 47 | computeIoU = self.computeOks 48 | self.ious = { 49 | (imgId, catId): computeIoU(imgId, catId) for imgId in p.imgIds for catId in catIds 50 | } 51 | 52 | maxDet = p.maxDets[-1] 53 | 54 | # <<<< Beginning of code differences with original COCO API 55 | def convert_instances_to_cpp(instances, is_det=False): 56 | # Convert annotations for a list of instances in an image to a format that's fast 57 | # to access in C++ 58 | instances_cpp = [] 59 | for instance in instances: 60 | instance_cpp = _C.InstanceAnnotation( 61 | int(instance["id"]), 62 | instance["score"] if is_det else instance.get("score", 0.0), 63 | instance["area"], 64 | bool(instance.get("iscrowd", 0)), 65 | bool(instance.get("ignore", 0)), 66 | ) 67 | instances_cpp.append(instance_cpp) 68 | return instances_cpp 69 | 70 | # Convert GT annotations, detections, and IOUs to a format that's fast to access in C++ 71 | ground_truth_instances = [ 72 | [convert_instances_to_cpp(self._gts[imgId, catId]) for catId in p.catIds] 73 | for imgId in p.imgIds 74 | ] 75 | detected_instances = [ 76 | [convert_instances_to_cpp(self._dts[imgId, catId], is_det=True) for catId in p.catIds] 77 | for imgId in p.imgIds 78 | ] 79 | ious = [[self.ious[imgId, catId] for catId in catIds] for imgId in p.imgIds] 80 | 81 | if not p.useCats: 82 | # For each image, flatten per-category lists into a single list 83 | ground_truth_instances = [[[o for c in i for o in c]] for i in ground_truth_instances] 84 | detected_instances = [[[o for c in i for o in c]] for i in detected_instances] 85 | 86 | # Call C++ implementation of self.evaluateImgs() 87 | self._evalImgs_cpp = _C.COCOevalEvaluateImages( 88 | p.areaRng, maxDet, p.iouThrs, ious, ground_truth_instances, detected_instances 89 | ) 90 | self._evalImgs = None 91 | 92 | self._paramsEval = copy.deepcopy(self.params) 93 | toc = time.time() 94 | print("COCOeval_opt.evaluate() finished in {:0.2f} seconds.".format(toc - tic)) 95 | # >>>> End of code differences with original COCO API 96 | 97 | def accumulate(self): 98 | """ 99 | Accumulate per image evaluation results and store the result in self.eval. 
Does not 100 | support changing parameter settings from those used by self.evaluate() 101 | """ 102 | print("Accumulating evaluation results...") 103 | tic = time.time() 104 | if not hasattr(self, "_evalImgs_cpp"): 105 | print("Please run evaluate() first") 106 | 107 | self.eval = _C.COCOevalAccumulate(self._paramsEval, self._evalImgs_cpp) 108 | 109 | # recall is num_iou_thresholds X num_categories X num_area_ranges X num_max_detections 110 | self.eval["recall"] = np.array(self.eval["recall"]).reshape( 111 | self.eval["counts"][:1] + self.eval["counts"][2:] 112 | ) 113 | 114 | # precision and scores are num_iou_thresholds X num_recall_thresholds X num_categories X 115 | # num_area_ranges X num_max_detections 116 | self.eval["precision"] = np.array(self.eval["precision"]).reshape(self.eval["counts"]) 117 | self.eval["scores"] = np.array(self.eval["scores"]).reshape(self.eval["counts"]) 118 | toc = time.time() 119 | print("COCOeval_opt.accumulate() finished in {:0.2f} seconds.".format(toc - tic)) 120 | -------------------------------------------------------------------------------- /detectron2/evaluation/testing.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | import logging 3 | import numpy as np 4 | import pprint 5 | import sys 6 | from collections import OrderedDict 7 | from collections.abc import Mapping 8 | 9 | 10 | def print_csv_format(results): 11 | """ 12 | Print main metrics in a format similar to Detectron, 13 | so that they are easy to copypaste into a spreadsheet. 14 | 15 | Args: 16 | results (OrderedDict[dict]): task_name -> {metric -> score} 17 | """ 18 | assert isinstance(results, OrderedDict), results # unordered results cannot be properly printed 19 | logger = logging.getLogger(__name__) 20 | for task, res in results.items(): 21 | # Don't print "AP-category" metrics since they are usually not tracked. 22 | important_res = [(k, v) for k, v in res.items() if "-" not in k] 23 | logger.info("copypaste: Task: {}".format(task)) 24 | logger.info("copypaste: " + ",".join([k[0] for k in important_res])) 25 | logger.info("copypaste: " + ",".join(["{0:.4f}".format(k[1]) for k in important_res])) 26 | 27 | 28 | def verify_results(cfg, results): 29 | """ 30 | Args: 31 | results (OrderedDict[dict]): task_name -> {metric -> score} 32 | 33 | Returns: 34 | bool: whether the verification succeeds or not 35 | """ 36 | expected_results = cfg.TEST.EXPECTED_RESULTS 37 | if not len(expected_results): 38 | return True 39 | 40 | ok = True 41 | for task, metric, expected, tolerance in expected_results: 42 | actual = results[task].get(metric, None) 43 | if actual is None: 44 | ok = False 45 | continue 46 | if not np.isfinite(actual): 47 | ok = False 48 | continue 49 | diff = abs(actual - expected) 50 | if diff > tolerance: 51 | ok = False 52 | 53 | logger = logging.getLogger(__name__) 54 | if not ok: 55 | logger.error("Result verification failed!") 56 | logger.error("Expected Results: " + str(expected_results)) 57 | logger.error("Actual Results: " + pprint.pformat(results)) 58 | 59 | sys.exit(1) 60 | else: 61 | logger.info("Results verification passed.") 62 | return ok 63 | 64 | 65 | def flatten_results_dict(results): 66 | """ 67 | Expand a hierarchical dict of scalars into a flat dict of scalars. 68 | If results[k1][k2][k3] = v, the returned dict will have the entry 69 | {"k1/k2/k3": v}. 
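    For example (with illustrative metric names), ``{"bbox": {"AP": 40.0}, "segm": {"AP": 35.0}}``
    flattens to ``{"bbox/AP": 40.0, "segm/AP": 35.0}``.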
70 | 71 | Args: 72 | results (dict): 73 | """ 74 | r = {} 75 | for k, v in results.items(): 76 | if isinstance(v, Mapping): 77 | v = flatten_results_dict(v) 78 | for kk, vv in v.items(): 79 | r[k + "/" + kk] = vv 80 | else: 81 | r[k] = v 82 | return r 83 | -------------------------------------------------------------------------------- /detectron2/export/README.md: -------------------------------------------------------------------------------- 1 | 2 | This directory contains code to prepare a detectron2 model for deployment. 3 | Currently it supports exporting a detectron2 model to Caffe2 format through ONNX. 4 | 5 | Please see [documentation](https://detectron2.readthedocs.io/tutorials/deployment.html) for its usage. 6 | 7 | 8 | ### Acknowledgements 9 | 10 | Thanks to Mobile Vision team at Facebook for developing the Caffe2 conversion tools. 11 | 12 | Thanks to Computing Platform Department - PAI team at Alibaba Group (@bddpqq, @chenbohua3) who 13 | help export Detectron2 models to TorchScript. 14 | -------------------------------------------------------------------------------- /detectron2/export/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from .api import * 4 | 5 | __all__ = [k for k in globals().keys() if not k.startswith("_")] 6 | -------------------------------------------------------------------------------- /detectron2/export/caffe2_inference.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | 3 | import logging 4 | import numpy as np 5 | from itertools import count 6 | import torch 7 | from caffe2.proto import caffe2_pb2 8 | from caffe2.python import core 9 | 10 | from .caffe2_modeling import META_ARCH_CAFFE2_EXPORT_TYPE_MAP, convert_batched_inputs_to_c2_format 11 | from .shared import ScopedWS, get_pb_arg_vali, get_pb_arg_vals, infer_device_type 12 | 13 | logger = logging.getLogger(__name__) 14 | 15 | 16 | # ===== ref: mobile-vision's 'Caffe2Wrapper' class ====== 17 | class ProtobufModel(torch.nn.Module): 18 | """ 19 | Wrapper of a caffe2's protobuf model. 20 | It works just like nn.Module, but running caffe2 under the hood. 21 | Input/Output are Dict[str, tensor] whose keys are in external_input/output. 
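    A minimal usage sketch (illustrative; the two nets are assumed to be already built)::

        model = ProtobufModel(predict_net, init_net)
        outputs = model((image_tensor,))  # dict: external_output blob name -> torch.Tensor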
22 | """ 23 | 24 | _ids = count(0) 25 | 26 | def __init__(self, predict_net, init_net): 27 | logger.info(f"Initializing ProtobufModel for: {predict_net.name} ...") 28 | super().__init__() 29 | assert isinstance(predict_net, caffe2_pb2.NetDef) 30 | assert isinstance(init_net, caffe2_pb2.NetDef) 31 | # create unique temporary workspace for each instance 32 | self.ws_name = "__tmp_ProtobufModel_{}__".format(next(self._ids)) 33 | self.net = core.Net(predict_net) 34 | 35 | logger.info("Running init_net once to fill the parameters ...") 36 | with ScopedWS(self.ws_name, is_reset=True, is_cleanup=False) as ws: 37 | ws.RunNetOnce(init_net) 38 | uninitialized_external_input = [] 39 | for blob in self.net.Proto().external_input: 40 | if blob not in ws.Blobs(): 41 | uninitialized_external_input.append(blob) 42 | ws.CreateBlob(blob) 43 | ws.CreateNet(self.net) 44 | 45 | self._error_msgs = set() 46 | self._input_blobs = uninitialized_external_input 47 | 48 | def _infer_output_devices(self, inputs): 49 | """ 50 | Returns: 51 | list[str]: list of device for each external output 52 | """ 53 | 54 | def _get_device_type(torch_tensor): 55 | assert torch_tensor.device.type in ["cpu", "cuda"] 56 | assert torch_tensor.device.index == 0 57 | return torch_tensor.device.type 58 | 59 | predict_net = self.net.Proto() 60 | input_device_types = { 61 | (name, 0): _get_device_type(tensor) for name, tensor in zip(self._input_blobs, inputs) 62 | } 63 | device_type_map = infer_device_type( 64 | predict_net, known_status=input_device_types, device_name_style="pytorch" 65 | ) 66 | ssa, versions = core.get_ssa(predict_net) 67 | versioned_outputs = [(name, versions[name]) for name in predict_net.external_output] 68 | output_devices = [device_type_map[outp] for outp in versioned_outputs] 69 | return output_devices 70 | 71 | def forward(self, inputs): 72 | """ 73 | Args: 74 | inputs (tuple[torch.Tensor]) 75 | 76 | Returns: 77 | dict[str, torch.Tensor] 78 | """ 79 | assert len(inputs) == len(self._input_blobs), ( 80 | f"Length of inputs ({len(inputs)}) " 81 | f"doesn't match the required input blobs: {self._input_blobs}" 82 | ) 83 | 84 | with ScopedWS(self.ws_name, is_reset=False, is_cleanup=False) as ws: 85 | for b, tensor in zip(self._input_blobs, inputs): 86 | ws.FeedBlob(b, tensor) 87 | 88 | try: 89 | ws.RunNet(self.net.Proto().name) 90 | except RuntimeError as e: 91 | if not str(e) in self._error_msgs: 92 | self._error_msgs.add(str(e)) 93 | logger.warning("Encountered new RuntimeError: \n{}".format(str(e))) 94 | logger.warning("Catch the error and use partial results.") 95 | 96 | c2_outputs = [ws.FetchBlob(b) for b in self.net.Proto().external_output] 97 | # Remove outputs of current run, this is necessary in order to 98 | # prevent fetching the result from previous run if the model fails 99 | # in the middle. 100 | for b in self.net.Proto().external_output: 101 | # Needs to create uninitialized blob to make the net runable. 102 | # This is "equivalent" to: ws.RemoveBlob(b) then ws.CreateBlob(b), 103 | # but there'no such API. 
104 | ws.FeedBlob(b, f"{b}, a C++ native class of type nullptr (uninitialized).") 105 | 106 | # Cast output to torch.Tensor on the desired device 107 | output_devices = ( 108 | self._infer_output_devices(inputs) 109 | if any(t.device.type != "cpu" for t in inputs) 110 | else ["cpu" for _ in self.net.Proto().external_output] 111 | ) 112 | 113 | outputs = [] 114 | for name, c2_output, device in zip( 115 | self.net.Proto().external_output, c2_outputs, output_devices 116 | ): 117 | if not isinstance(c2_output, np.ndarray): 118 | raise RuntimeError( 119 | "Invalid output for blob {}, received: {}".format(name, c2_output) 120 | ) 121 | outputs.append(torch.Tensor(c2_output).to(device=device)) 122 | # TODO change to tuple in the future 123 | return dict(zip(self.net.Proto().external_output, outputs)) 124 | 125 | 126 | class ProtobufDetectionModel(torch.nn.Module): 127 | """ 128 | A class works just like a pytorch meta arch in terms of inference, but running 129 | caffe2 model under the hood. 130 | """ 131 | 132 | def __init__(self, predict_net, init_net, *, convert_outputs=None): 133 | """ 134 | Args: 135 | predict_net, init_net (core.Net): caffe2 nets 136 | convert_outptus (callable): a function that converts caffe2 137 | outputs to the same format of the original pytorch model. 138 | By default, use the one defined in the caffe2 meta_arch. 139 | """ 140 | super().__init__() 141 | self.protobuf_model = ProtobufModel(predict_net, init_net) 142 | self.size_divisibility = get_pb_arg_vali(predict_net, "size_divisibility", 0) 143 | self.device = get_pb_arg_vals(predict_net, "device", b"cpu").decode("ascii") 144 | 145 | if convert_outputs is None: 146 | meta_arch = get_pb_arg_vals(predict_net, "meta_architecture", b"GeneralizedRCNN") 147 | meta_arch = META_ARCH_CAFFE2_EXPORT_TYPE_MAP[meta_arch.decode("ascii")] 148 | self._convert_outputs = meta_arch.get_outputs_converter(predict_net, init_net) 149 | else: 150 | self._convert_outputs = convert_outputs 151 | 152 | def _convert_inputs(self, batched_inputs): 153 | # currently all models convert inputs in the same way 154 | return convert_batched_inputs_to_c2_format( 155 | batched_inputs, self.size_divisibility, self.device 156 | ) 157 | 158 | def forward(self, batched_inputs): 159 | c2_inputs = self._convert_inputs(batched_inputs) 160 | c2_results = self.protobuf_model(c2_inputs) 161 | return self._convert_outputs(batched_inputs, c2_inputs, c2_results) 162 | -------------------------------------------------------------------------------- /detectron2/export/caffe2_patch.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | 3 | import contextlib 4 | from unittest import mock 5 | import torch 6 | 7 | from detectron2.modeling import poolers 8 | from detectron2.modeling.proposal_generator import rpn 9 | from detectron2.modeling.roi_heads import keypoint_head, mask_head 10 | from detectron2.modeling.roi_heads.fast_rcnn import FastRCNNOutputLayers 11 | 12 | from .c10 import ( 13 | Caffe2Compatible, 14 | Caffe2FastRCNNOutputsInference, 15 | Caffe2KeypointRCNNInference, 16 | Caffe2MaskRCNNInference, 17 | Caffe2ROIPooler, 18 | Caffe2RPN, 19 | ) 20 | 21 | 22 | class GenericMixin(object): 23 | pass 24 | 25 | 26 | class Caffe2CompatibleConverter(object): 27 | """ 28 | A GenericUpdater which implements the `create_from` interface, by modifying 29 | module object and assign it with another class replaceCls. 
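
    Example (illustrative; mirrors the use in ``patch_generalized_rcnn`` below)::

        model = patch(model, rpn.RPN, Caffe2CompatibleConverter(Caffe2RPN))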
30 | """ 31 | 32 | def __init__(self, replaceCls): 33 | self.replaceCls = replaceCls 34 | 35 | def create_from(self, module): 36 | # update module's class to the new class 37 | assert isinstance(module, torch.nn.Module) 38 | if issubclass(self.replaceCls, GenericMixin): 39 | # replaceCls should act as mixin, create a new class on-the-fly 40 | new_class = type( 41 | "{}MixedWith{}".format(self.replaceCls.__name__, module.__class__.__name__), 42 | (self.replaceCls, module.__class__), 43 | {}, # {"new_method": lambda self: ...}, 44 | ) 45 | module.__class__ = new_class 46 | else: 47 | # replaceCls is complete class, this allow arbitrary class swap 48 | module.__class__ = self.replaceCls 49 | 50 | # initialize Caffe2Compatible 51 | if isinstance(module, Caffe2Compatible): 52 | module.tensor_mode = False 53 | 54 | return module 55 | 56 | 57 | def patch(model, target, updater, *args, **kwargs): 58 | """ 59 | recursively (post-order) update all modules with the target type and its 60 | subclasses, make a initialization/composition/inheritance/... via the 61 | updater.create_from. 62 | """ 63 | for name, module in model.named_children(): 64 | model._modules[name] = patch(module, target, updater, *args, **kwargs) 65 | if isinstance(model, target): 66 | return updater.create_from(model, *args, **kwargs) 67 | return model 68 | 69 | 70 | def patch_generalized_rcnn(model): 71 | ccc = Caffe2CompatibleConverter 72 | model = patch(model, rpn.RPN, ccc(Caffe2RPN)) 73 | model = patch(model, poolers.ROIPooler, ccc(Caffe2ROIPooler)) 74 | 75 | return model 76 | 77 | 78 | @contextlib.contextmanager 79 | def mock_fastrcnn_outputs_inference( 80 | tensor_mode, check=True, box_predictor_type=FastRCNNOutputLayers 81 | ): 82 | with mock.patch.object( 83 | box_predictor_type, 84 | "inference", 85 | autospec=True, 86 | side_effect=Caffe2FastRCNNOutputsInference(tensor_mode), 87 | ) as mocked_func: 88 | yield 89 | if check: 90 | assert mocked_func.call_count > 0 91 | 92 | 93 | @contextlib.contextmanager 94 | def mock_mask_rcnn_inference(tensor_mode, patched_module, check=True): 95 | with mock.patch( 96 | "{}.mask_rcnn_inference".format(patched_module), side_effect=Caffe2MaskRCNNInference() 97 | ) as mocked_func: 98 | yield 99 | if check: 100 | assert mocked_func.call_count > 0 101 | 102 | 103 | @contextlib.contextmanager 104 | def mock_keypoint_rcnn_inference(tensor_mode, patched_module, use_heatmap_max_keypoint, check=True): 105 | with mock.patch( 106 | "{}.keypoint_rcnn_inference".format(patched_module), 107 | side_effect=Caffe2KeypointRCNNInference(use_heatmap_max_keypoint), 108 | ) as mocked_func: 109 | yield 110 | if check: 111 | assert mocked_func.call_count > 0 112 | 113 | 114 | class ROIHeadsPatcher: 115 | def __init__(self, heads, use_heatmap_max_keypoint): 116 | self.heads = heads 117 | self.use_heatmap_max_keypoint = use_heatmap_max_keypoint 118 | 119 | @contextlib.contextmanager 120 | def mock_roi_heads(self, tensor_mode=True): 121 | """ 122 | Patching several inference functions inside ROIHeads and its subclasses 123 | 124 | Args: 125 | tensor_mode (bool): whether the inputs/outputs are caffe2's tensor 126 | format or not. Default to True. 127 | """ 128 | # NOTE: this requries the `keypoint_rcnn_inference` and `mask_rcnn_inference` 129 | # are called inside the same file as BaseXxxHead due to using mock.patch. 
130 | kpt_heads_mod = keypoint_head.BaseKeypointRCNNHead.__module__ 131 | mask_head_mod = mask_head.BaseMaskRCNNHead.__module__ 132 | 133 | mock_ctx_managers = [ 134 | mock_fastrcnn_outputs_inference( 135 | tensor_mode=tensor_mode, 136 | check=True, 137 | box_predictor_type=type(self.heads.box_predictor), 138 | ) 139 | ] 140 | if getattr(self.heads, "keypoint_on", False): 141 | mock_ctx_managers += [ 142 | mock_keypoint_rcnn_inference( 143 | tensor_mode, kpt_heads_mod, self.use_heatmap_max_keypoint 144 | ) 145 | ] 146 | if getattr(self.heads, "mask_on", False): 147 | mock_ctx_managers += [mock_mask_rcnn_inference(tensor_mode, mask_head_mod)] 148 | 149 | with contextlib.ExitStack() as stack: # python 3.3+ 150 | for mgr in mock_ctx_managers: 151 | stack.enter_context(mgr) 152 | yield 153 | -------------------------------------------------------------------------------- /detectron2/export/torchscript.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | 3 | import os 4 | import torch 5 | 6 | from detectron2.utils.file_io import PathManager 7 | 8 | from .torchscript_patch import patch_instances, patch_nonscriptable_classes 9 | 10 | 11 | def export_torchscript_with_instances(model, fields): 12 | """ 13 | Run :func:`torch.jit.script` on a model that uses the :class:`Instances` class. Since 14 | attributes of :class:`Instances` are "dynamically" added in eager mode,it is difficult 15 | for torchscript to support it out of the box. This function is made to support scripting 16 | a model that uses :class:`Instances`. It does the following: 17 | 18 | 1. Create a scriptable ``new_Instances`` class which behaves similarly to ``Instances``, 19 | but with all attributes been "static". 20 | The attributes need to be statically declared in the ``fields`` argument. 21 | 2. Register ``new_Instances`` to torchscript, and force torchscript to 22 | use it when trying to compile ``Instances``. 23 | 24 | After this function, the process will be reverted. User should be able to script another model 25 | using different fields. 26 | 27 | Example: 28 | Assume that ``Instances`` in the model consist of two attributes named 29 | ``proposal_boxes`` and ``objectness_logits`` with type :class:`Boxes` and 30 | :class:`Tensor` respectively during inference. You can call this function like: 31 | 32 | :: 33 | fields = {"proposal_boxes": Boxes, "objectness_logits": torch.Tensor} 34 | torchscipt_model = export_torchscript_with_instances(model, fields) 35 | 36 | Note: 37 | Currently we only support models in evaluation mode. 38 | 39 | Args: 40 | model (nn.Module): The input model to be exported to torchscript. 41 | fields (Dict[str, type]): Attribute names and corresponding type that 42 | ``Instances`` will use in the model. Note that all attributes used in ``Instances`` 43 | need to be added, regarldess of whether they are inputs/outputs of the model. 44 | Data type not defined in detectron2 is not supported for now. 45 | 46 | Returns: 47 | torch.jit.ScriptModule: the input model in torchscript format 48 | """ 49 | patch_nonscriptable_classes() 50 | 51 | assert ( 52 | not model.training 53 | ), "Currently we only support exporting models in evaluation mode to torchscript" 54 | 55 | with patch_instances(fields): 56 | scripted_model = torch.jit.script(model) 57 | return scripted_model 58 | 59 | 60 | def dump_torchscript_IR(model, dir): 61 | """ 62 | Dump IR of a TracedModule/ScriptModule at various levels. 63 | Useful for debugging. 
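A hedged usage sketch combining the two export helpers defined in this file; it assumes `model` is a detectron2 model already switched to eval mode, and the field names mirror the docstring example above. The output directory is an arbitrary choice.

import torch
from detectron2.structures import Boxes
from detectron2.export.torchscript import export_torchscript_with_instances, dump_torchscript_IR

fields = {"proposal_boxes": Boxes, "objectness_logits": torch.Tensor}
ts_model = export_torchscript_with_instances(model, fields)   # model.training must be False
dump_torchscript_IR(ts_model, "./ts_dump")                     # writes model_ts_code.txt, model_ts_IR*.txt, model.txt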
64 | 65 | Args: 66 | model (TracedModule or ScriptModule): traced or scripted module 67 | dir (str): output directory to dump files. 68 | """ 69 | PathManager.mkdirs(dir) 70 | 71 | def _get_script_mod(mod): 72 | if isinstance(mod, torch.jit.TracedModule): 73 | return mod._actual_script_module 74 | return mod 75 | 76 | # Dump pretty-printed code: https://pytorch.org/docs/stable/jit.html#inspecting-code 77 | with PathManager.open(os.path.join(dir, "model_ts_code.txt"), "w") as f: 78 | 79 | def get_code(mod): 80 | # Try a few ways to get code using private attributes. 81 | try: 82 | # This contains more information than just `mod.code` 83 | return _get_script_mod(mod)._c.code 84 | except AttributeError: 85 | pass 86 | try: 87 | return mod.code 88 | except AttributeError: 89 | return None 90 | 91 | def dump_code(prefix, mod): 92 | code = get_code(mod) 93 | name = prefix or "root model" 94 | if code is None: 95 | f.write(f"Could not found code for {name} (type={mod.original_name})\n") 96 | f.write("\n") 97 | else: 98 | f.write(f"\nCode for {name}, type={mod.original_name}:\n") 99 | f.write(code) 100 | f.write("\n") 101 | f.write("-" * 80) 102 | 103 | for name, m in mod.named_children(): 104 | dump_code(prefix + "." + name, m) 105 | 106 | dump_code("", model) 107 | 108 | # Recursively dump IR of all modules 109 | with PathManager.open(os.path.join(dir, "model_ts_IR.txt"), "w") as f: 110 | try: 111 | f.write(_get_script_mod(model)._c.dump_to_str(True, False, False)) 112 | except AttributeError: 113 | pass 114 | 115 | # Dump IR of the entire graph (all submodules inlined) 116 | with PathManager.open(os.path.join(dir, "model_ts_IR_inlined.txt"), "w") as f: 117 | f.write(str(model.inlined_graph)) 118 | 119 | # Dump the model structure in pytorch style 120 | with PathManager.open(os.path.join(dir, "model.txt"), "w") as f: 121 | f.write(str(model)) 122 | -------------------------------------------------------------------------------- /detectron2/layers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | from .batch_norm import FrozenBatchNorm2d, get_norm, NaiveSyncBatchNorm 3 | from .deform_conv import DeformConv, ModulatedDeformConv 4 | from .mask_ops import paste_masks_in_image 5 | from .nms import batched_nms, batched_nms_rotated, nms, nms_rotated 6 | from .roi_align import ROIAlign, roi_align 7 | from .roi_align_rotated import ROIAlignRotated, roi_align_rotated 8 | from .shape_spec import ShapeSpec 9 | from .wrappers import BatchNorm2d, Conv2d, ConvTranspose2d, cat, interpolate, Linear, nonzero_tuple 10 | from .blocks import CNNBlockBase, DepthwiseSeparableConv2d 11 | from .aspp import ASPP 12 | 13 | __all__ = [k for k in globals().keys() if not k.startswith("_")] 14 | -------------------------------------------------------------------------------- /detectron2/layers/aspp.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | 3 | from copy import deepcopy 4 | import fvcore.nn.weight_init as weight_init 5 | import torch 6 | from torch import nn 7 | from torch.nn import functional as F 8 | 9 | from .batch_norm import get_norm 10 | from .wrappers import Conv2d 11 | 12 | 13 | class ASPP(nn.Module): 14 | """ 15 | Atrous Spatial Pyramid Pooling (ASPP). 
16 | """ 17 | 18 | def __init__( 19 | self, 20 | in_channels, 21 | out_channels, 22 | dilations, 23 | *, 24 | norm, 25 | activation, 26 | pool_kernel_size=None, 27 | dropout: float = 0.0, 28 | ): 29 | """ 30 | Args: 31 | in_channels (int): number of input channels for ASPP. 32 | out_channels (int): number of output channels. 33 | dilations (list): a list of 3 dilations in ASPP. 34 | norm (str or callable): normalization for all conv layers. 35 | See :func:`layers.get_norm` for supported format. norm is 36 | applied to all conv layers except the conv following 37 | global average pooling. 38 | activation (callable): activation function. 39 | pool_kernel_size (tuple, list): the average pooling size (kh, kw) 40 | for image pooling layer in ASPP. If set to None, it always 41 | performs global average pooling. If not None, it must be 42 | divisible by the shape of inputs in forward(). It is recommended 43 | to use a fixed input feature size in training, and set this 44 | option to match this size, so that it performs global average 45 | pooling in training, and the size of the pooling window stays 46 | consistent in inference. 47 | dropout (float): apply dropout on the output of ASPP. It is used in 48 | the official DeepLab implementation with a rate of 0.1: 49 | https://github.com/tensorflow/models/blob/21b73d22f3ed05b650e85ac50849408dd36de32e/research/deeplab/model.py#L532 # noqa 50 | """ 51 | super(ASPP, self).__init__() 52 | assert len(dilations) == 3, "ASPP expects 3 dilations, got {}".format(len(dilations)) 53 | self.pool_kernel_size = pool_kernel_size 54 | self.dropout = dropout 55 | use_bias = norm == "" 56 | self.convs = nn.ModuleList() 57 | # conv 1x1 58 | self.convs.append( 59 | Conv2d( 60 | in_channels, 61 | out_channels, 62 | kernel_size=1, 63 | bias=use_bias, 64 | norm=get_norm(norm, out_channels), 65 | activation=deepcopy(activation), 66 | ) 67 | ) 68 | weight_init.c2_xavier_fill(self.convs[-1]) 69 | # atrous convs 70 | for dilation in dilations: 71 | self.convs.append( 72 | Conv2d( 73 | in_channels, 74 | out_channels, 75 | kernel_size=3, 76 | padding=dilation, 77 | dilation=dilation, 78 | bias=use_bias, 79 | norm=get_norm(norm, out_channels), 80 | activation=deepcopy(activation), 81 | ) 82 | ) 83 | weight_init.c2_xavier_fill(self.convs[-1]) 84 | # image pooling 85 | # We do not add BatchNorm because the spatial resolution is 1x1, 86 | # the original TF implementation has BatchNorm. 87 | if pool_kernel_size is None: 88 | image_pooling = nn.Sequential( 89 | nn.AdaptiveAvgPool2d(1), 90 | Conv2d(in_channels, out_channels, 1, bias=True, activation=deepcopy(activation)), 91 | ) 92 | else: 93 | image_pooling = nn.Sequential( 94 | nn.AvgPool2d(kernel_size=pool_kernel_size, stride=1), 95 | Conv2d(in_channels, out_channels, 1, bias=True, activation=deepcopy(activation)), 96 | ) 97 | weight_init.c2_xavier_fill(image_pooling[1]) 98 | self.convs.append(image_pooling) 99 | 100 | self.project = Conv2d( 101 | 5 * out_channels, 102 | out_channels, 103 | kernel_size=1, 104 | bias=use_bias, 105 | norm=get_norm(norm, out_channels), 106 | activation=deepcopy(activation), 107 | ) 108 | weight_init.c2_xavier_fill(self.project) 109 | 110 | def forward(self, x): 111 | size = x.shape[-2:] 112 | if self.pool_kernel_size is not None: 113 | if size[0] % self.pool_kernel_size[0] or size[1] % self.pool_kernel_size[1]: 114 | raise ValueError( 115 | "`pool_kernel_size` must be divisible by the shape of inputs. 
" 116 | "Input size: {} `pool_kernel_size`: {}".format(size, self.pool_kernel_size) 117 | ) 118 | res = [] 119 | for conv in self.convs: 120 | res.append(conv(x)) 121 | res[-1] = F.interpolate(res[-1], size=size, mode="bilinear", align_corners=False) 122 | res = torch.cat(res, dim=1) 123 | res = self.project(res) 124 | res = F.dropout(res, self.dropout, training=self.training) if self.dropout > 0 else res 125 | return res 126 | -------------------------------------------------------------------------------- /detectron2/layers/blocks.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright (c) Facebook, Inc. and its affiliates. 3 | 4 | import fvcore.nn.weight_init as weight_init 5 | from torch import nn 6 | 7 | from .batch_norm import FrozenBatchNorm2d, get_norm 8 | from .wrappers import Conv2d 9 | 10 | 11 | """ 12 | CNN building blocks. 13 | """ 14 | 15 | 16 | class CNNBlockBase(nn.Module): 17 | """ 18 | A CNN block is assumed to have input channels, output channels and a stride. 19 | The input and output of `forward()` method must be NCHW tensors. 20 | The method can perform arbitrary computation but must match the given 21 | channels and stride specification. 22 | 23 | Attribute: 24 | in_channels (int): 25 | out_channels (int): 26 | stride (int): 27 | """ 28 | 29 | def __init__(self, in_channels, out_channels, stride): 30 | """ 31 | The `__init__` method of any subclass should also contain these arguments. 32 | 33 | Args: 34 | in_channels (int): 35 | out_channels (int): 36 | stride (int): 37 | """ 38 | super().__init__() 39 | self.in_channels = in_channels 40 | self.out_channels = out_channels 41 | self.stride = stride 42 | 43 | def freeze(self): 44 | """ 45 | Make this block not trainable. 46 | This method sets all parameters to `requires_grad=False`, 47 | and convert all BatchNorm layers to FrozenBatchNorm 48 | 49 | Returns: 50 | the block itself 51 | """ 52 | for p in self.parameters(): 53 | p.requires_grad = False 54 | FrozenBatchNorm2d.convert_frozen_batchnorm(self) 55 | return self 56 | 57 | 58 | class DepthwiseSeparableConv2d(nn.Module): 59 | """ 60 | A kxk depthwise convolution + a 1x1 convolution. 61 | 62 | In :paper:`xception`, norm & activation are applied on the second conv. 63 | :paper:`mobilenet` uses norm & activation on both convs. 64 | """ 65 | 66 | def __init__( 67 | self, 68 | in_channels, 69 | out_channels, 70 | kernel_size=3, 71 | padding=1, 72 | *, 73 | norm1=None, 74 | activation1=None, 75 | norm2=None, 76 | activation2=None, 77 | ): 78 | """ 79 | Args: 80 | norm1, norm2 (str or callable): normalization for the two conv layers. 81 | activation1, activation2 (callable(Tensor) -> Tensor): activation 82 | function for the two conv layers. 
83 | """ 84 | super().__init__() 85 | self.depthwise = Conv2d( 86 | in_channels, 87 | in_channels, 88 | kernel_size=kernel_size, 89 | padding=padding, 90 | groups=in_channels, 91 | bias=not norm1, 92 | norm=get_norm(norm1, in_channels), 93 | activation=activation1, 94 | ) 95 | self.pointwise = Conv2d( 96 | in_channels, 97 | out_channels, 98 | kernel_size=1, 99 | bias=not norm2, 100 | norm=get_norm(norm2, out_channels), 101 | activation=activation2, 102 | ) 103 | 104 | # default initialization 105 | weight_init.c2_msra_fill(self.depthwise) 106 | weight_init.c2_msra_fill(self.pointwise) 107 | 108 | def forward(self, x): 109 | return self.pointwise(self.depthwise(x)) 110 | -------------------------------------------------------------------------------- /detectron2/layers/csrc/README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | To add a new Op: 4 | 5 | 1. Create a new directory 6 | 2. Implement new ops there 7 | 3. Delcare its Python interface in `vision.cpp`. 8 | -------------------------------------------------------------------------------- /detectron2/layers/csrc/ROIAlign/ROIAlign.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. 2 | #pragma once 3 | #include 4 | 5 | namespace detectron2 { 6 | 7 | at::Tensor ROIAlign_forward_cpu( 8 | const at::Tensor& input, 9 | const at::Tensor& rois, 10 | const float spatial_scale, 11 | const int pooled_height, 12 | const int pooled_width, 13 | const int sampling_ratio, 14 | bool aligned); 15 | 16 | at::Tensor ROIAlign_backward_cpu( 17 | const at::Tensor& grad, 18 | const at::Tensor& rois, 19 | const float spatial_scale, 20 | const int pooled_height, 21 | const int pooled_width, 22 | const int batch_size, 23 | const int channels, 24 | const int height, 25 | const int width, 26 | const int sampling_ratio, 27 | bool aligned); 28 | 29 | #if defined(WITH_CUDA) || defined(WITH_HIP) 30 | at::Tensor ROIAlign_forward_cuda( 31 | const at::Tensor& input, 32 | const at::Tensor& rois, 33 | const float spatial_scale, 34 | const int pooled_height, 35 | const int pooled_width, 36 | const int sampling_ratio, 37 | bool aligned); 38 | 39 | at::Tensor ROIAlign_backward_cuda( 40 | const at::Tensor& grad, 41 | const at::Tensor& rois, 42 | const float spatial_scale, 43 | const int pooled_height, 44 | const int pooled_width, 45 | const int batch_size, 46 | const int channels, 47 | const int height, 48 | const int width, 49 | const int sampling_ratio, 50 | bool aligned); 51 | #endif 52 | 53 | // Interface for Python 54 | inline at::Tensor ROIAlign_forward( 55 | const at::Tensor& input, 56 | const at::Tensor& rois, 57 | const float spatial_scale, 58 | const int pooled_height, 59 | const int pooled_width, 60 | const int sampling_ratio, 61 | bool aligned) { 62 | if (input.is_cuda()) { 63 | #if defined(WITH_CUDA) || defined(WITH_HIP) 64 | return ROIAlign_forward_cuda( 65 | input, 66 | rois, 67 | spatial_scale, 68 | pooled_height, 69 | pooled_width, 70 | sampling_ratio, 71 | aligned); 72 | #else 73 | AT_ERROR("Not compiled with GPU support"); 74 | #endif 75 | } 76 | return ROIAlign_forward_cpu( 77 | input, 78 | rois, 79 | spatial_scale, 80 | pooled_height, 81 | pooled_width, 82 | sampling_ratio, 83 | aligned); 84 | } 85 | 86 | inline at::Tensor ROIAlign_backward( 87 | const at::Tensor& grad, 88 | const at::Tensor& rois, 89 | const float spatial_scale, 90 | const int pooled_height, 91 | const int pooled_width, 92 | const int batch_size, 93 | 
const int channels, 94 | const int height, 95 | const int width, 96 | const int sampling_ratio, 97 | bool aligned) { 98 | if (grad.is_cuda()) { 99 | #if defined(WITH_CUDA) || defined(WITH_HIP) 100 | return ROIAlign_backward_cuda( 101 | grad, 102 | rois, 103 | spatial_scale, 104 | pooled_height, 105 | pooled_width, 106 | batch_size, 107 | channels, 108 | height, 109 | width, 110 | sampling_ratio, 111 | aligned); 112 | #else 113 | AT_ERROR("Not compiled with GPU support"); 114 | #endif 115 | } 116 | return ROIAlign_backward_cpu( 117 | grad, 118 | rois, 119 | spatial_scale, 120 | pooled_height, 121 | pooled_width, 122 | batch_size, 123 | channels, 124 | height, 125 | width, 126 | sampling_ratio, 127 | aligned); 128 | } 129 | 130 | } // namespace detectron2 131 | -------------------------------------------------------------------------------- /detectron2/layers/csrc/ROIAlignRotated/ROIAlignRotated.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. 2 | #pragma once 3 | #include 4 | 5 | namespace detectron2 { 6 | 7 | at::Tensor ROIAlignRotated_forward_cpu( 8 | const at::Tensor& input, 9 | const at::Tensor& rois, 10 | const float spatial_scale, 11 | const int pooled_height, 12 | const int pooled_width, 13 | const int sampling_ratio); 14 | 15 | at::Tensor ROIAlignRotated_backward_cpu( 16 | const at::Tensor& grad, 17 | const at::Tensor& rois, 18 | const float spatial_scale, 19 | const int pooled_height, 20 | const int pooled_width, 21 | const int batch_size, 22 | const int channels, 23 | const int height, 24 | const int width, 25 | const int sampling_ratio); 26 | 27 | #if defined(WITH_CUDA) || defined(WITH_HIP) 28 | at::Tensor ROIAlignRotated_forward_cuda( 29 | const at::Tensor& input, 30 | const at::Tensor& rois, 31 | const float spatial_scale, 32 | const int pooled_height, 33 | const int pooled_width, 34 | const int sampling_ratio); 35 | 36 | at::Tensor ROIAlignRotated_backward_cuda( 37 | const at::Tensor& grad, 38 | const at::Tensor& rois, 39 | const float spatial_scale, 40 | const int pooled_height, 41 | const int pooled_width, 42 | const int batch_size, 43 | const int channels, 44 | const int height, 45 | const int width, 46 | const int sampling_ratio); 47 | #endif 48 | 49 | // Interface for Python 50 | inline at::Tensor ROIAlignRotated_forward( 51 | const at::Tensor& input, 52 | const at::Tensor& rois, 53 | const float spatial_scale, 54 | const int pooled_height, 55 | const int pooled_width, 56 | const int sampling_ratio) { 57 | if (input.is_cuda()) { 58 | #if defined(WITH_CUDA) || defined(WITH_HIP) 59 | return ROIAlignRotated_forward_cuda( 60 | input, 61 | rois, 62 | spatial_scale, 63 | pooled_height, 64 | pooled_width, 65 | sampling_ratio); 66 | #else 67 | AT_ERROR("Not compiled with GPU support"); 68 | #endif 69 | } 70 | return ROIAlignRotated_forward_cpu( 71 | input, rois, spatial_scale, pooled_height, pooled_width, sampling_ratio); 72 | } 73 | 74 | inline at::Tensor ROIAlignRotated_backward( 75 | const at::Tensor& grad, 76 | const at::Tensor& rois, 77 | const float spatial_scale, 78 | const int pooled_height, 79 | const int pooled_width, 80 | const int batch_size, 81 | const int channels, 82 | const int height, 83 | const int width, 84 | const int sampling_ratio) { 85 | if (grad.is_cuda()) { 86 | #if defined(WITH_CUDA) || defined(WITH_HIP) 87 | return ROIAlignRotated_backward_cuda( 88 | grad, 89 | rois, 90 | spatial_scale, 91 | pooled_height, 92 | pooled_width, 93 | batch_size, 94 | channels, 95 | 
height, 96 | width, 97 | sampling_ratio); 98 | #else 99 | AT_ERROR("Not compiled with GPU support"); 100 | #endif 101 | } 102 | return ROIAlignRotated_backward_cpu( 103 | grad, 104 | rois, 105 | spatial_scale, 106 | pooled_height, 107 | pooled_width, 108 | batch_size, 109 | channels, 110 | height, 111 | width, 112 | sampling_ratio); 113 | } 114 | 115 | } // namespace detectron2 116 | -------------------------------------------------------------------------------- /detectron2/layers/csrc/box_iou_rotated/box_iou_rotated.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. 2 | #pragma once 3 | #include 4 | 5 | namespace detectron2 { 6 | 7 | at::Tensor box_iou_rotated_cpu( 8 | const at::Tensor& boxes1, 9 | const at::Tensor& boxes2); 10 | 11 | #if defined(WITH_CUDA) || defined(WITH_HIP) 12 | at::Tensor box_iou_rotated_cuda( 13 | const at::Tensor& boxes1, 14 | const at::Tensor& boxes2); 15 | #endif 16 | 17 | // Interface for Python 18 | // inline is needed to prevent multiple function definitions when this header is 19 | // included by different cpps 20 | inline at::Tensor box_iou_rotated( 21 | const at::Tensor& boxes1, 22 | const at::Tensor& boxes2) { 23 | assert(boxes1.device().is_cuda() == boxes2.device().is_cuda()); 24 | if (boxes1.device().is_cuda()) { 25 | #if defined(WITH_CUDA) || defined(WITH_HIP) 26 | return box_iou_rotated_cuda(boxes1.contiguous(), boxes2.contiguous()); 27 | #else 28 | AT_ERROR("Not compiled with GPU support"); 29 | #endif 30 | } 31 | 32 | return box_iou_rotated_cpu(boxes1.contiguous(), boxes2.contiguous()); 33 | } 34 | 35 | } // namespace detectron2 36 | -------------------------------------------------------------------------------- /detectron2/layers/csrc/box_iou_rotated/box_iou_rotated_cpu.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. 2 | #include "box_iou_rotated.h" 3 | #include "box_iou_rotated_utils.h" 4 | 5 | namespace detectron2 { 6 | 7 | template 8 | void box_iou_rotated_cpu_kernel( 9 | const at::Tensor& boxes1, 10 | const at::Tensor& boxes2, 11 | at::Tensor& ious) { 12 | auto num_boxes1 = boxes1.size(0); 13 | auto num_boxes2 = boxes2.size(0); 14 | 15 | for (int i = 0; i < num_boxes1; i++) { 16 | for (int j = 0; j < num_boxes2; j++) { 17 | ious[i * num_boxes2 + j] = single_box_iou_rotated( 18 | boxes1[i].data_ptr(), boxes2[j].data_ptr()); 19 | } 20 | } 21 | } 22 | 23 | at::Tensor box_iou_rotated_cpu( 24 | // input must be contiguous: 25 | const at::Tensor& boxes1, 26 | const at::Tensor& boxes2) { 27 | auto num_boxes1 = boxes1.size(0); 28 | auto num_boxes2 = boxes2.size(0); 29 | at::Tensor ious = 30 | at::empty({num_boxes1 * num_boxes2}, boxes1.options().dtype(at::kFloat)); 31 | 32 | box_iou_rotated_cpu_kernel(boxes1, boxes2, ious); 33 | 34 | // reshape from 1d array to 2d array 35 | auto shape = std::vector{num_boxes1, num_boxes2}; 36 | return ious.reshape(shape); 37 | } 38 | 39 | } // namespace detectron2 40 | -------------------------------------------------------------------------------- /detectron2/layers/csrc/box_iou_rotated/box_iou_rotated_cuda.cu: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. 
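The `box_iou_rotated` interface declared in the header above is what the Python wrapper `pairwise_iou_rotated` (defined further below in detectron2/layers/rotated_boxes.py) dispatches to. A hedged sketch of calling it with the (x_center, y_center, width, height, angle_degrees) box format; it assumes detectron2 was built with its C extensions, and the box values are arbitrary.

import torch
from detectron2.layers.rotated_boxes import pairwise_iou_rotated

boxes1 = torch.tensor([[10.0, 10.0, 8.0, 4.0, 0.0]])     # (x_ctr, y_ctr, w, h, angle in degrees)
boxes2 = torch.tensor([[10.0, 10.0, 8.0, 4.0, 90.0],
                       [10.0, 10.0, 8.0, 4.0, 0.0]])
ious = pairwise_iou_rotated(boxes1, boxes2)               # shape (1, 2); the second entry is ~1.0 (identical boxes)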
2 | #include 3 | #include 4 | #include 5 | #include 6 | #include "box_iou_rotated_utils.h" 7 | 8 | namespace detectron2 { 9 | 10 | // 2D block with 32 * 16 = 512 threads per block 11 | const int BLOCK_DIM_X = 32; 12 | const int BLOCK_DIM_Y = 16; 13 | 14 | template 15 | __global__ void box_iou_rotated_cuda_kernel( 16 | const int n_boxes1, 17 | const int n_boxes2, 18 | const T* dev_boxes1, 19 | const T* dev_boxes2, 20 | T* dev_ious) { 21 | const int row_start = blockIdx.x * blockDim.x; 22 | const int col_start = blockIdx.y * blockDim.y; 23 | 24 | const int row_size = min(n_boxes1 - row_start, blockDim.x); 25 | const int col_size = min(n_boxes2 - col_start, blockDim.y); 26 | 27 | __shared__ float block_boxes1[BLOCK_DIM_X * 5]; 28 | __shared__ float block_boxes2[BLOCK_DIM_Y * 5]; 29 | 30 | // It's safe to copy using threadIdx.x since BLOCK_DIM_X >= BLOCK_DIM_Y 31 | if (threadIdx.x < row_size && threadIdx.y == 0) { 32 | block_boxes1[threadIdx.x * 5 + 0] = 33 | dev_boxes1[(row_start + threadIdx.x) * 5 + 0]; 34 | block_boxes1[threadIdx.x * 5 + 1] = 35 | dev_boxes1[(row_start + threadIdx.x) * 5 + 1]; 36 | block_boxes1[threadIdx.x * 5 + 2] = 37 | dev_boxes1[(row_start + threadIdx.x) * 5 + 2]; 38 | block_boxes1[threadIdx.x * 5 + 3] = 39 | dev_boxes1[(row_start + threadIdx.x) * 5 + 3]; 40 | block_boxes1[threadIdx.x * 5 + 4] = 41 | dev_boxes1[(row_start + threadIdx.x) * 5 + 4]; 42 | } 43 | 44 | if (threadIdx.x < col_size && threadIdx.y == 0) { 45 | block_boxes2[threadIdx.x * 5 + 0] = 46 | dev_boxes2[(col_start + threadIdx.x) * 5 + 0]; 47 | block_boxes2[threadIdx.x * 5 + 1] = 48 | dev_boxes2[(col_start + threadIdx.x) * 5 + 1]; 49 | block_boxes2[threadIdx.x * 5 + 2] = 50 | dev_boxes2[(col_start + threadIdx.x) * 5 + 2]; 51 | block_boxes2[threadIdx.x * 5 + 3] = 52 | dev_boxes2[(col_start + threadIdx.x) * 5 + 3]; 53 | block_boxes2[threadIdx.x * 5 + 4] = 54 | dev_boxes2[(col_start + threadIdx.x) * 5 + 4]; 55 | } 56 | __syncthreads(); 57 | 58 | if (threadIdx.x < row_size && threadIdx.y < col_size) { 59 | int offset = (row_start + threadIdx.x) * n_boxes2 + col_start + threadIdx.y; 60 | dev_ious[offset] = single_box_iou_rotated( 61 | block_boxes1 + threadIdx.x * 5, block_boxes2 + threadIdx.y * 5); 62 | } 63 | } 64 | 65 | at::Tensor box_iou_rotated_cuda( 66 | // input must be contiguous 67 | const at::Tensor& boxes1, 68 | const at::Tensor& boxes2) { 69 | using scalar_t = float; 70 | AT_ASSERTM( 71 | boxes1.scalar_type() == at::kFloat, "boxes1 must be a float tensor"); 72 | AT_ASSERTM( 73 | boxes2.scalar_type() == at::kFloat, "boxes2 must be a float tensor"); 74 | AT_ASSERTM(boxes1.is_cuda(), "boxes1 must be a CUDA tensor"); 75 | AT_ASSERTM(boxes2.is_cuda(), "boxes2 must be a CUDA tensor"); 76 | at::cuda::CUDAGuard device_guard(boxes1.device()); 77 | 78 | auto num_boxes1 = boxes1.size(0); 79 | auto num_boxes2 = boxes2.size(0); 80 | 81 | at::Tensor ious = 82 | at::empty({num_boxes1 * num_boxes2}, boxes1.options().dtype(at::kFloat)); 83 | 84 | bool transpose = false; 85 | if (num_boxes1 > 0 && num_boxes2 > 0) { 86 | scalar_t *data1 = boxes1.data_ptr(), 87 | *data2 = boxes2.data_ptr(); 88 | 89 | if (num_boxes2 > 65535 * BLOCK_DIM_Y) { 90 | AT_ASSERTM( 91 | num_boxes1 <= 65535 * BLOCK_DIM_Y, 92 | "Too many boxes for box_iou_rotated_cuda!"); 93 | // x dim is allowed to be large, but y dim cannot, 94 | // so we transpose the two to avoid "invalid configuration argument" 95 | // error. We assume one of them is small. Otherwise the result is hard to 96 | // fit in memory anyway. 
97 | std::swap(num_boxes1, num_boxes2); 98 | std::swap(data1, data2); 99 | transpose = true; 100 | } 101 | 102 | const int blocks_x = 103 | at::cuda::ATenCeilDiv(static_cast(num_boxes1), BLOCK_DIM_X); 104 | const int blocks_y = 105 | at::cuda::ATenCeilDiv(static_cast(num_boxes2), BLOCK_DIM_Y); 106 | 107 | dim3 blocks(blocks_x, blocks_y); 108 | dim3 threads(BLOCK_DIM_X, BLOCK_DIM_Y); 109 | cudaStream_t stream = at::cuda::getCurrentCUDAStream(); 110 | 111 | box_iou_rotated_cuda_kernel<<>>( 112 | num_boxes1, 113 | num_boxes2, 114 | data1, 115 | data2, 116 | (scalar_t*)ious.data_ptr()); 117 | 118 | AT_CUDA_CHECK(cudaGetLastError()); 119 | } 120 | 121 | // reshape from 1d array to 2d array 122 | auto shape = std::vector{num_boxes1, num_boxes2}; 123 | if (transpose) { 124 | return ious.view(shape).t(); 125 | } else { 126 | return ious.view(shape); 127 | } 128 | } 129 | 130 | } // namespace detectron2 131 | -------------------------------------------------------------------------------- /detectron2/layers/csrc/cocoeval/cocoeval.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. 2 | #pragma once 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | namespace py = pybind11; 11 | 12 | namespace detectron2 { 13 | 14 | namespace COCOeval { 15 | 16 | // Annotation data for a single object instance in an image 17 | struct InstanceAnnotation { 18 | InstanceAnnotation( 19 | uint64_t id, 20 | double score, 21 | double area, 22 | bool is_crowd, 23 | bool ignore) 24 | : id{id}, score{score}, area{area}, is_crowd{is_crowd}, ignore{ignore} {} 25 | uint64_t id; 26 | double score = 0.; 27 | double area = 0.; 28 | bool is_crowd = false; 29 | bool ignore = false; 30 | }; 31 | 32 | // Stores intermediate results for evaluating detection results for a single 33 | // image that has D detected instances and G ground truth instances. This stores 34 | // matches between detected and ground truth instances 35 | struct ImageEvaluation { 36 | // For each of the D detected instances, the id of the matched ground truth 37 | // instance, or 0 if unmatched 38 | std::vector detection_matches; 39 | 40 | // The detection score of each of the D detected instances 41 | std::vector detection_scores; 42 | 43 | // Marks whether or not each of G instances was ignored from evaluation (e.g., 44 | // because it's outside area_range) 45 | std::vector ground_truth_ignores; 46 | 47 | // Marks whether or not each of D instances was ignored from evaluation (e.g., 48 | // because it's outside aRng) 49 | std::vector detection_ignores; 50 | }; 51 | 52 | template 53 | using ImageCategoryInstances = std::vector>>; 54 | 55 | // C++ implementation of COCO API cocoeval.py::COCOeval.evaluateImg(). For each 56 | // combination of image, category, area range settings, and IOU thresholds to 57 | // evaluate, it matches detected instances to ground truth instances and stores 58 | // the results into a vector of ImageEvaluation results, which will be 59 | // interpreted by the COCOeval::Accumulate() function to produce precion-recall 60 | // curves. 
The parameters of nested vectors have the following semantics: 61 | // image_category_ious[i][c][d][g] is the intersection over union of the d'th 62 | // detected instance and g'th ground truth instance of 63 | // category category_ids[c] in image image_ids[i] 64 | // image_category_ground_truth_instances[i][c] is a vector of ground truth 65 | // instances in image image_ids[i] of category category_ids[c] 66 | // image_category_detection_instances[i][c] is a vector of detected 67 | // instances in image image_ids[i] of category category_ids[c] 68 | std::vector EvaluateImages( 69 | const std::vector>& area_ranges, // vector of 2-tuples 70 | int max_detections, 71 | const std::vector& iou_thresholds, 72 | const ImageCategoryInstances>& image_category_ious, 73 | const ImageCategoryInstances& 74 | image_category_ground_truth_instances, 75 | const ImageCategoryInstances& 76 | image_category_detection_instances); 77 | 78 | // C++ implementation of COCOeval.accumulate(), which generates precision 79 | // recall curves for each set of category, IOU threshold, detection area range, 80 | // and max number of detections parameters. It is assumed that the parameter 81 | // evaluations is the return value of the functon COCOeval::EvaluateImages(), 82 | // which was called with the same parameter settings params 83 | py::dict Accumulate( 84 | const py::object& params, 85 | const std::vector& evalutations); 86 | 87 | } // namespace COCOeval 88 | } // namespace detectron2 89 | -------------------------------------------------------------------------------- /detectron2/layers/csrc/cuda_version.cu: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. 2 | 3 | #include 4 | 5 | namespace detectron2 { 6 | int get_cudart_version() { 7 | // Not a ROCM platform: Either HIP is not used, or 8 | // it is used, but platform is not ROCM (i.e. it is CUDA) 9 | #if !defined(__HIP_PLATFORM_HCC__) 10 | return CUDART_VERSION; 11 | #else 12 | int version = 0; 13 | 14 | #if HIP_VERSION_MAJOR != 0 15 | // Create a convention similar to that of CUDA, as assumed by other 16 | // parts of the code. 17 | 18 | version = HIP_VERSION_MINOR; 19 | version += (HIP_VERSION_MAJOR * 100); 20 | #else 21 | hipRuntimeGetVersion(&version); 22 | #endif 23 | return version; 24 | #endif 25 | } 26 | } // namespace detectron2 27 | -------------------------------------------------------------------------------- /detectron2/layers/csrc/nms_rotated/nms_rotated.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. 
2 | #pragma once 3 | #include 4 | 5 | namespace detectron2 { 6 | 7 | at::Tensor nms_rotated_cpu( 8 | const at::Tensor& dets, 9 | const at::Tensor& scores, 10 | const float iou_threshold); 11 | 12 | #if defined(WITH_CUDA) || defined(WITH_HIP) 13 | at::Tensor nms_rotated_cuda( 14 | const at::Tensor& dets, 15 | const at::Tensor& scores, 16 | const float iou_threshold); 17 | #endif 18 | 19 | // Interface for Python 20 | // inline is needed to prevent multiple function definitions when this header is 21 | // included by different cpps 22 | inline at::Tensor nms_rotated( 23 | const at::Tensor& dets, 24 | const at::Tensor& scores, 25 | const float iou_threshold) { 26 | assert(dets.device().is_cuda() == scores.device().is_cuda()); 27 | if (dets.device().is_cuda()) { 28 | #if defined(WITH_CUDA) || defined(WITH_HIP) 29 | return nms_rotated_cuda( 30 | dets.contiguous(), scores.contiguous(), iou_threshold); 31 | #else 32 | AT_ERROR("Not compiled with GPU support"); 33 | #endif 34 | } 35 | 36 | return nms_rotated_cpu(dets.contiguous(), scores.contiguous(), iou_threshold); 37 | } 38 | 39 | } // namespace detectron2 40 | -------------------------------------------------------------------------------- /detectron2/layers/csrc/nms_rotated/nms_rotated_cpu.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. 2 | #include "../box_iou_rotated/box_iou_rotated_utils.h" 3 | #include "nms_rotated.h" 4 | 5 | namespace detectron2 { 6 | 7 | template 8 | at::Tensor nms_rotated_cpu_kernel( 9 | const at::Tensor& dets, 10 | const at::Tensor& scores, 11 | const float iou_threshold) { 12 | // nms_rotated_cpu_kernel is modified from torchvision's nms_cpu_kernel, 13 | // however, the code in this function is much shorter because 14 | // we delegate the IoU computation for rotated boxes to 15 | // the single_box_iou_rotated function in box_iou_rotated_utils.h 16 | AT_ASSERTM(dets.device().is_cpu(), "dets must be a CPU tensor"); 17 | AT_ASSERTM(scores.device().is_cpu(), "scores must be a CPU tensor"); 18 | AT_ASSERTM( 19 | dets.scalar_type() == scores.scalar_type(), 20 | "dets should have the same type as scores"); 21 | 22 | if (dets.numel() == 0) { 23 | return at::empty({0}, dets.options().dtype(at::kLong)); 24 | } 25 | 26 | auto order_t = std::get<1>(scores.sort(0, /* descending=*/true)); 27 | 28 | auto ndets = dets.size(0); 29 | at::Tensor suppressed_t = at::zeros({ndets}, dets.options().dtype(at::kByte)); 30 | at::Tensor keep_t = at::zeros({ndets}, dets.options().dtype(at::kLong)); 31 | 32 | auto suppressed = suppressed_t.data_ptr(); 33 | auto keep = keep_t.data_ptr(); 34 | auto order = order_t.data_ptr(); 35 | 36 | int64_t num_to_keep = 0; 37 | 38 | for (int64_t _i = 0; _i < ndets; _i++) { 39 | auto i = order[_i]; 40 | if (suppressed[i] == 1) { 41 | continue; 42 | } 43 | 44 | keep[num_to_keep++] = i; 45 | 46 | for (int64_t _j = _i + 1; _j < ndets; _j++) { 47 | auto j = order[_j]; 48 | if (suppressed[j] == 1) { 49 | continue; 50 | } 51 | 52 | auto ovr = single_box_iou_rotated( 53 | dets[i].data_ptr(), dets[j].data_ptr()); 54 | if (ovr >= iou_threshold) { 55 | suppressed[j] = 1; 56 | } 57 | } 58 | } 59 | return keep_t.narrow(/*dim=*/0, /*start=*/0, /*length=*/num_to_keep); 60 | } 61 | 62 | at::Tensor nms_rotated_cpu( 63 | // input must be contiguous 64 | const at::Tensor& dets, 65 | const at::Tensor& scores, 66 | const float iou_threshold) { 67 | auto result = at::empty({0}, dets.options()); 68 | 69 | 
AT_DISPATCH_FLOATING_TYPES(dets.scalar_type(), "nms_rotated", [&] { 70 | result = nms_rotated_cpu_kernel(dets, scores, iou_threshold); 71 | }); 72 | return result; 73 | } 74 | 75 | } // namespace detectron2 76 | -------------------------------------------------------------------------------- /detectron2/layers/csrc/nms_rotated/nms_rotated_cuda.cu: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. 2 | #include 3 | #include 4 | #include 5 | #include 6 | #ifdef WITH_CUDA 7 | #include "../box_iou_rotated/box_iou_rotated_utils.h" 8 | #endif 9 | // TODO avoid this when pytorch supports "same directory" hipification 10 | #ifdef WITH_HIP 11 | #include "box_iou_rotated/box_iou_rotated_utils.h" 12 | #endif 13 | 14 | using namespace detectron2; 15 | 16 | namespace { 17 | int const threadsPerBlock = sizeof(unsigned long long) * 8; 18 | } 19 | 20 | template 21 | __global__ void nms_rotated_cuda_kernel( 22 | const int n_boxes, 23 | const float iou_threshold, 24 | const T* dev_boxes, 25 | unsigned long long* dev_mask) { 26 | // nms_rotated_cuda_kernel is modified from torchvision's nms_cuda_kernel 27 | 28 | const int row_start = blockIdx.y; 29 | const int col_start = blockIdx.x; 30 | 31 | // if (row_start > col_start) return; 32 | 33 | const int row_size = 34 | min(n_boxes - row_start * threadsPerBlock, threadsPerBlock); 35 | const int col_size = 36 | min(n_boxes - col_start * threadsPerBlock, threadsPerBlock); 37 | 38 | // Compared to nms_cuda_kernel, where each box is represented with 4 values 39 | // (x1, y1, x2, y2), each rotated box is represented with 5 values 40 | // (x_center, y_center, width, height, angle_degrees) here. 41 | __shared__ T block_boxes[threadsPerBlock * 5]; 42 | if (threadIdx.x < col_size) { 43 | block_boxes[threadIdx.x * 5 + 0] = 44 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0]; 45 | block_boxes[threadIdx.x * 5 + 1] = 46 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1]; 47 | block_boxes[threadIdx.x * 5 + 2] = 48 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2]; 49 | block_boxes[threadIdx.x * 5 + 3] = 50 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 3]; 51 | block_boxes[threadIdx.x * 5 + 4] = 52 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4]; 53 | } 54 | __syncthreads(); 55 | 56 | if (threadIdx.x < row_size) { 57 | const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x; 58 | const T* cur_box = dev_boxes + cur_box_idx * 5; 59 | int i = 0; 60 | unsigned long long t = 0; 61 | int start = 0; 62 | if (row_start == col_start) { 63 | start = threadIdx.x + 1; 64 | } 65 | for (i = start; i < col_size; i++) { 66 | // Instead of devIoU used by original horizontal nms, here 67 | // we use the single_box_iou_rotated function from box_iou_rotated_utils.h 68 | if (single_box_iou_rotated(cur_box, block_boxes + i * 5) > 69 | iou_threshold) { 70 | t |= 1ULL << i; 71 | } 72 | } 73 | const int col_blocks = at::cuda::ATenCeilDiv(n_boxes, threadsPerBlock); 74 | dev_mask[cur_box_idx * col_blocks + col_start] = t; 75 | } 76 | } 77 | 78 | namespace detectron2 { 79 | 80 | at::Tensor nms_rotated_cuda( 81 | // input must be contiguous 82 | const at::Tensor& dets, 83 | const at::Tensor& scores, 84 | float iou_threshold) { 85 | // using scalar_t = float; 86 | AT_ASSERTM(dets.is_cuda(), "dets must be a CUDA tensor"); 87 | AT_ASSERTM(scores.is_cuda(), "scores must be a CUDA tensor"); 88 | at::cuda::CUDAGuard 
device_guard(dets.device()); 89 | 90 | auto order_t = std::get<1>(scores.sort(0, /* descending=*/true)); 91 | auto dets_sorted = dets.index_select(0, order_t); 92 | 93 | auto dets_num = dets.size(0); 94 | 95 | const int col_blocks = 96 | at::cuda::ATenCeilDiv(static_cast(dets_num), threadsPerBlock); 97 | 98 | at::Tensor mask = 99 | at::empty({dets_num * col_blocks}, dets.options().dtype(at::kLong)); 100 | 101 | dim3 blocks(col_blocks, col_blocks); 102 | dim3 threads(threadsPerBlock); 103 | cudaStream_t stream = at::cuda::getCurrentCUDAStream(); 104 | 105 | AT_DISPATCH_FLOATING_TYPES( 106 | dets_sorted.scalar_type(), "nms_rotated_kernel_cuda", [&] { 107 | nms_rotated_cuda_kernel<<>>( 108 | dets_num, 109 | iou_threshold, 110 | dets_sorted.data_ptr(), 111 | (unsigned long long*)mask.data_ptr()); 112 | }); 113 | 114 | at::Tensor mask_cpu = mask.to(at::kCPU); 115 | unsigned long long* mask_host = 116 | (unsigned long long*)mask_cpu.data_ptr(); 117 | 118 | std::vector remv(col_blocks); 119 | memset(&remv[0], 0, sizeof(unsigned long long) * col_blocks); 120 | 121 | at::Tensor keep = 122 | at::empty({dets_num}, dets.options().dtype(at::kLong).device(at::kCPU)); 123 | int64_t* keep_out = keep.data_ptr(); 124 | 125 | int num_to_keep = 0; 126 | for (int i = 0; i < dets_num; i++) { 127 | int nblock = i / threadsPerBlock; 128 | int inblock = i % threadsPerBlock; 129 | 130 | if (!(remv[nblock] & (1ULL << inblock))) { 131 | keep_out[num_to_keep++] = i; 132 | unsigned long long* p = mask_host + i * col_blocks; 133 | for (int j = nblock; j < col_blocks; j++) { 134 | remv[j] |= p[j]; 135 | } 136 | } 137 | } 138 | 139 | AT_CUDA_CHECK(cudaGetLastError()); 140 | return order_t.index( 141 | {keep.narrow(/*dim=*/0, /*start=*/0, /*length=*/num_to_keep) 142 | .to(order_t.device(), keep.scalar_type())}); 143 | } 144 | 145 | } // namespace detectron2 146 | -------------------------------------------------------------------------------- /detectron2/layers/csrc/vision.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. 2 | 3 | #include 4 | #include "ROIAlign/ROIAlign.h" 5 | #include "ROIAlignRotated/ROIAlignRotated.h" 6 | #include "box_iou_rotated/box_iou_rotated.h" 7 | #include "cocoeval/cocoeval.h" 8 | #include "deformable/deform_conv.h" 9 | #include "nms_rotated/nms_rotated.h" 10 | 11 | namespace detectron2 { 12 | 13 | #if defined(WITH_CUDA) || defined(WITH_HIP) 14 | extern int get_cudart_version(); 15 | #endif 16 | 17 | std::string get_cuda_version() { 18 | #if defined(WITH_CUDA) || defined(WITH_HIP) 19 | std::ostringstream oss; 20 | 21 | #if defined(WITH_CUDA) 22 | oss << "CUDA "; 23 | #else 24 | oss << "HIP "; 25 | #endif 26 | 27 | // copied from 28 | // https://github.com/pytorch/pytorch/blob/master/aten/src/ATen/cuda/detail/CUDAHooks.cpp#L231 29 | auto printCudaStyleVersion = [&](int v) { 30 | oss << (v / 1000) << "." << (v / 10 % 100); 31 | if (v % 10 != 0) { 32 | oss << "." << (v % 10); 33 | } 34 | }; 35 | printCudaStyleVersion(get_cudart_version()); 36 | return oss.str(); 37 | #else // neither CUDA nor HIP 38 | return std::string("not available"); 39 | #endif 40 | } 41 | 42 | // similar to 43 | // https://github.com/pytorch/pytorch/blob/master/aten/src/ATen/Version.cpp 44 | std::string get_compiler_version() { 45 | std::ostringstream ss; 46 | #if defined(__GNUC__) 47 | #ifndef __clang__ 48 | 49 | #if ((__GNUC__ <= 4) && (__GNUC_MINOR__ <= 8)) 50 | #error "GCC >= 4.9 is required!" 
51 | #endif 52 | 53 | { ss << "GCC " << __GNUC__ << "." << __GNUC_MINOR__; } 54 | #endif 55 | #endif 56 | 57 | #if defined(__clang_major__) 58 | { 59 | ss << "clang " << __clang_major__ << "." << __clang_minor__ << "." 60 | << __clang_patchlevel__; 61 | } 62 | #endif 63 | 64 | #if defined(_MSC_VER) 65 | { ss << "MSVC " << _MSC_FULL_VER; } 66 | #endif 67 | return ss.str(); 68 | } 69 | 70 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 71 | m.def("get_compiler_version", &get_compiler_version, "get_compiler_version"); 72 | m.def("get_cuda_version", &get_cuda_version, "get_cuda_version"); 73 | 74 | m.def("box_iou_rotated", &box_iou_rotated, "IoU for rotated boxes"); 75 | 76 | m.def("deform_conv_forward", &deform_conv_forward, "deform_conv_forward"); 77 | m.def( 78 | "deform_conv_backward_input", 79 | &deform_conv_backward_input, 80 | "deform_conv_backward_input"); 81 | m.def( 82 | "deform_conv_backward_filter", 83 | &deform_conv_backward_filter, 84 | "deform_conv_backward_filter"); 85 | m.def( 86 | "modulated_deform_conv_forward", 87 | &modulated_deform_conv_forward, 88 | "modulated_deform_conv_forward"); 89 | m.def( 90 | "modulated_deform_conv_backward", 91 | &modulated_deform_conv_backward, 92 | "modulated_deform_conv_backward"); 93 | 94 | m.def("nms_rotated", &nms_rotated, "NMS for rotated boxes"); 95 | 96 | m.def("roi_align_forward", &ROIAlign_forward, "ROIAlign_forward"); 97 | m.def("roi_align_backward", &ROIAlign_backward, "ROIAlign_backward"); 98 | 99 | m.def( 100 | "roi_align_rotated_forward", 101 | &ROIAlignRotated_forward, 102 | "Forward pass for Rotated ROI-Align Operator"); 103 | m.def( 104 | "roi_align_rotated_backward", 105 | &ROIAlignRotated_backward, 106 | "Backward pass for Rotated ROI-Align Operator"); 107 | 108 | m.def("COCOevalAccumulate", &COCOeval::Accumulate, "COCOeval::Accumulate"); 109 | m.def( 110 | "COCOevalEvaluateImages", 111 | &COCOeval::EvaluateImages, 112 | "COCOeval::EvaluateImages"); 113 | pybind11::class_(m, "InstanceAnnotation") 114 | .def(pybind11::init()); 115 | pybind11::class_(m, "ImageEvaluation") 116 | .def(pybind11::init<>()); 117 | } 118 | } // namespace detectron2 119 | -------------------------------------------------------------------------------- /detectron2/layers/roi_align.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
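For reference, the rotated-NMS op bound in vision.cpp above is reachable from Python through `detectron2.layers` (see layers/__init__.py earlier in this tree). A hedged sketch, assuming the compiled _C extension is available; box and score values are arbitrary.

import torch
from detectron2.layers import nms_rotated

boxes = torch.tensor([[50.0, 50.0, 20.0, 10.0, 0.0],
                      [50.0, 50.0, 20.0, 10.0, 5.0],      # heavy overlap with the first box
                      [120.0, 40.0, 30.0, 12.0, 45.0]])
scores = torch.tensor([0.9, 0.8, 0.7])
keep = nms_rotated(boxes, scores, 0.5)                    # indices of kept boxes, ordered by descending score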
2 | from torch import nn 3 | from torchvision.ops import roi_align as tv_roi_align 4 | 5 | try: 6 | from torchvision import __version__ 7 | 8 | version = tuple(int(x) for x in __version__.split(".")[:2]) 9 | USE_TORCHVISION = version >= (0, 7) # https://github.com/pytorch/vision/pull/2438 10 | except ImportError: # only open source torchvision has __version__ 11 | USE_TORCHVISION = True 12 | 13 | 14 | if USE_TORCHVISION: 15 | roi_align = tv_roi_align 16 | else: 17 | from torch.nn.modules.utils import _pair 18 | from torch.autograd import Function 19 | from torch.autograd.function import once_differentiable 20 | from detectron2 import _C 21 | 22 | class _ROIAlign(Function): 23 | @staticmethod 24 | def forward(ctx, input, roi, output_size, spatial_scale, sampling_ratio, aligned): 25 | ctx.save_for_backward(roi) 26 | ctx.output_size = _pair(output_size) 27 | ctx.spatial_scale = spatial_scale 28 | ctx.sampling_ratio = sampling_ratio 29 | ctx.input_shape = input.size() 30 | ctx.aligned = aligned 31 | output = _C.roi_align_forward( 32 | input, roi, spatial_scale, output_size[0], output_size[1], sampling_ratio, aligned 33 | ) 34 | return output 35 | 36 | @staticmethod 37 | @once_differentiable 38 | def backward(ctx, grad_output): 39 | (rois,) = ctx.saved_tensors 40 | output_size = ctx.output_size 41 | spatial_scale = ctx.spatial_scale 42 | sampling_ratio = ctx.sampling_ratio 43 | bs, ch, h, w = ctx.input_shape 44 | grad_input = _C.roi_align_backward( 45 | grad_output, 46 | rois, 47 | spatial_scale, 48 | output_size[0], 49 | output_size[1], 50 | bs, 51 | ch, 52 | h, 53 | w, 54 | sampling_ratio, 55 | ctx.aligned, 56 | ) 57 | return grad_input, None, None, None, None, None 58 | 59 | roi_align = _ROIAlign.apply 60 | 61 | 62 | # NOTE: torchvision's RoIAlign has a different default aligned=False 63 | class ROIAlign(nn.Module): 64 | def __init__(self, output_size, spatial_scale, sampling_ratio, aligned=True): 65 | """ 66 | Args: 67 | output_size (tuple): h, w 68 | spatial_scale (float): scale the input boxes by this number 69 | sampling_ratio (int): number of inputs samples to take for each output 70 | sample. 0 to take samples densely. 71 | aligned (bool): if False, use the legacy implementation in 72 | Detectron. If True, align the results more perfectly. 73 | 74 | Note: 75 | The meaning of aligned=True: 76 | 77 | Given a continuous coordinate c, its two neighboring pixel indices (in our 78 | pixel model) are computed by floor(c - 0.5) and ceil(c - 0.5). For example, 79 | c=1.3 has pixel neighbors with discrete indices [0] and [1] (which are sampled 80 | from the underlying signal at continuous coordinates 0.5 and 1.5). But the original 81 | roi_align (aligned=False) does not subtract the 0.5 when computing neighboring 82 | pixel indices and therefore it uses pixels with a slightly incorrect alignment 83 | (relative to our pixel model) when performing bilinear interpolation. 84 | 85 | With `aligned=True`, 86 | we first appropriately scale the ROI and then shift it by -0.5 87 | prior to calling roi_align. This produces the correct neighbors; see 88 | detectron2/tests/test_roi_align.py for verification. 89 | 90 | The difference does not make a difference to the model's performance if 91 | ROIAlign is used together with conv layers. 
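A hedged construction sketch for the ROIAlign module this note documents; the feature shape, spatial scale, and box coordinates are illustrative assumptions.

import torch
from detectron2.layers import ROIAlign

pooler = ROIAlign(output_size=(7, 7), spatial_scale=1.0 / 16, sampling_ratio=0, aligned=True)
features = torch.randn(2, 256, 64, 64)                    # NCHW feature map at stride 16
rois = torch.tensor([[0.0, 32.0, 48.0, 256.0, 320.0],     # (batch_index, x1, y1, x2, y2) in image coordinates
                     [1.0, 0.0, 0.0, 128.0, 128.0]])
out = pooler(features, rois)                              # shape (2, 256, 7, 7)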
92 | """ 93 | super(ROIAlign, self).__init__() 94 | self.output_size = output_size 95 | self.spatial_scale = spatial_scale 96 | self.sampling_ratio = sampling_ratio 97 | self.aligned = aligned 98 | 99 | def forward(self, input, rois): 100 | """ 101 | Args: 102 | input: NCHW images 103 | rois: Bx5 boxes. First column is the index into N. The other 4 columns are xyxy. 104 | """ 105 | assert rois.dim() == 2 and rois.size(1) == 5 106 | return roi_align( 107 | input, 108 | rois.to(dtype=input.dtype), 109 | self.output_size, 110 | self.spatial_scale, 111 | self.sampling_ratio, 112 | self.aligned, 113 | ) 114 | 115 | def __repr__(self): 116 | tmpstr = self.__class__.__name__ + "(" 117 | tmpstr += "output_size=" + str(self.output_size) 118 | tmpstr += ", spatial_scale=" + str(self.spatial_scale) 119 | tmpstr += ", sampling_ratio=" + str(self.sampling_ratio) 120 | tmpstr += ", aligned=" + str(self.aligned) 121 | tmpstr += ")" 122 | return tmpstr 123 | -------------------------------------------------------------------------------- /detectron2/layers/roi_align_rotated.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | import torch 3 | from torch import nn 4 | from torch.autograd import Function 5 | from torch.autograd.function import once_differentiable 6 | from torch.nn.modules.utils import _pair 7 | 8 | from detectron2 import _C 9 | 10 | 11 | class _ROIAlignRotated(Function): 12 | @staticmethod 13 | def forward(ctx, input, roi, output_size, spatial_scale, sampling_ratio): 14 | ctx.save_for_backward(roi) 15 | ctx.output_size = _pair(output_size) 16 | ctx.spatial_scale = spatial_scale 17 | ctx.sampling_ratio = sampling_ratio 18 | ctx.input_shape = input.size() 19 | output = _C.roi_align_rotated_forward( 20 | input, roi, spatial_scale, output_size[0], output_size[1], sampling_ratio 21 | ) 22 | return output 23 | 24 | @staticmethod 25 | @once_differentiable 26 | def backward(ctx, grad_output): 27 | (rois,) = ctx.saved_tensors 28 | output_size = ctx.output_size 29 | spatial_scale = ctx.spatial_scale 30 | sampling_ratio = ctx.sampling_ratio 31 | bs, ch, h, w = ctx.input_shape 32 | grad_input = _C.roi_align_rotated_backward( 33 | grad_output, 34 | rois, 35 | spatial_scale, 36 | output_size[0], 37 | output_size[1], 38 | bs, 39 | ch, 40 | h, 41 | w, 42 | sampling_ratio, 43 | ) 44 | return grad_input, None, None, None, None, None 45 | 46 | 47 | roi_align_rotated = _ROIAlignRotated.apply 48 | 49 | 50 | class ROIAlignRotated(nn.Module): 51 | def __init__(self, output_size, spatial_scale, sampling_ratio): 52 | """ 53 | Args: 54 | output_size (tuple): h, w 55 | spatial_scale (float): scale the input boxes by this number 56 | sampling_ratio (int): number of inputs samples to take for each output 57 | sample. 0 to take samples densely. 58 | 59 | Note: 60 | ROIAlignRotated supports continuous coordinate by default: 61 | Given a continuous coordinate c, its two neighboring pixel indices (in our 62 | pixel model) are computed by floor(c - 0.5) and ceil(c - 0.5). For example, 63 | c=1.3 has pixel neighbors with discrete indices [0] and [1] (which are sampled 64 | from the underlying signal at continuous coordinates 0.5 and 1.5). 65 | """ 66 | super(ROIAlignRotated, self).__init__() 67 | self.output_size = output_size 68 | self.spatial_scale = spatial_scale 69 | self.sampling_ratio = sampling_ratio 70 | 71 | def forward(self, input, rois): 72 | """ 73 | Args: 74 | input: NCHW images 75 | rois: Bx6 boxes. 
First column is the index into N. 76 | The other 5 columns are (x_ctr, y_ctr, width, height, angle_degrees). 77 | """ 78 | assert rois.dim() == 2 and rois.size(1) == 6 79 | orig_dtype = input.dtype 80 | if orig_dtype == torch.float16: 81 | input = input.float() 82 | rois = rois.float() 83 | return roi_align_rotated( 84 | input, rois, self.output_size, self.spatial_scale, self.sampling_ratio 85 | ).to(dtype=orig_dtype) 86 | 87 | def __repr__(self): 88 | tmpstr = self.__class__.__name__ + "(" 89 | tmpstr += "output_size=" + str(self.output_size) 90 | tmpstr += ", spatial_scale=" + str(self.spatial_scale) 91 | tmpstr += ", sampling_ratio=" + str(self.sampling_ratio) 92 | tmpstr += ")" 93 | return tmpstr 94 | -------------------------------------------------------------------------------- /detectron2/layers/rotated_boxes.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | from __future__ import absolute_import, division, print_function, unicode_literals 3 | 4 | from detectron2 import _C 5 | 6 | 7 | def pairwise_iou_rotated(boxes1, boxes2): 8 | """ 9 | Return intersection-over-union (Jaccard index) of boxes. 10 | 11 | Both sets of boxes are expected to be in 12 | (x_center, y_center, width, height, angle) format. 13 | 14 | Arguments: 15 | boxes1 (Tensor[N, 5]) 16 | boxes2 (Tensor[M, 5]) 17 | 18 | Returns: 19 | iou (Tensor[N, M]): the NxM matrix containing the pairwise 20 | IoU values for every element in boxes1 and boxes2 21 | """ 22 | return _C.box_iou_rotated(boxes1, boxes2) 23 | -------------------------------------------------------------------------------- /detectron2/layers/shape_spec.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright (c) Facebook, Inc. and its affiliates. 3 | from collections import namedtuple 4 | 5 | 6 | class ShapeSpec(namedtuple("_ShapeSpec", ["channels", "height", "width", "stride"])): 7 | """ 8 | A simple structure that contains basic shape specification about a tensor. 9 | It is often used as the auxiliary inputs/outputs of models, 10 | to complement the lack of shape inference ability among pytorch modules. 11 | 12 | Attributes: 13 | channels: 14 | height: 15 | width: 16 | stride: 17 | """ 18 | 19 | def __new__(cls, *, channels=None, height=None, width=None, stride=None): 20 | return super().__new__(cls, channels, height, width, stride) 21 | -------------------------------------------------------------------------------- /detectron2/layers/wrappers.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | """ 3 | Wrappers around on some nn functions, mainly to support empty tensors. 4 | 5 | Ideally, add support directly in PyTorch to empty tensors in those functions. 
6 | 7 | These can be removed once https://github.com/pytorch/pytorch/issues/12013 8 | is implemented 9 | """ 10 | 11 | from typing import List 12 | import torch 13 | from torch.nn import functional as F 14 | 15 | from detectron2.utils.env import TORCH_VERSION 16 | 17 | 18 | def cat(tensors: List[torch.Tensor], dim: int = 0): 19 | """ 20 | Efficient version of torch.cat that avoids a copy if there is only a single element in a list 21 | """ 22 | assert isinstance(tensors, (list, tuple)) 23 | if len(tensors) == 1: 24 | return tensors[0] 25 | return torch.cat(tensors, dim) 26 | 27 | 28 | class _NewEmptyTensorOp(torch.autograd.Function): 29 | @staticmethod 30 | def forward(ctx, x, new_shape): 31 | ctx.shape = x.shape 32 | return x.new_empty(new_shape) 33 | 34 | @staticmethod 35 | def backward(ctx, grad): 36 | shape = ctx.shape 37 | return _NewEmptyTensorOp.apply(grad, shape), None 38 | 39 | 40 | class Conv2d(torch.nn.Conv2d): 41 | """ 42 | A wrapper around :class:`torch.nn.Conv2d` to support empty inputs and more features. 43 | """ 44 | 45 | def __init__(self, *args, **kwargs): 46 | """ 47 | Extra keyword arguments supported in addition to those in `torch.nn.Conv2d`: 48 | 49 | Args: 50 | norm (nn.Module, optional): a normalization layer 51 | activation (callable(Tensor) -> Tensor): a callable activation function 52 | 53 | It assumes that norm layer is used before activation. 54 | """ 55 | norm = kwargs.pop("norm", None) 56 | activation = kwargs.pop("activation", None) 57 | super().__init__(*args, **kwargs) 58 | 59 | self.norm = norm 60 | self.activation = activation 61 | 62 | def forward(self, x): 63 | # torchscript does not support SyncBatchNorm yet 64 | # https://github.com/pytorch/pytorch/issues/40507 65 | # and we skip these codes in torchscript since: 66 | # 1. currently we only support torchscript in evaluation mode 67 | # 2. features needed by exporting module to torchscript are added in PyTorch 1.6 or 68 | # later version, `Conv2d` in these PyTorch versions has already supported empty inputs. 69 | if not torch.jit.is_scripting(): 70 | if x.numel() == 0 and self.training: 71 | # https://github.com/pytorch/pytorch/issues/12013 72 | assert not isinstance( 73 | self.norm, torch.nn.SyncBatchNorm 74 | ), "SyncBatchNorm does not support empty inputs!" 75 | 76 | x = F.conv2d( 77 | x, self.weight, self.bias, self.stride, self.padding, self.dilation, self.groups 78 | ) 79 | if self.norm is not None: 80 | x = self.norm(x) 81 | if self.activation is not None: 82 | x = self.activation(x) 83 | return x 84 | 85 | 86 | ConvTranspose2d = torch.nn.ConvTranspose2d 87 | BatchNorm2d = torch.nn.BatchNorm2d 88 | interpolate = F.interpolate 89 | 90 | 91 | if TORCH_VERSION > (1, 5): 92 | Linear = torch.nn.Linear 93 | else: 94 | 95 | class Linear(torch.nn.Linear): 96 | """ 97 | A wrapper around :class:`torch.nn.Linear` to support empty inputs and more features. 98 | Because of https://github.com/pytorch/pytorch/issues/34202 99 | """ 100 | 101 | def forward(self, x): 102 | if x.numel() == 0: 103 | output_shape = [x.shape[0], self.weight.shape[0]] 104 | 105 | empty = _NewEmptyTensorOp.apply(x, output_shape) 106 | if self.training: 107 | # This is to make DDP happy. 108 | # DDP expects all workers to have gradient w.r.t the same set of parameters. 
109 | _dummy = sum(x.view(-1)[0] for x in self.parameters()) * 0.0 110 | return empty + _dummy 111 | else: 112 | return empty 113 | 114 | x = super().forward(x) 115 | return x 116 | 117 | 118 | def nonzero_tuple(x): 119 | """ 120 | A 'as_tuple=True' version of torch.nonzero to support torchscript. 121 | because of https://github.com/pytorch/pytorch/issues/38718 122 | """ 123 | if torch.jit.is_scripting(): 124 | if x.dim() == 0: 125 | return x.unsqueeze(0).nonzero().unbind(1) 126 | return x.nonzero().unbind(1) 127 | else: 128 | return x.nonzero(as_tuple=True) 129 | -------------------------------------------------------------------------------- /detectron2/model_zoo/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | """ 3 | Model Zoo API for Detectron2: a collection of functions to create common model architectures and 4 | optionally load pre-trained weights as released in 5 | `MODEL_ZOO.md `_. 6 | """ 7 | from .model_zoo import get, get_config_file, get_checkpoint_url, get_config 8 | 9 | __all__ = ["get_checkpoint_url", "get", "get_config_file", "get_config"] 10 | -------------------------------------------------------------------------------- /detectron2/modeling/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | from detectron2.layers import ShapeSpec 3 | 4 | from .anchor_generator import build_anchor_generator, ANCHOR_GENERATOR_REGISTRY 5 | from .backbone import ( 6 | BACKBONE_REGISTRY, 7 | FPN, 8 | Backbone, 9 | ResNet, 10 | ResNetBlockBase, 11 | build_backbone, 12 | build_resnet_backbone, 13 | make_stage, 14 | ) 15 | from .meta_arch import ( 16 | META_ARCH_REGISTRY, 17 | SEM_SEG_HEADS_REGISTRY, 18 | GeneralizedRCNN, 19 | PanopticFPN, 20 | ProposalNetwork, 21 | RetinaNet, 22 | SemanticSegmentor, 23 | build_model, 24 | build_sem_seg_head, 25 | ) 26 | from .postprocessing import detector_postprocess 27 | from .proposal_generator import ( 28 | PROPOSAL_GENERATOR_REGISTRY, 29 | build_proposal_generator, 30 | RPN_HEAD_REGISTRY, 31 | build_rpn_head, 32 | ) 33 | from .roi_heads import ( 34 | ROI_BOX_HEAD_REGISTRY, 35 | ROI_HEADS_REGISTRY, 36 | ROI_KEYPOINT_HEAD_REGISTRY, 37 | ROI_MASK_HEAD_REGISTRY, 38 | ROIHeads, 39 | StandardROIHeads, 40 | BaseMaskRCNNHead, 41 | BaseKeypointRCNNHead, 42 | FastRCNNOutputLayers, 43 | build_box_head, 44 | build_keypoint_head, 45 | build_mask_head, 46 | build_roi_heads, 47 | ) 48 | from .test_time_augmentation import DatasetMapperTTA, GeneralizedRCNNWithTTA 49 | 50 | _EXCLUDE = {"ShapeSpec"} 51 | __all__ = [k for k in globals().keys() if k not in _EXCLUDE and not k.startswith("_")] 52 | -------------------------------------------------------------------------------- /detectron2/modeling/backbone/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
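Before the backbone package imports continue below, here is a hedged usage sketch for the `Conv2d` wrapper defined in `wrappers.py` above. It is an editorial illustration only; `get_norm` is the helper exported from `detectron2.layers`, as used later in `box_head.py`:

import torch
from torch import nn
from detectron2.layers import Conv2d, get_norm

# The extra `norm` and `activation` keyword arguments are applied after the
# convolution, in that order, exactly as in Conv2d.forward above.
conv = Conv2d(
    64, 128, kernel_size=3, padding=1, bias=False,
    norm=get_norm("GN", 128), activation=nn.ReLU(),
)
y = conv(torch.randn(2, 64, 32, 32))  # y has shape (2, 128, 32, 32)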
2 | from .build import build_backbone, BACKBONE_REGISTRY # noqa F401 isort:skip 3 | 4 | from .backbone import Backbone 5 | from .fpn import FPN 6 | from .resnet import ResNet, ResNetBlockBase, build_resnet_backbone, make_stage 7 | 8 | __all__ = [k for k in globals().keys() if not k.startswith("_")] 9 | # TODO can expose more resnet blocks after careful consideration 10 | -------------------------------------------------------------------------------- /detectron2/modeling/backbone/backbone.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | from abc import ABCMeta, abstractmethod 3 | import torch.nn as nn 4 | 5 | from detectron2.layers import ShapeSpec 6 | 7 | __all__ = ["Backbone"] 8 | 9 | 10 | class Backbone(nn.Module, metaclass=ABCMeta): 11 | """ 12 | Abstract base class for network backbones. 13 | """ 14 | 15 | def __init__(self): 16 | """ 17 | The `__init__` method of any subclass can specify its own set of arguments. 18 | """ 19 | super().__init__() 20 | 21 | @abstractmethod 22 | def forward(self): 23 | """ 24 | Subclasses must override this method, but adhere to the same return type. 25 | 26 | Returns: 27 | dict[str->Tensor]: mapping from feature name (e.g., "res2") to tensor 28 | """ 29 | pass 30 | 31 | @property 32 | def size_divisibility(self) -> int: 33 | """ 34 | Some backbones require the input height and width to be divisible by a 35 | specific integer. This is typically true for encoder / decoder type networks 36 | with lateral connection (e.g., FPN) for which feature maps need to match 37 | dimension in the "bottom up" and "top down" paths. Set to 0 if no specific 38 | input size divisibility is required. 39 | """ 40 | return 0 41 | 42 | def output_shape(self): 43 | """ 44 | Returns: 45 | dict[str->ShapeSpec] 46 | """ 47 | # this is a backward-compatible default 48 | return { 49 | name: ShapeSpec( 50 | channels=self._out_feature_channels[name], stride=self._out_feature_strides[name] 51 | ) 52 | for name in self._out_features 53 | } 54 | -------------------------------------------------------------------------------- /detectron2/modeling/backbone/build.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | from detectron2.layers import ShapeSpec 3 | from detectron2.utils.registry import Registry 4 | 5 | from .backbone import Backbone 6 | 7 | BACKBONE_REGISTRY = Registry("BACKBONE") 8 | BACKBONE_REGISTRY.__doc__ = """ 9 | Registry for backbones, which extract feature maps from images 10 | 11 | The registered object must be a callable that accepts two arguments: 12 | 13 | 1. A :class:`detectron2.config.CfgNode` 14 | 2. A :class:`detectron2.layers.ShapeSpec`, which contains the input shape specification. 15 | 16 | Registered object must return instance of :class:`Backbone`. 17 | """ 18 | 19 | 20 | def build_backbone(cfg, input_shape=None): 21 | """ 22 | Build a backbone from `cfg.MODEL.BACKBONE.NAME`. 
23 | 24 | Returns: 25 | an instance of :class:`Backbone` 26 | """ 27 | if input_shape is None: 28 | input_shape = ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN)) 29 | 30 | backbone_name = cfg.MODEL.BACKBONE.NAME 31 | backbone = BACKBONE_REGISTRY.get(backbone_name)(cfg, input_shape) 32 | assert isinstance(backbone, Backbone) 33 | return backbone 34 | -------------------------------------------------------------------------------- /detectron2/modeling/matcher.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | from typing import List 3 | import torch 4 | 5 | from detectron2.layers import nonzero_tuple 6 | 7 | 8 | class Matcher(object): 9 | """ 10 | This class assigns to each predicted "element" (e.g., a box) a ground-truth 11 | element. Each predicted element will have exactly zero or one matches; each 12 | ground-truth element may be matched to zero or more predicted elements. 13 | 14 | The matching is determined by the MxN match_quality_matrix, which characterizes 15 | how well each (ground-truth, prediction) pair matches the other. For example, 16 | if the elements are boxes, this matrix may contain box intersection-over-union 17 | overlap values. 18 | 19 | The matcher returns (a) a vector of length N containing the index of the 20 | ground-truth element m in [0, M) that matches to prediction n in [0, N). 21 | (b) a vector of length N containing the labels for each prediction. 22 | """ 23 | 24 | def __init__( 25 | self, thresholds: List[float], labels: List[int], allow_low_quality_matches: bool = False 26 | ): 27 | """ 28 | Args: 29 | thresholds (list): a list of thresholds used to stratify predictions 30 | into levels. 31 | labels (list): a list of values to label predictions belonging to 32 | each level. A label can be one of {-1, 0, 1} signifying 33 | {ignore, negative class, positive class}, respectively. 34 | allow_low_quality_matches (bool): if True, produce additional matches 35 | for predictions with maximum match quality lower than high_threshold. 36 | See set_low_quality_matches_ for more details. 37 | 38 | For example, 39 | thresholds = [0.3, 0.5] 40 | labels = [0, -1, 1] 41 | All predictions with iou < 0.3 will be marked with 0 and 42 | thus will be considered as false positives while training. 43 | All predictions with 0.3 <= iou < 0.5 will be marked with -1 and 44 | thus will be ignored. 45 | All predictions with 0.5 <= iou will be marked with 1 and 46 | thus will be considered as true positives. 47 | """ 48 | # Add -inf and +inf to first and last position in thresholds 49 | thresholds = thresholds[:] 50 | assert thresholds[0] > 0 51 | thresholds.insert(0, -float("inf")) 52 | thresholds.append(float("inf")) 53 | # Currently torchscript does not support all + generator 54 | assert all([low <= high for (low, high) in zip(thresholds[:-1], thresholds[1:])]) 55 | assert all([l in [-1, 0, 1] for l in labels]) 56 | assert len(labels) == len(thresholds) - 1 57 | self.thresholds = thresholds 58 | self.labels = labels 59 | self.allow_low_quality_matches = allow_low_quality_matches 60 | 61 | def __call__(self, match_quality_matrix): 62 | """ 63 | Args: 64 | match_quality_matrix (Tensor[float]): an MxN tensor, containing the 65 | pairwise quality between M ground-truth elements and N predicted 66 | elements. All elements must be >= 0 (due to the use of `torch.nonzero` 67 | for selecting indices in :meth:`set_low_quality_matches_`).
68 | 69 | Returns: 70 | matches (Tensor[int64]): a vector of length N, where matches[i] is a matched 71 | ground-truth index in [0, M) 72 | match_labels (Tensor[int8]): a vector of length N, where pred_labels[i] indicates 73 | whether a prediction is a true or false positive or ignored 74 | """ 75 | assert match_quality_matrix.dim() == 2 76 | if match_quality_matrix.numel() == 0: 77 | default_matches = match_quality_matrix.new_full( 78 | (match_quality_matrix.size(1),), 0, dtype=torch.int64 79 | ) 80 | # When no gt boxes exist, we define IOU = 0 and therefore set labels 81 | # to `self.labels[0]`, which usually defaults to background class 0 82 | # To choose to ignore instead, can make labels=[-1,0,-1,1] + set appropriate thresholds 83 | default_match_labels = match_quality_matrix.new_full( 84 | (match_quality_matrix.size(1),), self.labels[0], dtype=torch.int8 85 | ) 86 | return default_matches, default_match_labels 87 | 88 | assert torch.all(match_quality_matrix >= 0) 89 | 90 | # match_quality_matrix is M (gt) x N (predicted) 91 | # Max over gt elements (dim 0) to find best gt candidate for each prediction 92 | matched_vals, matches = match_quality_matrix.max(dim=0) 93 | 94 | match_labels = matches.new_full(matches.size(), 1, dtype=torch.int8) 95 | 96 | for (l, low, high) in zip(self.labels, self.thresholds[:-1], self.thresholds[1:]): 97 | low_high = (matched_vals >= low) & (matched_vals < high) 98 | match_labels[low_high] = l 99 | 100 | if self.allow_low_quality_matches: 101 | self.set_low_quality_matches_(match_labels, match_quality_matrix) 102 | 103 | return matches, match_labels 104 | 105 | def set_low_quality_matches_(self, match_labels, match_quality_matrix): 106 | """ 107 | Produce additional matches for predictions that have only low-quality matches. 108 | Specifically, for each ground-truth G find the set of predictions that have 109 | maximum overlap with it (including ties); for each prediction in that set, if 110 | it is unmatched, then match it to the ground-truth G. 111 | 112 | This function implements the RPN assignment case (i) in Sec. 3.1.2 of 113 | :paper:`Faster R-CNN`. 114 | """ 115 | # For each gt, find the prediction with which it has highest quality 116 | highest_quality_foreach_gt, _ = match_quality_matrix.max(dim=1) 117 | # Find the highest quality match available, even if it is low, including ties. 118 | # Note that the matches qualities must be positive due to the use of 119 | # `torch.nonzero`. 120 | _, pred_inds_with_highest_quality = nonzero_tuple( 121 | match_quality_matrix == highest_quality_foreach_gt[:, None] 122 | ) 123 | # If an anchor was labeled positive only due to a low-quality match 124 | # with gt_A, but it has larger overlap with gt_B, it's matched index will still be gt_B. 125 | # This follows the implementation in Detectron, and is found to have no significant impact. 126 | match_labels[pred_inds_with_highest_quality] = 1 127 | -------------------------------------------------------------------------------- /detectron2/modeling/meta_arch/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright (c) Facebook, Inc. and its affiliates. 
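Before the meta-architecture package continues below, here is a hedged sketch (editorial, not from the source) of how the `Matcher` above assigns labels, using the thresholds and labels from its docstring:

import torch
from detectron2.modeling.matcher import Matcher

# Pairwise IoU between M=2 ground-truth boxes (rows) and N=4 predictions (columns).
iou = torch.tensor([[0.9, 0.4, 0.1, 0.05],
                    [0.2, 0.6, 0.3, 0.02]])
matcher = Matcher(thresholds=[0.3, 0.5], labels=[0, -1, 1])
matches, match_labels = matcher(iou)
# matches      -> tensor([0, 1, 1, 0]): the best ground-truth index for each prediction
# match_labels -> tensor([1, 1, -1, 0]): positive, positive, ignored, negative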
3 | 4 | from .build import META_ARCH_REGISTRY, build_model # isort:skip 5 | 6 | from .panoptic_fpn import PanopticFPN 7 | 8 | # import all the meta_arch, so they will be registered 9 | from .rcnn import GeneralizedRCNN, ProposalNetwork 10 | from .retinanet import RetinaNet 11 | from .semantic_seg import SEM_SEG_HEADS_REGISTRY, SemanticSegmentor, build_sem_seg_head 12 | 13 | 14 | __all__ = list(globals().keys()) 15 | -------------------------------------------------------------------------------- /detectron2/modeling/meta_arch/build.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | import torch 3 | 4 | from detectron2.utils.registry import Registry 5 | 6 | META_ARCH_REGISTRY = Registry("META_ARCH") # noqa F401 isort:skip 7 | META_ARCH_REGISTRY.__doc__ = """ 8 | Registry for meta-architectures, i.e. the whole model. 9 | 10 | The registered object will be called with `obj(cfg)` 11 | and expected to return a `nn.Module` object. 12 | """ 13 | 14 | 15 | def build_model(cfg): 16 | """ 17 | Build the whole model architecture, defined by ``cfg.MODEL.META_ARCHITECTURE``. 18 | Note that it does not load any weights from ``cfg``. 19 | """ 20 | meta_arch = cfg.MODEL.META_ARCHITECTURE 21 | model = META_ARCH_REGISTRY.get(meta_arch)(cfg) 22 | model.to(torch.device(cfg.MODEL.DEVICE)) 23 | return model 24 | -------------------------------------------------------------------------------- /detectron2/modeling/postprocessing.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | import torch 3 | from torch.nn import functional as F 4 | 5 | from detectron2.layers import paste_masks_in_image 6 | from detectron2.structures import Instances 7 | from detectron2.utils.memory import retry_if_cuda_oom 8 | 9 | 10 | # perhaps should rename to "resize_instance" 11 | def detector_postprocess( 12 | results: Instances, output_height: int, output_width: int, mask_threshold: float = 0.5 13 | ): 14 | """ 15 | Resize the output instances. 16 | The input images are often resized when entering an object detector. 17 | As a result, we often need the outputs of the detector in a different 18 | resolution from its inputs. 19 | 20 | This function will resize the raw outputs of an R-CNN detector 21 | to produce outputs according to the desired output resolution. 22 | 23 | Args: 24 | results (Instances): the raw outputs from the detector. 25 | `results.image_size` contains the input image resolution the detector sees. 26 | This object might be modified in-place. 27 | output_height, output_width: the desired output resolution. 28 | 29 | Returns: 30 | Instances: the resized output from the model, based on the output resolution 31 | """ 32 | # Change to 'if is_tracing' after PT1.7 33 | if isinstance(output_height, torch.Tensor): 34 | # Converts integer tensors to float temporaries to ensure true 35 | # division is performed when computing scale_x and scale_y. 
36 | output_width_tmp = output_width.float() 37 | output_height_tmp = output_height.float() 38 | new_size = torch.stack([output_height, output_width]) 39 | else: 40 | new_size = (output_height, output_width) 41 | output_width_tmp = output_width 42 | output_height_tmp = output_height 43 | 44 | scale_x, scale_y = ( 45 | output_width_tmp / results.image_size[1], 46 | output_height_tmp / results.image_size[0], 47 | ) 48 | results = Instances(new_size, **results.get_fields()) 49 | 50 | if results.has("pred_boxes"): 51 | output_boxes = results.pred_boxes 52 | elif results.has("proposal_boxes"): 53 | output_boxes = results.proposal_boxes 54 | else: 55 | output_boxes = None 56 | assert output_boxes is not None, "Predictions must contain boxes!" 57 | 58 | output_boxes.scale(scale_x, scale_y) 59 | output_boxes.clip(results.image_size) 60 | 61 | results = results[output_boxes.nonempty()] 62 | 63 | if results.has("pred_masks"): 64 | results.pred_masks = retry_if_cuda_oom(paste_masks_in_image)( 65 | results.pred_masks[:, 0, :, :], # N, 1, M, M 66 | results.pred_boxes, 67 | results.image_size, 68 | threshold=mask_threshold, 69 | ) 70 | 71 | if results.has("pred_keypoints"): 72 | results.pred_keypoints[:, :, 0] *= scale_x 73 | results.pred_keypoints[:, :, 1] *= scale_y 74 | 75 | return results 76 | 77 | 78 | def sem_seg_postprocess(result, img_size, output_height, output_width): 79 | """ 80 | Return semantic segmentation predictions in the original resolution. 81 | 82 | The input images are often resized when entering the semantic segmentor. Moreover, in some 83 | cases, they are also padded inside the segmentor to be divisible by the maximum network stride. 84 | As a result, we often need the predictions of the segmentor in a different 85 | resolution from its inputs. 86 | 87 | Args: 88 | result (Tensor): semantic segmentation prediction logits. A tensor of shape (C, H, W), 89 | where C is the number of classes, and H, W are the height and width of the prediction. 90 | img_size (tuple): image size that the segmentor takes as input. 91 | output_height, output_width: the desired output resolution. 92 | 93 | Returns: 94 | semantic segmentation prediction (Tensor): A tensor of the shape 95 | (C, output_height, output_width) that contains per-pixel soft predictions. 96 | """ 97 | result = result[:, : img_size[0], : img_size[1]].expand(1, -1, -1, -1) 98 | result = F.interpolate( 99 | result, size=(output_height, output_width), mode="bilinear", align_corners=False 100 | )[0] 101 | return result 102 | -------------------------------------------------------------------------------- /detectron2/modeling/proposal_generator/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | from .build import PROPOSAL_GENERATOR_REGISTRY, build_proposal_generator 3 | from .rpn import RPN_HEAD_REGISTRY, build_rpn_head, RPN 4 | 5 | __all__ = list(globals().keys()) 6 | -------------------------------------------------------------------------------- /detectron2/modeling/proposal_generator/build.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | from detectron2.utils.registry import Registry 3 | 4 | PROPOSAL_GENERATOR_REGISTRY = Registry("PROPOSAL_GENERATOR") 5 | PROPOSAL_GENERATOR_REGISTRY.__doc__ = """ 6 | Registry for proposal generator, which produces object proposals from feature maps.
7 | 8 | The registered object will be called with `obj(cfg, input_shape)`. 9 | The call should return a `nn.Module` object. 10 | """ 11 | 12 | from . import rpn, rrpn # noqa F401 isort:skip 13 | 14 | 15 | def build_proposal_generator(cfg, input_shape): 16 | """ 17 | Build a proposal generator from `cfg.MODEL.PROPOSAL_GENERATOR.NAME`. 18 | The name can be "PrecomputedProposals" to use no proposal generator. 19 | """ 20 | name = cfg.MODEL.PROPOSAL_GENERATOR.NAME 21 | if name == "PrecomputedProposals": 22 | return None 23 | 24 | return PROPOSAL_GENERATOR_REGISTRY.get(name)(cfg, input_shape) 25 | -------------------------------------------------------------------------------- /detectron2/modeling/roi_heads/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | from .box_head import ROI_BOX_HEAD_REGISTRY, build_box_head, FastRCNNConvFCHead 3 | from .keypoint_head import ( 4 | ROI_KEYPOINT_HEAD_REGISTRY, 5 | build_keypoint_head, 6 | BaseKeypointRCNNHead, 7 | KRCNNConvDeconvUpsampleHead, 8 | ) 9 | from .mask_head import ( 10 | ROI_MASK_HEAD_REGISTRY, 11 | build_mask_head, 12 | BaseMaskRCNNHead, 13 | MaskRCNNConvUpsampleHead, 14 | ) 15 | from .roi_heads import ( 16 | ROI_HEADS_REGISTRY, 17 | ROIHeads, 18 | Res5ROIHeads, 19 | StandardROIHeads, 20 | build_roi_heads, 21 | select_foreground_proposals, 22 | ) 23 | from .rotated_fast_rcnn import RROIHeads 24 | from .fast_rcnn import FastRCNNOutputLayers 25 | 26 | from . import cascade_rcnn # isort:skip 27 | 28 | __all__ = list(globals().keys()) 29 | -------------------------------------------------------------------------------- /detectron2/modeling/roi_heads/box_head.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | import numpy as np 3 | from typing import List 4 | import fvcore.nn.weight_init as weight_init 5 | import torch 6 | from torch import nn 7 | 8 | from detectron2.config import configurable 9 | from detectron2.layers import Conv2d, Linear, ShapeSpec, get_norm 10 | from detectron2.utils.registry import Registry 11 | 12 | __all__ = ["FastRCNNConvFCHead", "build_box_head", "ROI_BOX_HEAD_REGISTRY"] 13 | 14 | ROI_BOX_HEAD_REGISTRY = Registry("ROI_BOX_HEAD") 15 | ROI_BOX_HEAD_REGISTRY.__doc__ = """ 16 | Registry for box heads, which make box predictions from per-region features. 17 | 18 | The registered object will be called with `obj(cfg, input_shape)`. 19 | """ 20 | 21 | 22 | # To get torchscript support, we make the head a subclass of `nn.Sequential`. 23 | # Therefore, to add new layers in this head class, please make sure they are 24 | # added in the order they will be used in forward(). 25 | @ROI_BOX_HEAD_REGISTRY.register() 26 | class FastRCNNConvFCHead(nn.Sequential): 27 | """ 28 | A head with several 3x3 conv layers (each followed by norm & relu) and then 29 | several fc layers (each followed by relu). 30 | """ 31 | 32 | @configurable 33 | def __init__( 34 | self, input_shape: ShapeSpec, *, conv_dims: List[int], fc_dims: List[int], conv_norm="" 35 | ): 36 | """ 37 | NOTE: this interface is experimental. 38 | 39 | Args: 40 | input_shape (ShapeSpec): shape of the input feature. 41 | conv_dims (list[int]): the output dimensions of the conv layers 42 | fc_dims (list[int]): the output dimensions of the fc layers 43 | conv_norm (str or callable): normalization for the conv layers. 44 | See :func:`detectron2.layers.get_norm` for supported types. 
45 | """ 46 | super().__init__() 47 | assert len(conv_dims) + len(fc_dims) > 0 48 | 49 | self._output_size = (input_shape.channels, input_shape.height, input_shape.width) 50 | 51 | self.conv_norm_relus = [] 52 | for k, conv_dim in enumerate(conv_dims): 53 | conv = Conv2d( 54 | self._output_size[0], 55 | conv_dim, 56 | kernel_size=3, 57 | padding=1, 58 | bias=not conv_norm, 59 | norm=get_norm(conv_norm, conv_dim), 60 | activation=nn.ReLU(), 61 | ) 62 | self.add_module("conv{}".format(k + 1), conv) 63 | self.conv_norm_relus.append(conv) 64 | self._output_size = (conv_dim, self._output_size[1], self._output_size[2]) 65 | 66 | self.fcs = [] 67 | for k, fc_dim in enumerate(fc_dims): 68 | if k == 0: 69 | self.add_module("flatten", nn.Flatten()) 70 | fc = Linear(int(np.prod(self._output_size)), fc_dim) 71 | self.add_module("fc{}".format(k + 1), fc) 72 | self.add_module("fc_relu{}".format(k + 1), nn.ReLU()) 73 | self.fcs.append(fc) 74 | self._output_size = fc_dim 75 | 76 | for layer in self.conv_norm_relus: 77 | weight_init.c2_msra_fill(layer) 78 | for layer in self.fcs: 79 | weight_init.c2_xavier_fill(layer) 80 | 81 | @classmethod 82 | def from_config(cls, cfg, input_shape): 83 | num_conv = cfg.MODEL.ROI_BOX_HEAD.NUM_CONV 84 | conv_dim = cfg.MODEL.ROI_BOX_HEAD.CONV_DIM 85 | num_fc = cfg.MODEL.ROI_BOX_HEAD.NUM_FC 86 | fc_dim = cfg.MODEL.ROI_BOX_HEAD.FC_DIM 87 | return { 88 | "input_shape": input_shape, 89 | "conv_dims": [conv_dim] * num_conv, 90 | "fc_dims": [fc_dim] * num_fc, 91 | "conv_norm": cfg.MODEL.ROI_BOX_HEAD.NORM, 92 | } 93 | 94 | def forward(self, x): 95 | for layer in self: 96 | x = layer(x) 97 | return x 98 | 99 | @property 100 | @torch.jit.unused 101 | def output_shape(self): 102 | """ 103 | Returns: 104 | ShapeSpec: the output feature shape 105 | """ 106 | o = self._output_size 107 | if isinstance(o, int): 108 | return ShapeSpec(channels=o) 109 | else: 110 | return ShapeSpec(channels=o[0], height=o[1], width=o[2]) 111 | 112 | 113 | def build_box_head(cfg, input_shape): 114 | """ 115 | Build a box head defined by `cfg.MODEL.ROI_BOX_HEAD.NAME`. 116 | """ 117 | name = cfg.MODEL.ROI_BOX_HEAD.NAME 118 | return ROI_BOX_HEAD_REGISTRY.get(name)(cfg, input_shape) 119 | -------------------------------------------------------------------------------- /detectron2/modeling/sampling.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | import torch 3 | 4 | from detectron2.layers import nonzero_tuple 5 | 6 | __all__ = ["subsample_labels"] 7 | 8 | 9 | def subsample_labels( 10 | labels: torch.Tensor, num_samples: int, positive_fraction: float, bg_label: int 11 | ): 12 | """ 13 | Return `num_samples` (or fewer, if not enough found) 14 | random samples from `labels` which is a mixture of positives & negatives. 15 | It will try to return as many positives as possible without 16 | exceeding `positive_fraction * num_samples`, and then try to 17 | fill the remaining slots with negatives. 18 | 19 | Args: 20 | labels (Tensor): (N, ) label vector with values: 21 | * -1: ignore 22 | * bg_label: background ("negative") class 23 | * otherwise: one or more foreground ("positive") classes 24 | num_samples (int): The total number of labels with value >= 0 to return. 25 | Values that are not sampled will be filled with -1 (ignore). 26 | positive_fraction (float): The number of subsampled labels with values > 0 27 | is `min(num_positives, int(positive_fraction * num_samples))`. 
The number 28 | of negatives sampled is `min(num_negatives, num_samples - num_positives_sampled)`. 29 | In other words, if there are not enough positives, the sample is filled with 30 | negatives. If there are also not enough negatives, then as many elements are 31 | sampled as possible. 32 | bg_label (int): label index of background ("negative") class. 33 | 34 | Returns: 35 | pos_idx, neg_idx (Tensor): 36 | 1D vector of indices. The total length of both is `num_samples` or fewer. 37 | """ 38 | positive = nonzero_tuple((labels != -1) & (labels != bg_label))[0] 39 | negative = nonzero_tuple(labels == bg_label)[0] 40 | 41 | num_pos = int(num_samples * positive_fraction) 42 | # protect against not enough positive examples 43 | num_pos = min(positive.numel(), num_pos) 44 | num_neg = num_samples - num_pos 45 | # protect against not enough negative examples 46 | num_neg = min(negative.numel(), num_neg) 47 | 48 | # randomly select positive and negative examples 49 | perm1 = torch.randperm(positive.numel(), device=positive.device)[:num_pos] 50 | perm2 = torch.randperm(negative.numel(), device=negative.device)[:num_neg] 51 | 52 | pos_idx = positive[perm1] 53 | neg_idx = negative[perm2] 54 | return pos_idx, neg_idx 55 | -------------------------------------------------------------------------------- /detectron2/projects/README.md: -------------------------------------------------------------------------------- 1 | 2 | Projects live in the [`projects` directory](../../projects) under the root of this repository, but not here. 3 | -------------------------------------------------------------------------------- /detectron2/projects/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | import importlib 3 | from pathlib import Path 4 | 5 | _PROJECTS = { 6 | "point_rend": "PointRend", 7 | "deeplab": "DeepLab", 8 | "panoptic_deeplab": "Panoptic-DeepLab", 9 | } 10 | _PROJECT_ROOT = Path(__file__).parent.parent.parent / "projects" 11 | 12 | if _PROJECT_ROOT.is_dir(): 13 | # This is true only for in-place installation (pip install -e, setup.py develop), 14 | # where setup(package_dir=) does not work: https://github.com/pypa/setuptools/issues/230 15 | 16 | class _D2ProjectsFinder(importlib.abc.MetaPathFinder): 17 | def find_spec(self, name, path, target=None): 18 | if not name.startswith("detectron2.projects."): 19 | return 20 | project_name = name.split(".")[-1] 21 | project_dir = _PROJECTS.get(project_name) 22 | if not project_dir: 23 | return 24 | target_file = _PROJECT_ROOT / f"{project_dir}/{project_name}/__init__.py" 25 | if not target_file.is_file(): 26 | return 27 | return importlib.util.spec_from_file_location(name, target_file) 28 | 29 | import sys 30 | 31 | sys.meta_path.append(_D2ProjectsFinder()) 32 | -------------------------------------------------------------------------------- /detectron2/solver/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates.
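Before the solver package imports continue below, here is a hedged usage sketch for `subsample_labels` above (an editorial illustration with made-up labels):

import torch
from detectron2.modeling.sampling import subsample_labels

# -1 = ignore, 0 = background (bg_label here), anything else = foreground
labels = torch.tensor([-1, 0, 0, 0, 1, 2, 0, 1])
pos_idx, neg_idx = subsample_labels(labels, num_samples=4, positive_fraction=0.5, bg_label=0)
# pos_idx holds at most 2 foreground indices (drawn from {4, 5, 7});
# neg_idx fills the remaining slots with background indices (drawn from {1, 2, 3, 6}).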
2 | from .build import build_lr_scheduler, build_optimizer, get_default_optimizer_params 3 | from .lr_scheduler import WarmupCosineLR, WarmupMultiStepLR 4 | 5 | __all__ = [k for k in globals().keys() if not k.startswith("_")] 6 | -------------------------------------------------------------------------------- /detectron2/solver/lr_scheduler.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | import math 3 | from bisect import bisect_right 4 | from typing import List 5 | import torch 6 | 7 | # NOTE: PyTorch's LR scheduler interface uses names that assume the LR changes 8 | # only on epoch boundaries. We typically use iteration based schedules instead. 9 | # As a result, "epoch" (e.g., as in self.last_epoch) should be understood to mean 10 | # "iteration" instead. 11 | 12 | # FIXME: ideally this would be achieved with a CombinedLRScheduler, separating 13 | # MultiStepLR with WarmupLR but the current LRScheduler design doesn't allow it. 14 | 15 | 16 | class WarmupMultiStepLR(torch.optim.lr_scheduler._LRScheduler): 17 | def __init__( 18 | self, 19 | optimizer: torch.optim.Optimizer, 20 | milestones: List[int], 21 | gamma: float = 0.1, 22 | warmup_factor: float = 0.001, 23 | warmup_iters: int = 1000, 24 | warmup_method: str = "linear", 25 | last_epoch: int = -1, 26 | ): 27 | if not list(milestones) == sorted(milestones): 28 | raise ValueError( 29 | "Milestones should be a list of" " increasing integers. Got {}", milestones 30 | ) 31 | self.milestones = milestones 32 | self.gamma = gamma 33 | self.warmup_factor = warmup_factor 34 | self.warmup_iters = warmup_iters 35 | self.warmup_method = warmup_method 36 | super().__init__(optimizer, last_epoch) 37 | 38 | def get_lr(self) -> List[float]: 39 | warmup_factor = _get_warmup_factor_at_iter( 40 | self.warmup_method, self.last_epoch, self.warmup_iters, self.warmup_factor 41 | ) 42 | return [ 43 | base_lr * warmup_factor * self.gamma ** bisect_right(self.milestones, self.last_epoch) 44 | for base_lr in self.base_lrs 45 | ] 46 | 47 | def _compute_values(self) -> List[float]: 48 | # The new interface 49 | return self.get_lr() 50 | 51 | 52 | class WarmupCosineLR(torch.optim.lr_scheduler._LRScheduler): 53 | def __init__( 54 | self, 55 | optimizer: torch.optim.Optimizer, 56 | max_iters: int, 57 | warmup_factor: float = 0.001, 58 | warmup_iters: int = 1000, 59 | warmup_method: str = "linear", 60 | last_epoch: int = -1, 61 | ): 62 | self.max_iters = max_iters 63 | self.warmup_factor = warmup_factor 64 | self.warmup_iters = warmup_iters 65 | self.warmup_method = warmup_method 66 | super().__init__(optimizer, last_epoch) 67 | 68 | def get_lr(self) -> List[float]: 69 | warmup_factor = _get_warmup_factor_at_iter( 70 | self.warmup_method, self.last_epoch, self.warmup_iters, self.warmup_factor 71 | ) 72 | # Different definitions of half-cosine with warmup are possible. For 73 | # simplicity we multiply the standard half-cosine schedule by the warmup 74 | # factor. An alternative is to start the period of the cosine at warmup_iters 75 | # instead of at 0. In the case that warmup_iters << max_iters the two are 76 | # very close to each other. 
77 | return [ 78 | base_lr 79 | * warmup_factor 80 | * 0.5 81 | * (1.0 + math.cos(math.pi * self.last_epoch / self.max_iters)) 82 | for base_lr in self.base_lrs 83 | ] 84 | 85 | def _compute_values(self) -> List[float]: 86 | # The new interface 87 | return self.get_lr() 88 | 89 | 90 | def _get_warmup_factor_at_iter( 91 | method: str, iter: int, warmup_iters: int, warmup_factor: float 92 | ) -> float: 93 | """ 94 | Return the learning rate warmup factor at a specific iteration. 95 | See :paper:`ImageNet in 1h` for more details. 96 | 97 | Args: 98 | method (str): warmup method; either "constant" or "linear". 99 | iter (int): iteration at which to calculate the warmup factor. 100 | warmup_iters (int): the number of warmup iterations. 101 | warmup_factor (float): the base warmup factor (the meaning changes according 102 | to the method used). 103 | 104 | Returns: 105 | float: the effective warmup factor at the given iteration. 106 | """ 107 | if iter >= warmup_iters: 108 | return 1.0 109 | 110 | if method == "constant": 111 | return warmup_factor 112 | elif method == "linear": 113 | alpha = iter / warmup_iters 114 | return warmup_factor * (1 - alpha) + alpha 115 | else: 116 | raise ValueError("Unknown warmup method: {}".format(method)) 117 | -------------------------------------------------------------------------------- /detectron2/structures/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | from .boxes import Boxes, BoxMode, pairwise_iou, pairwise_ioa 3 | from .image_list import ImageList 4 | 5 | from .instances import Instances 6 | from .keypoints import Keypoints, heatmaps_to_keypoints 7 | from .masks import BitMasks, PolygonMasks, rasterize_polygons_within_box, polygons_to_bitmask 8 | from .rotated_boxes import RotatedBoxes 9 | from .rotated_boxes import pairwise_iou as pairwise_iou_rotated 10 | 11 | __all__ = [k for k in globals().keys() if not k.startswith("_")] 12 | -------------------------------------------------------------------------------- /detectron2/structures/image_list.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | from __future__ import division 3 | from typing import Any, List, Tuple 4 | import torch 5 | from torch import device 6 | from torch.nn import functional as F 7 | 8 | from detectron2.utils.env import TORCH_VERSION 9 | 10 | 11 | class ImageList(object): 12 | """ 13 | Structure that holds a list of images (of possibly 14 | varying sizes) as a single tensor. 15 | This works by padding the images to the same size, 16 | and storing in a field the original sizes of each image 17 | 18 | Attributes: 19 | image_sizes (list[tuple[int, int]]): each tuple is (h, w) 20 | """ 21 | 22 | def __init__(self, tensor: torch.Tensor, image_sizes: List[Tuple[int, int]]): 23 | """ 24 | Arguments: 25 | tensor (Tensor): of shape (N, H, W) or (N, C_1, ..., C_K, H, W) where K >= 1 26 | image_sizes (list[tuple[int, int]]): Each tuple is (h, w). It can 27 | be smaller than (H, W) due to padding. 28 | """ 29 | self.tensor = tensor 30 | self.image_sizes = image_sizes 31 | 32 | def __len__(self) -> int: 33 | return len(self.image_sizes) 34 | 35 | def __getitem__(self, idx) -> torch.Tensor: 36 | """ 37 | Access the individual image in its original size. 
38 | 39 | Args: 40 | idx: int or slice 41 | 42 | Returns: 43 | Tensor: an image of shape (H, W) or (C_1, ..., C_K, H, W) where K >= 1 44 | """ 45 | size = self.image_sizes[idx] 46 | return self.tensor[idx, ..., : size[0], : size[1]] 47 | 48 | @torch.jit.unused 49 | def to(self, *args: Any, **kwargs: Any) -> "ImageList": 50 | cast_tensor = self.tensor.to(*args, **kwargs) 51 | return ImageList(cast_tensor, self.image_sizes) 52 | 53 | @property 54 | def device(self) -> device: 55 | return self.tensor.device 56 | 57 | @staticmethod 58 | def from_tensors( 59 | tensors: List[torch.Tensor], size_divisibility: int = 0, pad_value: float = 0.0 60 | ) -> "ImageList": 61 | """ 62 | Args: 63 | tensors: a tuple or list of `torch.Tensors`, each of shape (Hi, Wi) or 64 | (C_1, ..., C_K, Hi, Wi) where K >= 1. The Tensors will be padded 65 | to the same shape with `pad_value`. 66 | size_divisibility (int): If `size_divisibility > 0`, add padding to ensure 67 | the common height and width is divisible by `size_divisibility`. 68 | This depends on the model and many models need a divisibility of 32. 69 | pad_value (float): value to pad 70 | 71 | Returns: 72 | an `ImageList`. 73 | """ 74 | assert len(tensors) > 0 75 | assert isinstance(tensors, (tuple, list)) 76 | for t in tensors: 77 | assert isinstance(t, torch.Tensor), type(t) 78 | assert t.shape[1:-2] == tensors[0].shape[1:-2], t.shape 79 | 80 | # Magic code below that handles dynamic shapes for both scripting and tracing ... 81 | 82 | image_sizes = [(im.shape[-2], im.shape[-1]) for im in tensors] 83 | 84 | if torch.jit.is_scripting(): 85 | max_size = torch.stack([torch.as_tensor(x) for x in image_sizes]).max(0).values 86 | if size_divisibility > 1: 87 | stride = size_divisibility 88 | # the last two dims are H,W, both subject to divisibility requirement 89 | max_size = (max_size + (stride - 1)) // stride * stride 90 | 91 | max_size: List[int] = max_size.to(dtype=torch.long).tolist() 92 | else: 93 | # https://github.com/pytorch/pytorch/issues/42448 94 | if TORCH_VERSION >= (1, 7) and torch.jit.is_tracing(): 95 | # In tracing mode, x.shape[i] is a scalar Tensor, and should not be converted 96 | # to int: this will cause the traced graph to have hard-coded shapes. 97 | # Instead we convert each shape to a vector with a stack() 98 | image_sizes = [torch.stack(x) for x in image_sizes] 99 | 100 | # maximum (H, W) for the last two dims 101 | # find the maximum in a tracable way 102 | max_size = torch.stack(image_sizes).max(0).values 103 | else: 104 | # Original eager logic here -- not scripting, not tracing: 105 | # (can be unified with scripting after 106 | # https://github.com/pytorch/pytorch/issues/47379) 107 | max_size = torch.as_tensor( 108 | [max(s) for s in zip(*[img.shape[-2:] for img in tensors])] 109 | ) 110 | 111 | if size_divisibility > 1: 112 | stride = size_divisibility 113 | # the last two dims are H,W, both subject to divisibility requirement 114 | max_size = (max_size + (stride - 1)) // stride * stride 115 | 116 | if len(tensors) == 1: 117 | # This seems slightly (2%) faster. 
118 | # TODO: check whether it's faster for multiple images as well 119 | image_size = image_sizes[0] 120 | padding_size = [0, max_size[-1] - image_size[1], 0, max_size[-2] - image_size[0]] 121 | batched_imgs = F.pad(tensors[0], padding_size, value=pad_value).unsqueeze_(0) 122 | else: 123 | # max_size can be a tensor in tracing mode, therefore convert to list 124 | batch_shape = [len(tensors)] + list(tensors[0].shape[:-2]) + list(max_size) 125 | batched_imgs = tensors[0].new_full(batch_shape, pad_value) 126 | for img, pad_img in zip(tensors, batched_imgs): 127 | pad_img[..., : img.shape[-2], : img.shape[-1]].copy_(img) 128 | 129 | return ImageList(batched_imgs.contiguous(), image_sizes) 130 | -------------------------------------------------------------------------------- /detectron2/structures/instances.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | import itertools 3 | from typing import Any, Dict, List, Tuple, Union 4 | import torch 5 | 6 | 7 | class Instances: 8 | """ 9 | This class represents a list of instances in an image. 10 | It stores the attributes of instances (e.g., boxes, masks, labels, scores) as "fields". 11 | All fields must have the same ``__len__`` which is the number of instances. 12 | 13 | All other (non-field) attributes of this class are considered private: 14 | they must start with '_' and are not modifiable by a user. 15 | 16 | Some basic usage: 17 | 18 | 1. Set/get/check a field: 19 | 20 | .. code-block:: python 21 | 22 | instances.gt_boxes = Boxes(...) 23 | print(instances.pred_masks) # a tensor of shape (N, H, W) 24 | print('gt_masks' in instances) 25 | 26 | 2. ``len(instances)`` returns the number of instances 27 | 3. Indexing: ``instances[indices]`` will apply the indexing on all the fields 28 | and returns a new :class:`Instances`. 29 | Typically, ``indices`` is a integer vector of indices, 30 | or a binary mask of length ``num_instances`` 31 | 32 | .. code-block:: python 33 | 34 | category_3_detections = instances[instances.pred_classes == 3] 35 | confident_detections = instances[instances.scores > 0.9] 36 | """ 37 | 38 | def __init__(self, image_size: Tuple[int, int], **kwargs: Any): 39 | """ 40 | Args: 41 | image_size (height, width): the spatial size of the image. 42 | kwargs: fields to add to this `Instances`. 43 | """ 44 | self._image_size = image_size 45 | self._fields: Dict[str, Any] = {} 46 | for k, v in kwargs.items(): 47 | self.set(k, v) 48 | 49 | @property 50 | def image_size(self) -> Tuple[int, int]: 51 | """ 52 | Returns: 53 | tuple: height, width 54 | """ 55 | return self._image_size 56 | 57 | def __setattr__(self, name: str, val: Any) -> None: 58 | if name.startswith("_"): 59 | super().__setattr__(name, val) 60 | else: 61 | self.set(name, val) 62 | 63 | def __getattr__(self, name: str) -> Any: 64 | if name == "_fields" or name not in self._fields: 65 | raise AttributeError("Cannot find field '{}' in the given Instances!".format(name)) 66 | return self._fields[name] 67 | 68 | def set(self, name: str, value: Any) -> None: 69 | """ 70 | Set the field named `name` to `value`. 71 | The length of `value` must be the number of instances, 72 | and must agree with other existing fields in this object. 
73 | """ 74 | data_len = len(value) 75 | if len(self._fields): 76 | assert ( 77 | len(self) == data_len 78 | ), "Adding a field of length {} to a Instances of length {}".format(data_len, len(self)) 79 | self._fields[name] = value 80 | 81 | def has(self, name: str) -> bool: 82 | """ 83 | Returns: 84 | bool: whether the field called `name` exists. 85 | """ 86 | return name in self._fields 87 | 88 | def remove(self, name: str) -> None: 89 | """ 90 | Remove the field called `name`. 91 | """ 92 | del self._fields[name] 93 | 94 | def get(self, name: str) -> Any: 95 | """ 96 | Returns the field called `name`. 97 | """ 98 | return self._fields[name] 99 | 100 | def get_fields(self) -> Dict[str, Any]: 101 | """ 102 | Returns: 103 | dict: a dict which maps names (str) to data of the fields 104 | 105 | Modifying the returned dict will modify this instance. 106 | """ 107 | return self._fields 108 | 109 | # Tensor-like methods 110 | def to(self, *args: Any, **kwargs: Any) -> "Instances": 111 | """ 112 | Returns: 113 | Instances: all fields are called with a `to(device)`, if the field has this method. 114 | """ 115 | ret = Instances(self._image_size) 116 | for k, v in self._fields.items(): 117 | if hasattr(v, "to"): 118 | v = v.to(*args, **kwargs) 119 | ret.set(k, v) 120 | return ret 121 | 122 | def __getitem__(self, item: Union[int, slice, torch.BoolTensor]) -> "Instances": 123 | """ 124 | Args: 125 | item: an index-like object and will be used to index all the fields. 126 | 127 | Returns: 128 | If `item` is a string, return the data in the corresponding field. 129 | Otherwise, returns an `Instances` where all fields are indexed by `item`. 130 | """ 131 | if type(item) == int: 132 | if item >= len(self) or item < -len(self): 133 | raise IndexError("Instances index out of range!") 134 | else: 135 | item = slice(item, None, len(self)) 136 | 137 | ret = Instances(self._image_size) 138 | for k, v in self._fields.items(): 139 | ret.set(k, v[item]) 140 | return ret 141 | 142 | def __len__(self) -> int: 143 | for v in self._fields.values(): 144 | # use __len__ because len() has to be int and is not friendly to tracing 145 | return v.__len__() 146 | raise NotImplementedError("Empty Instances does not support __len__!") 147 | 148 | def __iter__(self): 149 | raise NotImplementedError("`Instances` object is not iterable!") 150 | 151 | @staticmethod 152 | def cat(instance_lists: List["Instances"]) -> "Instances": 153 | """ 154 | Args: 155 | instance_lists (list[Instances]) 156 | 157 | Returns: 158 | Instances 159 | """ 160 | assert all(isinstance(i, Instances) for i in instance_lists) 161 | assert len(instance_lists) > 0 162 | if len(instance_lists) == 1: 163 | return instance_lists[0] 164 | 165 | image_size = instance_lists[0].image_size 166 | for i in instance_lists[1:]: 167 | assert i.image_size == image_size 168 | ret = Instances(image_size) 169 | for k in instance_lists[0]._fields.keys(): 170 | values = [i.get(k) for i in instance_lists] 171 | v0 = values[0] 172 | if isinstance(v0, torch.Tensor): 173 | values = torch.cat(values, dim=0) 174 | elif isinstance(v0, list): 175 | values = list(itertools.chain(*values)) 176 | elif hasattr(type(v0), "cat"): 177 | values = type(v0).cat(values) 178 | else: 179 | raise ValueError("Unsupported type {} for concatenation".format(type(v0))) 180 | ret.set(k, values) 181 | return ret 182 | 183 | def __str__(self) -> str: 184 | s = self.__class__.__name__ + "(" 185 | s += "num_instances={}, ".format(len(self)) 186 | s += "image_height={}, ".format(self._image_size[0]) 187 | s 
+= "image_width={}, ".format(self._image_size[1]) 188 | s += "fields=[{}])".format(", ".join((f"{k}: {v}" for k, v in self._fields.items()))) 189 | return s 190 | 191 | __repr__ = __str__ 192 | -------------------------------------------------------------------------------- /detectron2/utils/README.md: -------------------------------------------------------------------------------- 1 | # Utility functions 2 | 3 | This folder contain utility functions that are not used in the 4 | core library, but are useful for building models or training 5 | code using the config system. 6 | -------------------------------------------------------------------------------- /detectron2/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | -------------------------------------------------------------------------------- /detectron2/utils/analysis.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # -*- coding: utf-8 -*- 3 | 4 | import logging 5 | import typing 6 | import torch 7 | from fvcore.nn import activation_count, flop_count, parameter_count, parameter_count_table 8 | from torch import nn 9 | 10 | from detectron2.structures import BitMasks, Boxes, ImageList, Instances 11 | 12 | from .logger import log_first_n 13 | 14 | __all__ = [ 15 | "activation_count_operators", 16 | "flop_count_operators", 17 | "parameter_count_table", 18 | "parameter_count", 19 | ] 20 | 21 | FLOPS_MODE = "flops" 22 | ACTIVATIONS_MODE = "activations" 23 | 24 | 25 | # some extra ops to ignore from counting. 26 | _IGNORED_OPS = { 27 | "aten::add", 28 | "aten::add_", 29 | "aten::batch_norm", 30 | "aten::constant_pad_nd", 31 | "aten::div", 32 | "aten::div_", 33 | "aten::exp", 34 | "aten::log2", 35 | "aten::max_pool2d", 36 | "aten::meshgrid", 37 | "aten::mul", 38 | "aten::mul_", 39 | "aten::nonzero_numpy", 40 | "aten::rsub", 41 | "aten::sigmoid", 42 | "aten::sigmoid_", 43 | "aten::softmax", 44 | "aten::sort", 45 | "aten::sqrt", 46 | "aten::sub", 47 | "aten::upsample_nearest2d", 48 | "prim::PythonOp", 49 | "torchvision::nms", # TODO estimate flop for nms 50 | } 51 | 52 | 53 | def flop_count_operators( 54 | model: nn.Module, inputs: list, **kwargs 55 | ) -> typing.DefaultDict[str, float]: 56 | """ 57 | Implement operator-level flops counting using jit. 58 | This is a wrapper of fvcore.nn.flop_count, that supports standard detection models 59 | in detectron2. 60 | 61 | Note: 62 | The function runs the input through the model to compute flops. 63 | The flops of a detection model is often input-dependent, for example, 64 | the flops of box & mask head depends on the number of proposals & 65 | the number of detected objects. 66 | Therefore, the flops counting using a single input may not accurately 67 | reflect the computation cost of a model. 68 | 69 | Args: 70 | model: a detectron2 model that takes `list[dict]` as input. 71 | inputs (list[dict]): inputs to model, in detectron2's standard format. 72 | """ 73 | return _wrapper_count_operators(model=model, inputs=inputs, mode=FLOPS_MODE, **kwargs) 74 | 75 | 76 | def activation_count_operators( 77 | model: nn.Module, inputs: list, **kwargs 78 | ) -> typing.DefaultDict[str, float]: 79 | """ 80 | Implement operator-level activations counting using jit. 81 | This is a wrapper of fvcore.nn.activation_count, that supports standard detection models 82 | in detectron2. 
83 | 84 | Note: 85 | The function runs the input through the model to compute activations. 86 | The activations of a detection model is often input-dependent, for example, 87 | the activations of box & mask head depends on the number of proposals & 88 | the number of detected objects. 89 | 90 | Args: 91 | model: a detectron2 model that takes `list[dict]` as input. 92 | inputs (list[dict]): inputs to model, in detectron2's standard format. 93 | """ 94 | return _wrapper_count_operators(model=model, inputs=inputs, mode=ACTIVATIONS_MODE, **kwargs) 95 | 96 | 97 | def _flatten_to_tuple(outputs): 98 | result = [] 99 | if isinstance(outputs, torch.Tensor): 100 | result.append(outputs) 101 | elif isinstance(outputs, (list, tuple)): 102 | for v in outputs: 103 | result.extend(_flatten_to_tuple(v)) 104 | elif isinstance(outputs, dict): 105 | for _, v in outputs.items(): 106 | result.extend(_flatten_to_tuple(v)) 107 | elif isinstance(outputs, Instances): 108 | result.extend(_flatten_to_tuple(outputs.get_fields())) 109 | elif isinstance(outputs, (Boxes, BitMasks, ImageList)): 110 | result.append(outputs.tensor) 111 | else: 112 | log_first_n( 113 | logging.WARN, 114 | f"Output of type {type(outputs)} not included in flops/activations count.", 115 | n=10, 116 | ) 117 | return tuple(result) 118 | 119 | 120 | def _wrapper_count_operators( 121 | model: nn.Module, inputs: list, mode: str, **kwargs 122 | ) -> typing.DefaultDict[str, float]: 123 | 124 | # ignore some ops 125 | supported_ops = {k: lambda *args, **kwargs: {} for k in _IGNORED_OPS} 126 | supported_ops.update(kwargs.pop("supported_ops", {})) 127 | kwargs["supported_ops"] = supported_ops 128 | 129 | assert len(inputs) == 1, "Please use batch size=1" 130 | tensor_input = inputs[0]["image"] 131 | 132 | class WrapModel(nn.Module): 133 | def __init__(self, model): 134 | super().__init__() 135 | if isinstance( 136 | model, (nn.parallel.distributed.DistributedDataParallel, nn.DataParallel) 137 | ): 138 | self.model = model.module 139 | else: 140 | self.model = model 141 | 142 | def forward(self, image): 143 | # jit requires the input/output to be Tensors 144 | inputs = [{"image": image}] 145 | outputs = self.model.forward(inputs) 146 | # Only the subgraph that computes the returned tuple of tensor will be 147 | # counted. So we flatten everything we found to tuple of tensors. 148 | return _flatten_to_tuple(outputs) 149 | 150 | old_train = model.training 151 | with torch.no_grad(): 152 | if mode == FLOPS_MODE: 153 | ret = flop_count(WrapModel(model).train(False), (tensor_input,), **kwargs) 154 | elif mode == ACTIVATIONS_MODE: 155 | ret = activation_count(WrapModel(model).train(False), (tensor_input,), **kwargs) 156 | else: 157 | raise NotImplementedError("Count for mode {} is not supported yet.".format(mode)) 158 | # compatible with change in fvcore 159 | if isinstance(ret, tuple): 160 | ret = ret[0] 161 | model.train(old_train) 162 | return ret 163 | -------------------------------------------------------------------------------- /detectron2/utils/colormap.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | 3 | """ 4 | An awesome colormap for really neat visualizations. 5 | Copied from Detectron, and removed gray colors. 
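Before the colormap module continues below, here is a hedged sketch of how the flop counter from `analysis.py` above might be invoked. It is illustrative only; `model` is assumed to be an already-built detectron2 model that takes `list[dict]` inputs:

import torch
from detectron2.utils.analysis import flop_count_operators

# Flop counting runs a single input through the model, so the result is input-dependent.
inputs = [{"image": torch.randn(3, 800, 800)}]  # batch size must be 1
counts = flop_count_operators(model, inputs)    # dict mapping operator name -> count
print(sorted(counts.items(), key=lambda kv: -kv[1]))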
6 | """ 7 | 8 | import numpy as np 9 | 10 | __all__ = ["colormap", "random_color"] 11 | 12 | # fmt: off 13 | # RGB: 14 | _COLORS = np.array( 15 | [ 16 | 0.000, 0.447, 0.741, 17 | 0.850, 0.325, 0.098, 18 | 0.929, 0.694, 0.125, 19 | 0.494, 0.184, 0.556, 20 | 0.466, 0.674, 0.188, 21 | 0.301, 0.745, 0.933, 22 | 0.635, 0.078, 0.184, 23 | 0.300, 0.300, 0.300, 24 | 0.600, 0.600, 0.600, 25 | 1.000, 0.000, 0.000, 26 | 1.000, 0.500, 0.000, 27 | 0.749, 0.749, 0.000, 28 | 0.000, 1.000, 0.000, 29 | 0.000, 0.000, 1.000, 30 | 0.667, 0.000, 1.000, 31 | 0.333, 0.333, 0.000, 32 | 0.333, 0.667, 0.000, 33 | 0.333, 1.000, 0.000, 34 | 0.667, 0.333, 0.000, 35 | 0.667, 0.667, 0.000, 36 | 0.667, 1.000, 0.000, 37 | 1.000, 0.333, 0.000, 38 | 1.000, 0.667, 0.000, 39 | 1.000, 1.000, 0.000, 40 | 0.000, 0.333, 0.500, 41 | 0.000, 0.667, 0.500, 42 | 0.000, 1.000, 0.500, 43 | 0.333, 0.000, 0.500, 44 | 0.333, 0.333, 0.500, 45 | 0.333, 0.667, 0.500, 46 | 0.333, 1.000, 0.500, 47 | 0.667, 0.000, 0.500, 48 | 0.667, 0.333, 0.500, 49 | 0.667, 0.667, 0.500, 50 | 0.667, 1.000, 0.500, 51 | 1.000, 0.000, 0.500, 52 | 1.000, 0.333, 0.500, 53 | 1.000, 0.667, 0.500, 54 | 1.000, 1.000, 0.500, 55 | 0.000, 0.333, 1.000, 56 | 0.000, 0.667, 1.000, 57 | 0.000, 1.000, 1.000, 58 | 0.333, 0.000, 1.000, 59 | 0.333, 0.333, 1.000, 60 | 0.333, 0.667, 1.000, 61 | 0.333, 1.000, 1.000, 62 | 0.667, 0.000, 1.000, 63 | 0.667, 0.333, 1.000, 64 | 0.667, 0.667, 1.000, 65 | 0.667, 1.000, 1.000, 66 | 1.000, 0.000, 1.000, 67 | 1.000, 0.333, 1.000, 68 | 1.000, 0.667, 1.000, 69 | 0.333, 0.000, 0.000, 70 | 0.500, 0.000, 0.000, 71 | 0.667, 0.000, 0.000, 72 | 0.833, 0.000, 0.000, 73 | 1.000, 0.000, 0.000, 74 | 0.000, 0.167, 0.000, 75 | 0.000, 0.333, 0.000, 76 | 0.000, 0.500, 0.000, 77 | 0.000, 0.667, 0.000, 78 | 0.000, 0.833, 0.000, 79 | 0.000, 1.000, 0.000, 80 | 0.000, 0.000, 0.167, 81 | 0.000, 0.000, 0.333, 82 | 0.000, 0.000, 0.500, 83 | 0.000, 0.000, 0.667, 84 | 0.000, 0.000, 0.833, 85 | 0.000, 0.000, 1.000, 86 | 0.000, 0.000, 0.000, 87 | 0.143, 0.143, 0.143, 88 | 0.857, 0.857, 0.857, 89 | 1.000, 1.000, 1.000 90 | ] 91 | ).astype(np.float32).reshape(-1, 3) 92 | # fmt: on 93 | 94 | 95 | def colormap(rgb=False, maximum=255): 96 | """ 97 | Args: 98 | rgb (bool): whether to return RGB colors or BGR colors. 99 | maximum (int): either 255 or 1 100 | 101 | Returns: 102 | ndarray: a float32 array of Nx3 colors, in range [0, 255] or [0, 1] 103 | """ 104 | assert maximum in [255, 1], maximum 105 | c = _COLORS * maximum 106 | if not rgb: 107 | c = c[:, ::-1] 108 | return c 109 | 110 | 111 | def random_color(rgb=False, maximum=255): 112 | """ 113 | Args: 114 | rgb (bool): whether to return RGB colors or BGR colors. 
115 | maximum (int): either 255 or 1 116 | 117 | Returns: 118 | ndarray: a vector of 3 numbers 119 | """ 120 | idx = np.random.randint(0, len(_COLORS)) 121 | ret = _COLORS[idx] * maximum 122 | if not rgb: 123 | ret = ret[::-1] 124 | return ret 125 | 126 | 127 | if __name__ == "__main__": 128 | import cv2 129 | 130 | size = 100 131 | H, W = 10, 10 132 | canvas = np.random.rand(H * size, W * size, 3).astype("float32") 133 | for h in range(H): 134 | for w in range(W): 135 | idx = h * W + w 136 | if idx >= len(_COLORS): 137 | break 138 | canvas[h * size : (h + 1) * size, w * size : (w + 1) * size] = _COLORS[idx] 139 | cv2.imshow("a", canvas) 140 | cv2.waitKey(0) 141 | -------------------------------------------------------------------------------- /detectron2/utils/env.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | import importlib 3 | import importlib.util 4 | import logging 5 | import numpy as np 6 | import os 7 | import random 8 | import sys 9 | from datetime import datetime 10 | import torch 11 | 12 | __all__ = ["seed_all_rng"] 13 | 14 | 15 | TORCH_VERSION = tuple(int(x) for x in torch.__version__.split(".")[:2]) 16 | """ 17 | PyTorch version as a tuple of 2 ints. Useful for comparison. 18 | """ 19 | 20 | 21 | def seed_all_rng(seed=None): 22 | """ 23 | Set the random seed for the RNG in torch, numpy and python. 24 | 25 | Args: 26 | seed (int): if None, will use a strong random seed. 27 | """ 28 | if seed is None: 29 | seed = ( 30 | os.getpid() 31 | + int(datetime.now().strftime("%S%f")) 32 | + int.from_bytes(os.urandom(2), "big") 33 | ) 34 | logger = logging.getLogger(__name__) 35 | logger.info("Using a generated random seed {}".format(seed)) 36 | np.random.seed(seed) 37 | torch.set_rng_state(torch.manual_seed(seed).get_state()) 38 | random.seed(seed) 39 | 40 | 41 | # from https://stackoverflow.com/questions/67631/how-to-import-a-module-given-the-full-path 42 | def _import_file(module_name, file_path, make_importable=False): 43 | spec = importlib.util.spec_from_file_location(module_name, file_path) 44 | module = importlib.util.module_from_spec(spec) 45 | spec.loader.exec_module(module) 46 | if make_importable: 47 | sys.modules[module_name] = module 48 | return module 49 | 50 | 51 | def _configure_libraries(): 52 | """ 53 | Configurations for some libraries. 54 | """ 55 | # An environment option to disable `import cv2` globally, 56 | # in case it leads to negative performance impact 57 | disable_cv2 = int(os.environ.get("DETECTRON2_DISABLE_CV2", False)) 58 | if disable_cv2: 59 | sys.modules["cv2"] = None 60 | else: 61 | # Disable opencl in opencv since its interaction with cuda often has negative effects 62 | # This envvar is supported after OpenCV 3.4.0 63 | os.environ["OPENCV_OPENCL_RUNTIME"] = "disabled" 64 | try: 65 | import cv2 66 | 67 | if int(cv2.__version__.split(".")[0]) >= 3: 68 | cv2.ocl.setUseOpenCL(False) 69 | except ModuleNotFoundError: 70 | # Other types of ImportError, if happened, should not be ignored. 
71 | # Because a failed opencv import could mess up address space 72 | # https://github.com/skvark/opencv-python/issues/381 73 | pass 74 | 75 | def get_version(module, digit=2): 76 | return tuple(map(int, module.__version__.split(".")[:digit])) 77 | 78 | # fmt: off 79 | assert get_version(torch) >= (1, 4), "Requires torch>=1.4" 80 | import fvcore 81 | assert get_version(fvcore, 3) >= (0, 1, 2), "Requires fvcore>=0.1.2" 82 | import yaml 83 | assert get_version(yaml) >= (5, 1), "Requires pyyaml>=5.1" 84 | # fmt: on 85 | 86 | 87 | _ENV_SETUP_DONE = False 88 | 89 | 90 | def setup_environment(): 91 | """Perform environment setup work. The default setup is a no-op, but this 92 | function allows the user to specify a Python source file or a module in 93 | the $DETECTRON2_ENV_MODULE environment variable, that performs 94 | custom setup work that may be necessary to their computing environment. 95 | """ 96 | global _ENV_SETUP_DONE 97 | if _ENV_SETUP_DONE: 98 | return 99 | _ENV_SETUP_DONE = True 100 | 101 | _configure_libraries() 102 | 103 | custom_module_path = os.environ.get("DETECTRON2_ENV_MODULE") 104 | 105 | if custom_module_path: 106 | setup_custom_environment(custom_module_path) 107 | else: 108 | # The default setup is a no-op 109 | pass 110 | 111 | 112 | def setup_custom_environment(custom_module): 113 | """ 114 | Load custom environment setup by importing a Python source file or a 115 | module, and run the setup function. 116 | """ 117 | if custom_module.endswith(".py"): 118 | module = _import_file("detectron2.utils.env.custom_module", custom_module) 119 | else: 120 | module = importlib.import_module(custom_module) 121 | assert hasattr(module, "setup_environment") and callable(module.setup_environment), ( 122 | "Custom environment module defined in {} does not have the " 123 | "required callable attribute 'setup_environment'." 124 | ).format(custom_module) 125 | module.setup_environment() 126 | -------------------------------------------------------------------------------- /detectron2/utils/file_io.py: -------------------------------------------------------------------------------- 1 | from fvcore.common.file_io import HTTPURLHandler, OneDrivePathHandler, PathHandler, PathManagerBase 2 | 3 | __all__ = ["PathManager", "PathHandler"] 4 | 5 | 6 | PathManager = PathManagerBase() 7 | """ 8 | This is a detectron2 project-specific PathManager. 9 | We try to stay away from global PathManager in fvcore as it 10 | introduces potential conflicts among other libraries. 11 | """ 12 | 13 | 14 | class Detectron2Handler(PathHandler): 15 | """ 16 | Resolve anything that's hosted under detectron2's namespace. 17 | """ 18 | 19 | PREFIX = "detectron2://" 20 | S3_DETECTRON2_PREFIX = "https://dl.fbaipublicfiles.com/detectron2/" 21 | 22 | def _get_supported_prefixes(self): 23 | return [self.PREFIX] 24 | 25 | def _get_local_path(self, path): 26 | name = path[len(self.PREFIX) :] 27 | return PathManager.get_local_path(self.S3_DETECTRON2_PREFIX + name) 28 | 29 | def _open(self, path, mode="r", **kwargs): 30 | return PathManager.open(self._get_local_path(path), mode, **kwargs) 31 | 32 | 33 | PathManager.register_handler(HTTPURLHandler()) 34 | PathManager.register_handler(OneDrivePathHandler()) 35 | PathManager.register_handler(Detectron2Handler()) 36 | -------------------------------------------------------------------------------- /detectron2/utils/memory.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
2 | 3 | import logging 4 | from contextlib import contextmanager 5 | from functools import wraps 6 | import torch 7 | 8 | __all__ = ["retry_if_cuda_oom"] 9 | 10 | 11 | @contextmanager 12 | def _ignore_torch_cuda_oom(): 13 | """ 14 | A context which ignores CUDA OOM exception from pytorch. 15 | """ 16 | try: 17 | yield 18 | except RuntimeError as e: 19 | # NOTE: the string may change? 20 | if "CUDA out of memory. " in str(e): 21 | pass 22 | else: 23 | raise 24 | 25 | 26 | def retry_if_cuda_oom(func): 27 | """ 28 | Makes a function retry itself after encountering 29 | pytorch's CUDA OOM error. 30 | It will first retry after calling `torch.cuda.empty_cache()`. 31 | 32 | If that still fails, it will then retry by trying to convert inputs to CPUs. 33 | In this case, it expects the function to dispatch to CPU implementation. 34 | The return values may become CPU tensors as well and it's user's 35 | responsibility to convert it back to CUDA tensor if needed. 36 | 37 | Args: 38 | func: a stateless callable that takes tensor-like objects as arguments 39 | 40 | Returns: 41 | a callable which retries `func` if OOM is encountered. 42 | 43 | Examples: 44 | :: 45 | output = retry_if_cuda_oom(some_torch_function)(input1, input2) 46 | # output may be on CPU even if inputs are on GPU 47 | 48 | Note: 49 | 1. When converting inputs to CPU, it will only look at each argument and check 50 | if it has `.device` and `.to` for conversion. Nested structures of tensors 51 | are not supported. 52 | 53 | 2. Since the function might be called more than once, it has to be 54 | stateless. 55 | """ 56 | 57 | def maybe_to_cpu(x): 58 | try: 59 | like_gpu_tensor = x.device.type == "cuda" and hasattr(x, "to") 60 | except AttributeError: 61 | like_gpu_tensor = False 62 | if like_gpu_tensor: 63 | return x.to(device="cpu") 64 | else: 65 | return x 66 | 67 | @wraps(func) 68 | def wrapped(*args, **kwargs): 69 | with _ignore_torch_cuda_oom(): 70 | return func(*args, **kwargs) 71 | 72 | # Clear cache and retry 73 | torch.cuda.empty_cache() 74 | with _ignore_torch_cuda_oom(): 75 | return func(*args, **kwargs) 76 | 77 | # Try on CPU. This slows down the code significantly, therefore print a notice. 78 | logger = logging.getLogger(__name__) 79 | logger.info("Attempting to copy inputs of {} to CPU due to CUDA OOM".format(str(func))) 80 | new_args = (maybe_to_cpu(x) for x in args) 81 | new_kwargs = {k: maybe_to_cpu(v) for k, v in kwargs.items()} 82 | return func(*new_args, **new_kwargs) 83 | 84 | return wrapped 85 | -------------------------------------------------------------------------------- /detectron2/utils/registry.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | 3 | # Keep this module for backward compatibility. 4 | from fvcore.common.registry import Registry # noqa 5 | 6 | __all__ = ["Registry"] 7 | -------------------------------------------------------------------------------- /detectron2/utils/serialize.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | import cloudpickle 3 | 4 | 5 | class PicklableWrapper(object): 6 | """ 7 | Wrap an object to make it more picklable, note that it uses 8 | heavy weight serialization libraries that are slower than pickle. 9 | It's best to use it only on closures (which are usually not picklable). 
10 | 11 | This is a simplified version of 12 | https://github.com/joblib/joblib/blob/master/joblib/externals/loky/cloudpickle_wrapper.py 13 | """ 14 | 15 | def __init__(self, obj): 16 | self._obj = obj 17 | 18 | def __reduce__(self): 19 | s = cloudpickle.dumps(self._obj) 20 | return cloudpickle.loads, (s,) 21 | 22 | def __call__(self, *args, **kwargs): 23 | return self._obj(*args, **kwargs) 24 | 25 | def __getattr__(self, attr): 26 | # Ensure that the wrapped object can be used seamlessly as the previous object. 27 | if attr not in ["_obj"]: 28 | return getattr(self._obj, attr) 29 | return getattr(self, attr) 30 | -------------------------------------------------------------------------------- /detectron2/utils/testing.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | from detectron2 import model_zoo 5 | from detectron2.data import DatasetCatalog 6 | from detectron2.data.detection_utils import read_image 7 | from detectron2.modeling import build_model 8 | from detectron2.structures import Boxes 9 | from detectron2.utils.file_io import PathManager 10 | 11 | 12 | """ 13 | Internal utilities for tests. Don't use except for writing tests. 14 | """ 15 | 16 | 17 | def get_model_no_weights(config_path): 18 | """ 19 | Like model_zoo.get, but do not load any weights (even pretrained) 20 | """ 21 | cfg = model_zoo.get_config(config_path) 22 | if not torch.cuda.is_available(): 23 | cfg.MODEL.DEVICE = "cpu" 24 | return build_model(cfg) 25 | 26 | 27 | def random_boxes(num_boxes, max_coord=100, device="cpu"): 28 | """ 29 | Create a random Nx4 boxes tensor, with coordinates < max_coord. 30 | """ 31 | boxes = torch.rand(num_boxes, 4, device=device) * (max_coord * 0.5) 32 | boxes.clamp_(min=1.0) # tiny boxes cause numerical instability in box regression 33 | # Note: the implementation of this function in torchvision is: 34 | # boxes[:, 2:] += torch.rand(N, 2) * 100 35 | # but it does not guarantee non-negative widths/heights constraints: 36 | # boxes[:, 2] >= boxes[:, 0] and boxes[:, 3] >= boxes[:, 1]: 37 | boxes[:, 2:] += boxes[:, :2] 38 | return boxes 39 | 40 | 41 | def get_sample_coco_image(tensor=True): 42 | """ 43 | Args: 44 | tensor (bool): if True, returns 3xHxW tensor. 45 | else, returns a HxWx3 numpy array. 46 | 47 | Returns: 48 | an image, in BGR color. 49 | """ 50 | try: 51 | file_name = DatasetCatalog.get("coco_2017_train")[0]["file_name"] 52 | if not PathManager.exists(file_name): 53 | raise FileNotFoundError() 54 | except IOError: 55 | # for public CI to run 56 | file_name = "http://images.cocodataset.org/train2017/000000000009.jpg" 57 | ret = read_image(file_name, format="BGR") 58 | if tensor: 59 | ret = torch.from_numpy(np.ascontiguousarray(ret.transpose(2, 0, 1))) 60 | return ret 61 | 62 | 63 | def assert_instances_allclose(input, other, rtol=1e-5, msg=""): 64 | """ 65 | Args: 66 | input, other (Instances): 67 | """ 68 | if not msg: 69 | msg = "Two Instances are different! " 70 | else: 71 | msg = msg.rstrip() + " " 72 | assert input.image_size == other.image_size, ( 73 | msg + f"image_size is {input.image_size} vs. {other.image_size}!" 74 | ) 75 | fields = sorted(input.get_fields().keys()) 76 | fields_other = sorted(other.get_fields().keys()) 77 | assert fields == fields_other, msg + f"Fields are {fields} vs {fields_other}!" 
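    # Compare field-by-field: Boxes get an absolute tolerance scaled by ~100 (typical
    # coordinate magnitude), floating-point tensors scale the tolerance by their own
    # max magnitude, and integer tensors must match exactly.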
78 | 79 | for f in fields: 80 | val1, val2 = input.get(f), other.get(f) 81 | if isinstance(val1, Boxes): 82 | # boxes in the range of O(100) and can have a larger tolerance 83 | assert torch.allclose(val1.tensor, val2.tensor, atol=100 * rtol), ( 84 | msg + f"Field {f} differs too much!" 85 | ) 86 | elif isinstance(val1, torch.Tensor): 87 | if val1.dtype.is_floating_point: 88 | mag = torch.abs(val1).max().cpu().item() 89 | assert torch.allclose(val1, val2, atol=mag * rtol), ( 90 | msg + f"Field {f} differs too much!" 91 | ) 92 | else: 93 | assert torch.equal(val1, val2), msg + f"Field {f} is different!" 94 | else: 95 | raise ValueError(f"Don't know how to compare type {type(val1)}") 96 | -------------------------------------------------------------------------------- /projects/YOSO/configs/ade20k/panoptic-segmentation/Base-ADE20K-PanopticSegmentation.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | PIXEL_MEAN: [123.675, 116.280, 103.530] 3 | PIXEL_STD: [58.395, 57.120, 57.375] 4 | META_ARCHITECTURE: "YOSO" 5 | DATASETS: 6 | TRAIN: ("ade20k_panoptic_train",) 7 | TEST: ("ade20k_panoptic_val",) 8 | SOLVER: 9 | IMS_PER_BATCH: 16 10 | BASE_LR: 0.0001 11 | MAX_ITER: 160000 12 | WARMUP_FACTOR: 1.0 13 | WARMUP_ITERS: 0 14 | WEIGHT_DECAY: 0.05 15 | OPTIMIZER: "ADAMW" 16 | LR_SCHEDULER_NAME: "WarmupPolyLR" 17 | BACKBONE_MULTIPLIER: 0.1 18 | CLIP_GRADIENTS: 19 | ENABLED: True 20 | CLIP_TYPE: "full_model" 21 | CLIP_VALUE: 0.01 22 | NORM_TYPE: 2.0 23 | AMP: 24 | ENABLED: False 25 | INPUT: 26 | MIN_SIZE_TRAIN: !!python/object/apply:eval ["[int(x * 0.1 * 640) for x in range(5, 21)]"] 27 | MIN_SIZE_TRAIN_SAMPLING: "choice" 28 | MIN_SIZE_TEST: 640 29 | MAX_SIZE_TRAIN: 2560 30 | MAX_SIZE_TEST: 2560 31 | CROP: 32 | ENABLED: True 33 | TYPE: "absolute" 34 | SIZE: (640, 640) 35 | SINGLE_CATEGORY_MAX_AREA: 1.0 36 | COLOR_AUG_SSD: True 37 | SIZE_DIVISIBILITY: 640 # used in dataset mapper 38 | FORMAT: "RGB" 39 | DATASET_MAPPER_NAME: "yoso_panoptic" 40 | TEST: 41 | EVAL_PERIOD: 5000 42 | AUG: 43 | ENABLED: False 44 | MIN_SIZES: [320, 480, 640, 800, 960, 1120] 45 | MAX_SIZE: 4480 46 | FLIP: True 47 | DATALOADER: 48 | FILTER_EMPTY_ANNOTATIONS: True 49 | NUM_WORKERS: 4 50 | VERSION: 2 51 | # CUDNN_BENCHMARK: True 52 | -------------------------------------------------------------------------------- /projects/YOSO/configs/ade20k/panoptic-segmentation/YOSO-R50.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: Base-ADE20K-PanopticSegmentation.yaml 2 | MODEL: 3 | BACKBONE: 4 | FREEZE_AT: 0 5 | NAME: "build_resnet_backbone" 6 | RESNETS: 7 | DEPTH: 50 8 | STRIDE_IN_1X1: False 9 | OUT_FEATURES: ["res2", "res3", "res4", "res5"] 10 | WEIGHTS: "detectron2://ImageNetPretrained/torchvision/R-50.pkl" 11 | YOSO: 12 | SIZE_DIVISIBILITY: 32 13 | # Structure 14 | NUM_CLASSES: 150 15 | NUM_STAGES: 2 16 | IN_FEATURES: ["res2", "res3", "res4", "res5"] 17 | HIDDEN_DIM: 256 18 | NUM_PROPOSALS: 100 19 | CONV_KERNEL_SIZE_2D: 1 20 | CONV_KERNEL_SIZE_1D: 3 21 | NUM_CLS_FCS: 1 22 | NUM_MASK_FCS: 1 23 | # Loss 24 | NO_OBJECT_WEIGHT: 0.1 25 | CLASS_WEIGHT: 2.0 26 | MASK_WEIGHT: 5.0 27 | DICE_WEIGHT: 5.0 28 | TRAIN_NUM_POINTS: 12544 29 | OVERSAMPLE_RATIO: 3.0 30 | IMPORTANCE_SAMPLE_RATIO: 0.75 31 | TEMPERATIRE: 0.5 #1.0 32 | TEST: 33 | SEMANTIC_ON: False #True 34 | INSTANCE_ON: False #True 35 | PANOPTIC_ON: True 36 | OVERLAP_THRESHOLD: 0.8 37 | OBJECT_MASK_THRESHOLD: 0.2 38 | OUTPUT_DIR: "output/yoso_resnet50_panoptic_seg_ade20k" 39 | 
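The YOSO-R50 config above inherits its solver, input, and dataset settings from Base-ADE20K-PanopticSegmentation.yaml via the _BASE_ key. As a rough sketch of how such a config is typically consumed (illustrative only, not a file in this repository; the relative path and the yoso import assume the working directory is projects/YOSO, the layout train_net.py uses):

from detectron2.config import get_cfg
from detectron2.modeling import build_model

# Importing the yoso package pulls in the YOSO segmentator and dataset mappers
# (see projects/YOSO/yoso/__init__.py below).
from yoso import add_yoso_config

cfg = get_cfg()                     # detectron2 defaults
add_yoso_config(cfg)                # add MODEL.YOSO / SOLVER / INPUT keys (yoso/config.py)
cfg.merge_from_file("configs/ade20k/panoptic-segmentation/YOSO-R50.yaml")  # resolves _BASE_
cfg.freeze()

model = build_model(cfg)            # instantiates the META_ARCHITECTURE named in the config
print(cfg.MODEL.YOSO.NUM_CLASSES)   # 150 for ADE20K panoptic segmentation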
-------------------------------------------------------------------------------- /projects/YOSO/configs/cityscapes/panoptic-segmentation/Base-Cityscapes-PanopticSegmentation.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | PIXEL_MEAN: [123.675, 116.280, 103.530] 3 | PIXEL_STD: [58.395, 57.120, 57.375] 4 | META_ARCHITECTURE: "YOSO" 5 | DATASETS: 6 | TRAIN: ("cityscapes_fine_panoptic_train",) 7 | TEST: ("cityscapes_fine_panoptic_val",) 8 | SOLVER: 9 | IMS_PER_BATCH: 16 10 | BASE_LR: 0.0001 11 | MAX_ITER: 180000 #90000 # 12 | WARMUP_FACTOR: 1.0 13 | WARMUP_ITERS: 0 14 | WEIGHT_DECAY: 0.05 15 | OPTIMIZER: "ADAMW" 16 | LR_SCHEDULER_NAME: "WarmupPolyLR" 17 | BACKBONE_MULTIPLIER: 0.1 18 | CLIP_GRADIENTS: 19 | ENABLED: True 20 | CLIP_TYPE: "full_model" 21 | CLIP_VALUE: 0.01 22 | NORM_TYPE: 2.0 23 | AMP: 24 | ENABLED: False 25 | INPUT: 26 | MIN_SIZE_TRAIN: !!python/object/apply:eval ["[int(x * 0.1 * 1024) for x in range(5, 21)]"] 27 | MIN_SIZE_TRAIN_SAMPLING: "choice" 28 | MAX_SIZE_TRAIN: 4096 29 | MIN_SIZE_TEST: 512 #1024 30 | MAX_SIZE_TEST: 1024 #2048 31 | CROP: 32 | ENABLED: True 33 | TYPE: "absolute" 34 | SIZE: (512, 1024) 35 | SINGLE_CATEGORY_MAX_AREA: 1.0 36 | COLOR_AUG_SSD: True 37 | SIZE_DIVISIBILITY: -1 38 | FORMAT: "RGB" 39 | DATASET_MAPPER_NAME: "yoso_panoptic" 40 | TEST: 41 | EVAL_PERIOD: 5000 42 | AUG: 43 | ENABLED: False 44 | MIN_SIZES: [512, 768, 1024, 1280, 1536, 1792] 45 | MAX_SIZE: 4096 46 | FLIP: True 47 | DATALOADER: 48 | FILTER_EMPTY_ANNOTATIONS: True 49 | NUM_WORKERS: 4 50 | VERSION: 2 51 | CUDNN_BENCHMARK: True -------------------------------------------------------------------------------- /projects/YOSO/configs/cityscapes/panoptic-segmentation/YOSO-R50.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: Base-Cityscapes-PanopticSegmentation.yaml 2 | MODEL: 3 | BACKBONE: 4 | FREEZE_AT: 0 5 | NAME: "build_resnet_backbone" 6 | RESNETS: 7 | DEPTH: 50 8 | STRIDE_IN_1X1: False 9 | NORM: "SyncBN" # use syncbn for cityscapes dataset 10 | OUT_FEATURES: ["res2", "res3", "res4", "res5"] 11 | WEIGHTS: "detectron2://ImageNetPretrained/torchvision/R-50.pkl" 12 | YOSO: 13 | SIZE_DIVISIBILITY: 32 14 | # Structure 15 | NUM_CLASSES: 19 16 | NUM_STAGES: 2 17 | IN_FEATURES: ["res2", "res3", "res4", "res5"] 18 | HIDDEN_DIM: 256 19 | NUM_PROPOSALS: 100 20 | CONV_KERNEL_SIZE_2D: 1 21 | CONV_KERNEL_SIZE_1D: 3 22 | NUM_CLS_FCS: 3 #1 23 | NUM_MASK_FCS: 3 #1 24 | # Loss 25 | NO_OBJECT_WEIGHT: 0.1 26 | CLASS_WEIGHT: 2.0 27 | MASK_WEIGHT: 5.0 28 | DICE_WEIGHT: 5.0 29 | TRAIN_NUM_POINTS: 12544 30 | OVERSAMPLE_RATIO: 3.0 31 | IMPORTANCE_SAMPLE_RATIO: 0.75 32 | TEMPERATIRE: 0.05 33 | TEST: 34 | SEMANTIC_ON: False #True 35 | INSTANCE_ON: False #True 36 | PANOPTIC_ON: True 37 | OVERLAP_THRESHOLD: 0.8 38 | OBJECT_MASK_THRESHOLD: 0.8 #0.5 39 | OUTPUT_DIR: "output/yoso_resnet50_panoptic_seg_cityscapes" -------------------------------------------------------------------------------- /projects/YOSO/configs/coco/panoptic-segmentation/Base-COCO-PanopticSegmentation.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | PIXEL_MEAN: [123.675, 116.280, 103.530] 3 | PIXEL_STD: [58.395, 57.120, 57.375] 4 | META_ARCHITECTURE: "YOSO" 5 | DATASETS: 6 | TRAIN: ("coco_2017_train_panoptic",) 7 | TEST: ("coco_2017_val_panoptic_with_sem_seg",) # to evaluate instance and semantic performance as well 8 | SOLVER: 9 | IMS_PER_BATCH: 16 10 | BASE_LR: 0.0001 11 | STEPS: (327778, 355092) 12 
| MAX_ITER: 368750 13 | WARMUP_FACTOR: 1.0 14 | WARMUP_ITERS: 10 15 | WEIGHT_DECAY: 0.05 16 | OPTIMIZER: "ADAMW" 17 | BACKBONE_MULTIPLIER: 0.1 18 | CLIP_GRADIENTS: 19 | ENABLED: True 20 | CLIP_TYPE: "full_model" 21 | CLIP_VALUE: 0.01 22 | NORM_TYPE: 2.0 23 | AMP: 24 | ENABLED: False 25 | INPUT: 26 | IMAGE_SIZE: 1024 27 | MIN_SCALE: 0.1 28 | MAX_SCALE: 2.0 29 | FORMAT: "RGB" 30 | DATASET_MAPPER_NAME: "yoso_panoptic_lsj" 31 | MIN_SIZE_TEST: 800 # 550 #512 32 | MAX_SIZE_TEST: 1333 # 800 #800 33 | TEST: 34 | EVAL_PERIOD: 5000 35 | DATALOADER: 36 | FILTER_EMPTY_ANNOTATIONS: True 37 | NUM_WORKERS: 4 38 | VERSION: 2 39 | # CUDNN_BENCHMARK: True -------------------------------------------------------------------------------- /projects/YOSO/configs/coco/panoptic-segmentation/YOSO-R50.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: Base-COCO-PanopticSegmentation.yaml 2 | MODEL: 3 | BACKBONE: 4 | FREEZE_AT: 0 5 | NAME: "build_resnet_backbone" 6 | RESNETS: 7 | DEPTH: 50 8 | STRIDE_IN_1X1: False 9 | OUT_FEATURES: ["res2", "res3", "res4", "res5"] 10 | WEIGHTS: "detectron2://ImageNetPretrained/torchvision/R-50.pkl" 11 | YOSO: 12 | SIZE_DIVISIBILITY: 32 13 | # Structure 14 | NUM_CLASSES: 133 15 | NUM_STAGES: 2 16 | IN_FEATURES: ["res2", "res3", "res4", "res5"] 17 | HIDDEN_DIM: 256 18 | NUM_PROPOSALS: 100 19 | CONV_KERNEL_SIZE_2D: 1 20 | CONV_KERNEL_SIZE_1D: 3 21 | NUM_CLS_FCS: 1 22 | NUM_MASK_FCS: 1 23 | # Loss 24 | NO_OBJECT_WEIGHT: 0.1 25 | CLASS_WEIGHT: 2.0 26 | MASK_WEIGHT: 5.0 27 | DICE_WEIGHT: 5.0 28 | TRAIN_NUM_POINTS: 12544 29 | OVERSAMPLE_RATIO: 3.0 30 | IMPORTANCE_SAMPLE_RATIO: 0.75 31 | TEMPERATIRE: 0.05 32 | TEST: 33 | SEMANTIC_ON: False 34 | INSTANCE_ON: False 35 | PANOPTIC_ON: True 36 | OVERLAP_THRESHOLD: 0.8 37 | OBJECT_MASK_THRESHOLD: 0.7 38 | OUTPUT_DIR: "output/yoso_resnet50_panoptic_seg_coco" -------------------------------------------------------------------------------- /projects/YOSO/configs/mapillary-vistas/panoptic-segmentation/Base-MapillaryVistas-PanopticSegmentation.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | PIXEL_MEAN: [123.675, 116.280, 103.530] 3 | PIXEL_STD: [58.395, 57.120, 57.375] 4 | META_ARCHITECTURE: "YOSO" 5 | DATASETS: 6 | TRAIN: ("mapillary_vistas_panoptic_train",) 7 | TEST: ("mapillary_vistas_panoptic_val",) 8 | SOLVER: 9 | IMS_PER_BATCH: 16 10 | BASE_LR: 0.0001 11 | MAX_ITER: 300000 12 | WARMUP_FACTOR: 1.0 13 | WARMUP_ITERS: 0 14 | WEIGHT_DECAY: 0.05 15 | OPTIMIZER: "ADAMW" 16 | LR_SCHEDULER_NAME: "WarmupPolyLR" 17 | BACKBONE_MULTIPLIER: 0.1 18 | CLIP_GRADIENTS: 19 | ENABLED: True 20 | CLIP_TYPE: "full_model" 21 | CLIP_VALUE: 0.01 22 | NORM_TYPE: 2.0 23 | AMP: 24 | ENABLED: False 25 | INPUT: 26 | MIN_SIZE_TRAIN: !!python/object/apply:eval ["[int(x * 0.1 * 2048) for x in range(5, 21)]"] 27 | MIN_SIZE_TRAIN_SAMPLING: "choice" 28 | MIN_SIZE_TEST: 2048 29 | MAX_SIZE_TRAIN: 8192 30 | MAX_SIZE_TEST: 2048 31 | CROP: 32 | ENABLED: True 33 | TYPE: "absolute" 34 | SIZE: (1024, 1024) 35 | SINGLE_CATEGORY_MAX_AREA: 1.0 36 | COLOR_AUG_SSD: True 37 | SIZE_DIVISIBILITY: -1 #1024 # used in dataset mapper 38 | FORMAT: "RGB" 39 | DATASET_MAPPER_NAME: "yoso_panoptic" 40 | TEST: 41 | EVAL_PERIOD: 20000 #5000 42 | DATALOADER: 43 | FILTER_EMPTY_ANNOTATIONS: True 44 | NUM_WORKERS: 4 45 | VERSION: 2 -------------------------------------------------------------------------------- /projects/YOSO/configs/mapillary-vistas/panoptic-segmentation/YOSO-R50.yaml: 
-------------------------------------------------------------------------------- 1 | _BASE_: Base-MapillaryVistas-PanopticSegmentation.yaml 2 | MODEL: 3 | BACKBONE: 4 | FREEZE_AT: 0 5 | NAME: "build_resnet_backbone" 6 | RESNETS: 7 | DEPTH: 50 8 | STRIDE_IN_1X1: False 9 | OUT_FEATURES: ["res2", "res3", "res4", "res5"] 10 | WEIGHTS: "detectron2://ImageNetPretrained/torchvision/R-50.pkl" 11 | YOSO: 12 | SIZE_DIVISIBILITY: 32 13 | # Structure 14 | NUM_CLASSES: 65 15 | NUM_STAGES: 2 16 | IN_FEATURES: ["res2", "res3", "res4", "res5"] 17 | HIDDEN_DIM: 256 18 | NUM_PROPOSALS: 150 #100 19 | CONV_KERNEL_SIZE_2D: 1 20 | CONV_KERNEL_SIZE_1D: 3 21 | NUM_CLS_FCS: 3 #1 22 | NUM_MASK_FCS: 3 #1 23 | # Loss 24 | NO_OBJECT_WEIGHT: 0.1 25 | CLASS_WEIGHT: 2.0 26 | MASK_WEIGHT: 5.0 27 | DICE_WEIGHT: 5.0 28 | TRAIN_NUM_POINTS: 12544 29 | OVERSAMPLE_RATIO: 3.0 30 | IMPORTANCE_SAMPLE_RATIO: 0.75 31 | TEMPERATIRE: 0.1 #0.5 32 | TEST: 33 | SEMANTIC_ON: False 34 | INSTANCE_ON: False 35 | PANOPTIC_ON: True 36 | OVERLAP_THRESHOLD: 0.8 37 | OBJECT_MASK_THRESHOLD: 0.0 38 | OUTPUT_DIR: "output/yoso_resnet50_panoptic_seg_mapillary" 39 | -------------------------------------------------------------------------------- /projects/YOSO/yoso/__init__.py: -------------------------------------------------------------------------------- 1 | from .config import add_yoso_config 2 | from .segmentator import YOSO 3 | from . import data 4 | from .data.dataset_mappers.yoso_instance_lsj_dataset_mapper import YOSOInstanceLSJDatasetMapper 5 | from .data.dataset_mappers.yoso_panoptic_lsj_dataset_mapper import YOSOPanopticLSJDatasetMapper 6 | from .data.dataset_mappers.yoso_instance_dataset_mapper import YOSOInstanceDatasetMapper 7 | from .data.dataset_mappers.yoso_panoptic_dataset_mapper import YOSOPanopticDatasetMapper 8 | from .data.dataset_mappers.yoso_semantic_dataset_mapper import YOSOSemanticDatasetMapper 9 | from .utils import build_lr_scheduler, SemanticSegmentorWithTTA -------------------------------------------------------------------------------- /projects/YOSO/yoso/config.py: -------------------------------------------------------------------------------- 1 | from detectron2.config import CfgNode as CN 2 | 3 | def add_yoso_config(cfg): 4 | cfg.MODEL.YOSO = CN() 5 | cfg.MODEL.YOSO.SIZE_DIVISIBILITY = 32 6 | cfg.MODEL.YOSO.NUM_CLASSES = 133 7 | cfg.MODEL.YOSO.NUM_STAGES = 2 8 | 9 | cfg.MODEL.YOSO.IN_FEATURES = ["res2", "res3", "res4", "res5"] 10 | cfg.MODEL.YOSO.HIDDEN_DIM = 256 11 | cfg.MODEL.YOSO.AGG_DIM = 128 12 | cfg.MODEL.YOSO.NUM_PROPOSALS = 100 13 | cfg.MODEL.YOSO.CONV_KERNEL_SIZE_2D = 1 14 | cfg.MODEL.YOSO.CONV_KERNEL_SIZE_1D = 3 15 | cfg.MODEL.YOSO.NUM_CLS_FCS = 1 16 | cfg.MODEL.YOSO.NUM_MASK_FCS = 1 17 | 18 | cfg.MODEL.YOSO.NO_OBJECT_WEIGHT = 0.1 19 | cfg.MODEL.YOSO.CLASS_WEIGHT = 2.0 20 | cfg.MODEL.YOSO.MASK_WEIGHT = 5.0 21 | cfg.MODEL.YOSO.DICE_WEIGHT = 5.0 22 | cfg.MODEL.YOSO.TRAIN_NUM_POINTS = 112 * 112 23 | cfg.MODEL.YOSO.OVERSAMPLE_RATIO = 3.0 24 | cfg.MODEL.YOSO.IMPORTANCE_SAMPLE_RATIO = 0.75 25 | cfg.MODEL.YOSO.TEMPERATIRE = 0.1 26 | 27 | cfg.MODEL.YOSO.TEST = CN() 28 | cfg.MODEL.YOSO.TEST.SEMANTIC_ON = False 29 | cfg.MODEL.YOSO.TEST.INSTANCE_ON = False 30 | cfg.MODEL.YOSO.TEST.PANOPTIC_ON = False 31 | cfg.MODEL.YOSO.TEST.OBJECT_MASK_THRESHOLD = 0.0 32 | cfg.MODEL.YOSO.TEST.OVERLAP_THRESHOLD = 0.0 33 | cfg.MODEL.YOSO.TEST.SEM_SEG_POSTPROCESSING_BEFORE_INFERENCE = False 34 | 35 | cfg.SOLVER.OPTIMIZER = "ADAMW" 36 | cfg.SOLVER.BACKBONE_MULTIPLIER = 0.1 37 | cfg.SOLVER.WEIGHT_DECAY_EMBED = 0.0 38 | 
cfg.SOLVER.WEIGHT_DECAY_BIAS = None 39 | 40 | cfg.SOLVER.POLY_LR_POWER = 0.9 41 | cfg.SOLVER.POLY_LR_CONSTANT_ENDING = 0.0 42 | 43 | cfg.INPUT.DATASET_MAPPER_NAME = "yoso_panoptic_lsj" 44 | cfg.INPUT.SIZE_DIVISIBILITY = -1 45 | cfg.INPUT.COLOR_AUG_SSD = False 46 | cfg.INPUT.CROP.SINGLE_CATEGORY_MAX_AREA = 1.0 47 | 48 | cfg.INPUT.IMAGE_SIZE = 1024 49 | cfg.INPUT.MIN_SCALE = 0.1 50 | cfg.INPUT.MAX_SCALE = 2.0 51 | -------------------------------------------------------------------------------- /projects/YOSO/yoso/data/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | from . import datasets 3 | -------------------------------------------------------------------------------- /projects/YOSO/yoso/data/dataset_mappers/yoso_panoptic_dataset_mapper.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | import copy 3 | import logging 4 | 5 | import numpy as np 6 | import torch 7 | from torch.nn import functional as F 8 | 9 | from detectron2.config import configurable 10 | from detectron2.data import detection_utils as utils 11 | from detectron2.data import transforms as T 12 | from detectron2.structures import BitMasks, Instances 13 | 14 | from .yoso_semantic_dataset_mapper import YOSOSemanticDatasetMapper 15 | 16 | __all__ = ["YOSOPanopticDatasetMapper"] 17 | 18 | 19 | class YOSOPanopticDatasetMapper(YOSOSemanticDatasetMapper): 20 | """ 21 | A callable which takes a dataset dict in Detectron2 Dataset format, 22 | and map it into a format used by MaskFormer for panoptic segmentation. 23 | 24 | The callable currently does the following: 25 | 26 | 1. Read the image from "file_name" 27 | 2. Applies geometric transforms to the image and annotation 28 | 3. Find and applies suitable cropping to the image and annotation 29 | 4. Prepare image and annotation to Tensors 30 | """ 31 | 32 | @configurable 33 | def __init__( 34 | self, 35 | is_train=True, 36 | *, 37 | augmentations, 38 | image_format, 39 | ignore_label, 40 | size_divisibility, 41 | ): 42 | """ 43 | NOTE: this interface is experimental. 44 | Args: 45 | is_train: for training or inference 46 | augmentations: a list of augmentations or deterministic transforms to apply 47 | image_format: an image format supported by :func:`detection_utils.read_image`. 48 | ignore_label: the label that is ignored to evaluation 49 | size_divisibility: pad image size to be divisible by this value 50 | """ 51 | super().__init__( 52 | is_train, 53 | augmentations=augmentations, 54 | image_format=image_format, 55 | ignore_label=ignore_label, 56 | size_divisibility=size_divisibility, 57 | ) 58 | 59 | def __call__(self, dataset_dict): 60 | """ 61 | Args: 62 | dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format. 63 | 64 | Returns: 65 | dict: a format that builtin models in detectron2 accept 66 | """ 67 | assert self.is_train, "YOSOPanopticDatasetMapper should only be used for training!" 
68 | 69 | dataset_dict = copy.deepcopy(dataset_dict) # it will be modified by code below 70 | image = utils.read_image(dataset_dict["file_name"], format=self.img_format) 71 | utils.check_image_size(dataset_dict, image) 72 | 73 | # semantic segmentation 74 | if "sem_seg_file_name" in dataset_dict: 75 | # PyTorch transformation not implemented for uint16, so converting it to double first 76 | sem_seg_gt = utils.read_image(dataset_dict.pop("sem_seg_file_name")).astype("double") 77 | else: 78 | sem_seg_gt = None 79 | 80 | # panoptic segmentation 81 | if "pan_seg_file_name" in dataset_dict: 82 | pan_seg_gt = utils.read_image(dataset_dict.pop("pan_seg_file_name"), "RGB") 83 | segments_info = dataset_dict["segments_info"] 84 | else: 85 | pan_seg_gt = None 86 | segments_info = None 87 | 88 | if pan_seg_gt is None: 89 | raise ValueError( 90 | "Cannot find 'pan_seg_file_name' for panoptic segmentation dataset {}.".format( 91 | dataset_dict["file_name"] 92 | ) 93 | ) 94 | 95 | aug_input = T.AugInput(image, sem_seg=sem_seg_gt) 96 | aug_input, transforms = T.apply_transform_gens(self.tfm_gens, aug_input) 97 | image = aug_input.image 98 | if sem_seg_gt is not None: 99 | sem_seg_gt = aug_input.sem_seg 100 | 101 | # apply the same transformation to panoptic segmentation 102 | pan_seg_gt = transforms.apply_segmentation(pan_seg_gt) 103 | 104 | from panopticapi.utils import rgb2id 105 | 106 | pan_seg_gt = rgb2id(pan_seg_gt) 107 | 108 | # Pad image and segmentation label here! 109 | image = torch.as_tensor(np.ascontiguousarray(image.transpose(2, 0, 1))) 110 | if sem_seg_gt is not None: 111 | sem_seg_gt = torch.as_tensor(sem_seg_gt.astype("long")) 112 | pan_seg_gt = torch.as_tensor(pan_seg_gt.astype("long")) 113 | 114 | if self.size_divisibility > 0: 115 | image_size = (image.shape[-2], image.shape[-1]) 116 | padding_size = [ 117 | 0, 118 | self.size_divisibility - image_size[1], 119 | 0, 120 | self.size_divisibility - image_size[0], 121 | ] 122 | image = F.pad(image, padding_size, value=128).contiguous() 123 | if sem_seg_gt is not None: 124 | sem_seg_gt = F.pad(sem_seg_gt, padding_size, value=self.ignore_label).contiguous() 125 | pan_seg_gt = F.pad( 126 | pan_seg_gt, padding_size, value=0 127 | ).contiguous() # 0 is the VOID panoptic label 128 | 129 | image_shape = (image.shape[-2], image.shape[-1]) # h, w 130 | 131 | # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory, 132 | # but not efficient on large generic data structures due to the use of pickle & mp.Queue. 133 | # Therefore it's important to use torch.Tensor. 
134 | dataset_dict["image"] = image 135 | if sem_seg_gt is not None: 136 | dataset_dict["sem_seg"] = sem_seg_gt.long() 137 | 138 | if "annotations" in dataset_dict: 139 | raise ValueError("Pemantic segmentation dataset should not have 'annotations'.") 140 | 141 | # Prepare per-category binary masks 142 | pan_seg_gt = pan_seg_gt.numpy() 143 | instances = Instances(image_shape) 144 | classes = [] 145 | masks = [] 146 | for segment_info in segments_info: 147 | class_id = segment_info["category_id"] 148 | if not segment_info["iscrowd"]: 149 | classes.append(class_id) 150 | masks.append(pan_seg_gt == segment_info["id"]) 151 | 152 | classes = np.array(classes) 153 | instances.gt_classes = torch.tensor(classes, dtype=torch.int64) 154 | if len(masks) == 0: 155 | # Some image does not have annotation (all ignored) 156 | instances.gt_masks = torch.zeros((0, pan_seg_gt.shape[-2], pan_seg_gt.shape[-1])) 157 | else: 158 | masks = BitMasks( 159 | torch.stack([torch.from_numpy(np.ascontiguousarray(x.copy())) for x in masks]) 160 | ) 161 | instances.gt_masks = masks.tensor 162 | 163 | dataset_dict["instances"] = instances 164 | 165 | return dataset_dict 166 | -------------------------------------------------------------------------------- /projects/YOSO/yoso/data/dataset_mappers/yoso_panoptic_lsj_dataset_mapper.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # Modified by Bowen Cheng from https://github.com/facebookresearch/detr/blob/master/d2/detr/dataset_mapper.py 3 | import copy 4 | import logging 5 | 6 | import numpy as np 7 | import torch 8 | 9 | from detectron2.config import configurable 10 | from detectron2.data import detection_utils as utils 11 | from detectron2.data import transforms as T 12 | from detectron2.data.transforms import TransformGen 13 | from detectron2.structures import BitMasks, Boxes, Instances 14 | 15 | __all__ = ["YOSOPanopticLSJDatasetMapper"] 16 | 17 | 18 | def build_transform_gen(cfg, is_train): 19 | """ 20 | Create a list of default :class:`Augmentation` from config. 21 | Now it includes resizing and flipping. 22 | Returns: 23 | list[Augmentation] 24 | """ 25 | assert is_train, "Only support training augmentation" 26 | image_size = cfg.INPUT.IMAGE_SIZE 27 | min_scale = cfg.INPUT.MIN_SCALE 28 | max_scale = cfg.INPUT.MAX_SCALE 29 | 30 | augmentation = [] 31 | 32 | if cfg.INPUT.RANDOM_FLIP != "none": 33 | augmentation.append( 34 | T.RandomFlip( 35 | horizontal=cfg.INPUT.RANDOM_FLIP == "horizontal", 36 | vertical=cfg.INPUT.RANDOM_FLIP == "vertical", 37 | ) 38 | ) 39 | 40 | augmentation.extend([ 41 | T.ResizeScale( 42 | min_scale=min_scale, max_scale=max_scale, target_height=image_size, target_width=image_size 43 | ), 44 | T.FixedSizeCrop(crop_size=(image_size, image_size)), 45 | ]) 46 | 47 | return augmentation 48 | 49 | 50 | # This is specifically designed for the COCO dataset. 51 | class YOSOPanopticLSJDatasetMapper: 52 | """ 53 | A callable which takes a dataset dict in Detectron2 Dataset format, 54 | and map it into a format used by MaskFormer. 55 | 56 | This dataset mapper applies the same transformation as DETR for COCO panoptic segmentation. 57 | 58 | The callable currently does the following: 59 | 60 | 1. Read the image from "file_name" 61 | 2. Applies geometric transforms to the image and annotation 62 | 3. Find and applies suitable cropping to the image and annotation 63 | 4. 
Prepare image and annotation to Tensors 64 | """ 65 | 66 | @configurable 67 | def __init__( 68 | self, 69 | is_train=True, 70 | *, 71 | tfm_gens, 72 | image_format, 73 | ): 74 | """ 75 | NOTE: this interface is experimental. 76 | Args: 77 | is_train: for training or inference 78 | augmentations: a list of augmentations or deterministic transforms to apply 79 | crop_gen: crop augmentation 80 | tfm_gens: data augmentation 81 | image_format: an image format supported by :func:`detection_utils.read_image`. 82 | """ 83 | self.tfm_gens = tfm_gens 84 | logging.getLogger(__name__).info( 85 | "[YOSOPanopticLSJDatasetMapper] Full TransformGens used in training: {}".format( 86 | str(self.tfm_gens) 87 | ) 88 | ) 89 | 90 | self.img_format = image_format 91 | self.is_train = is_train 92 | 93 | @classmethod 94 | def from_config(cls, cfg, is_train=True): 95 | # Build augmentation 96 | tfm_gens = build_transform_gen(cfg, is_train) 97 | 98 | ret = { 99 | "is_train": is_train, 100 | "tfm_gens": tfm_gens, 101 | "image_format": cfg.INPUT.FORMAT, 102 | } 103 | return ret 104 | 105 | def __call__(self, dataset_dict): 106 | """ 107 | Args: 108 | dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format. 109 | 110 | Returns: 111 | dict: a format that builtin models in detectron2 accept 112 | """ 113 | dataset_dict = copy.deepcopy(dataset_dict) # it will be modified by code below 114 | image = utils.read_image(dataset_dict["file_name"], format=self.img_format) 115 | utils.check_image_size(dataset_dict, image) 116 | 117 | image, transforms = T.apply_transform_gens(self.tfm_gens, image) 118 | image_shape = image.shape[:2] # h, w 119 | 120 | # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory, 121 | # but not efficient on large generic data structures due to the use of pickle & mp.Queue. 122 | # Therefore it's important to use torch.Tensor. 123 | dataset_dict["image"] = torch.as_tensor(np.ascontiguousarray(image.transpose(2, 0, 1))) 124 | 125 | if not self.is_train: 126 | # USER: Modify this if you want to keep them for some reason. 
127 | dataset_dict.pop("annotations", None) 128 | return dataset_dict 129 | 130 | if "pan_seg_file_name" in dataset_dict: 131 | pan_seg_gt = utils.read_image(dataset_dict.pop("pan_seg_file_name"), "RGB") 132 | segments_info = dataset_dict["segments_info"] 133 | 134 | # apply the same transformation to panoptic segmentation 135 | pan_seg_gt = transforms.apply_segmentation(pan_seg_gt) 136 | 137 | from panopticapi.utils import rgb2id 138 | 139 | pan_seg_gt = rgb2id(pan_seg_gt) 140 | 141 | instances = Instances(image_shape) 142 | classes = [] 143 | masks = [] 144 | for segment_info in segments_info: 145 | class_id = segment_info["category_id"] 146 | if not segment_info["iscrowd"]: 147 | classes.append(class_id) 148 | masks.append(pan_seg_gt == segment_info["id"]) 149 | 150 | classes = np.array(classes) 151 | instances.gt_classes = torch.tensor(classes, dtype=torch.int64) 152 | if len(masks) == 0: 153 | # Some image does not have annotation (all ignored) 154 | instances.gt_masks = torch.zeros((0, pan_seg_gt.shape[-2], pan_seg_gt.shape[-1])) 155 | instances.gt_boxes = Boxes(torch.zeros((0, 4))) 156 | else: 157 | masks = BitMasks( 158 | torch.stack([torch.from_numpy(np.ascontiguousarray(x.copy())) for x in masks]) 159 | ) 160 | instances.gt_masks = masks.tensor 161 | instances.gt_boxes = masks.get_bounding_boxes() 162 | 163 | dataset_dict["instances"] = instances 164 | 165 | return dataset_dict 166 | -------------------------------------------------------------------------------- /projects/YOSO/yoso/data/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | from . import ( 3 | register_ade20k_full, 4 | register_ade20k_panoptic, 5 | register_coco_stuff_10k, 6 | register_mapillary_vistas, 7 | register_coco_panoptic_annos_semseg, 8 | register_ade20k_instance, 9 | register_mapillary_vistas_panoptic, 10 | ) 11 | -------------------------------------------------------------------------------- /projects/YOSO/yoso/data/datasets/register_ade20k_instance.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
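# Registers the COCO-format ADE20K instance splits ("ade20k_instance_train" and
# "ade20k_instance_val", 100 thing categories) with detectron2's DatasetCatalog and
# MetadataCatalog; the dataset root is read from $DETECTRON2_DATASETS (default: "datasets").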
2 | import json 3 | import logging 4 | import numpy as np 5 | import os 6 | from PIL import Image 7 | 8 | from detectron2.data import DatasetCatalog, MetadataCatalog 9 | from detectron2.data.datasets.coco import load_coco_json, register_coco_instances 10 | from detectron2.utils.file_io import PathManager 11 | 12 | ADE_CATEGORIES = [{'id': 7, 'name': 'bed'}, {'id': 8, 'name': 'windowpane'}, {'id': 10, 'name': 'cabinet'}, {'id': 12, 'name': 'person'}, {'id': 14, 'name': 'door'}, {'id': 15, 'name': 'table'}, {'id': 18, 'name': 'curtain'}, {'id': 19, 'name': 'chair'}, {'id': 20, 'name': 'car'}, {'id': 22, 'name': 'painting'}, {'id': 23, 'name': 'sofa'}, {'id': 24, 'name': 'shelf'}, {'id': 27, 'name': 'mirror'}, {'id': 30, 'name': 'armchair'}, {'id': 31, 'name': 'seat'}, {'id': 32, 'name': 'fence'}, {'id': 33, 'name': 'desk'}, {'id': 35, 'name': 'wardrobe'}, {'id': 36, 'name': 'lamp'}, {'id': 37, 'name': 'bathtub'}, {'id': 38, 'name': 'railing'}, {'id': 39, 'name': 'cushion'}, {'id': 41, 'name': 'box'}, {'id': 42, 'name': 'column'}, {'id': 43, 'name': 'signboard'}, {'id': 44, 'name': 'chest of drawers'}, {'id': 45, 'name': 'counter'}, {'id': 47, 'name': 'sink'}, {'id': 49, 'name': 'fireplace'}, {'id': 50, 'name': 'refrigerator'}, {'id': 53, 'name': 'stairs'}, {'id': 55, 'name': 'case'}, {'id': 56, 'name': 'pool table'}, {'id': 57, 'name': 'pillow'}, {'id': 58, 'name': 'screen door'}, {'id': 62, 'name': 'bookcase'}, {'id': 64, 'name': 'coffee table'}, {'id': 65, 'name': 'toilet'}, {'id': 66, 'name': 'flower'}, {'id': 67, 'name': 'book'}, {'id': 69, 'name': 'bench'}, {'id': 70, 'name': 'countertop'}, {'id': 71, 'name': 'stove'}, {'id': 72, 'name': 'palm'}, {'id': 73, 'name': 'kitchen island'}, {'id': 74, 'name': 'computer'}, {'id': 75, 'name': 'swivel chair'}, {'id': 76, 'name': 'boat'}, {'id': 78, 'name': 'arcade machine'}, {'id': 80, 'name': 'bus'}, {'id': 81, 'name': 'towel'}, {'id': 82, 'name': 'light'}, {'id': 83, 'name': 'truck'}, {'id': 85, 'name': 'chandelier'}, {'id': 86, 'name': 'awning'}, {'id': 87, 'name': 'streetlight'}, {'id': 88, 'name': 'booth'}, {'id': 89, 'name': 'television receiver'}, {'id': 90, 'name': 'airplane'}, {'id': 92, 'name': 'apparel'}, {'id': 93, 'name': 'pole'}, {'id': 95, 'name': 'bannister'}, {'id': 97, 'name': 'ottoman'}, {'id': 98, 'name': 'bottle'}, {'id': 102, 'name': 'van'}, {'id': 103, 'name': 'ship'}, {'id': 104, 'name': 'fountain'}, {'id': 107, 'name': 'washer'}, {'id': 108, 'name': 'plaything'}, {'id': 110, 'name': 'stool'}, {'id': 111, 'name': 'barrel'}, {'id': 112, 'name': 'basket'}, {'id': 115, 'name': 'bag'}, {'id': 116, 'name': 'minibike'}, {'id': 118, 'name': 'oven'}, {'id': 119, 'name': 'ball'}, {'id': 120, 'name': 'food'}, {'id': 121, 'name': 'step'}, {'id': 123, 'name': 'trade name'}, {'id': 124, 'name': 'microwave'}, {'id': 125, 'name': 'pot'}, {'id': 126, 'name': 'animal'}, {'id': 127, 'name': 'bicycle'}, {'id': 129, 'name': 'dishwasher'}, {'id': 130, 'name': 'screen'}, {'id': 132, 'name': 'sculpture'}, {'id': 133, 'name': 'hood'}, {'id': 134, 'name': 'sconce'}, {'id': 135, 'name': 'vase'}, {'id': 136, 'name': 'traffic light'}, {'id': 137, 'name': 'tray'}, {'id': 138, 'name': 'ashcan'}, {'id': 139, 'name': 'fan'}, {'id': 142, 'name': 'plate'}, {'id': 143, 'name': 'monitor'}, {'id': 144, 'name': 'bulletin board'}, {'id': 146, 'name': 'radiator'}, {'id': 147, 'name': 'glass'}, {'id': 148, 'name': 'clock'}, {'id': 149, 'name': 'flag'}] 13 | 14 | 15 | _PREDEFINED_SPLITS = { 16 | # point annotations without masks 17 | "ade20k_instance_train": ( 18 
| "ADEChallengeData2016/images/training", 19 | "ADEChallengeData2016/ade20k_instance_train.json", 20 | ), 21 | "ade20k_instance_val": ( 22 | "ADEChallengeData2016/images/validation", 23 | "ADEChallengeData2016/ade20k_instance_val.json", 24 | ), 25 | } 26 | 27 | 28 | def _get_ade_instances_meta(): 29 | thing_ids = [k["id"] for k in ADE_CATEGORIES] 30 | assert len(thing_ids) == 100, len(thing_ids) 31 | # Mapping from the incontiguous ADE category id to an id in [0, 99] 32 | thing_dataset_id_to_contiguous_id = {k: i for i, k in enumerate(thing_ids)} 33 | thing_classes = [k["name"] for k in ADE_CATEGORIES] 34 | ret = { 35 | "thing_dataset_id_to_contiguous_id": thing_dataset_id_to_contiguous_id, 36 | "thing_classes": thing_classes, 37 | } 38 | return ret 39 | 40 | 41 | def register_all_ade20k_instance(root): 42 | for key, (image_root, json_file) in _PREDEFINED_SPLITS.items(): 43 | # Assume pre-defined datasets live in `./datasets`. 44 | register_coco_instances( 45 | key, 46 | _get_ade_instances_meta(), 47 | os.path.join(root, json_file) if "://" not in json_file else json_file, 48 | os.path.join(root, image_root), 49 | ) 50 | 51 | 52 | _root = os.getenv("DETECTRON2_DATASETS", "datasets") 53 | register_all_ade20k_instance(_root) 54 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [isort] 2 | line_length=100 3 | multi_line_output=3 4 | include_trailing_comma=True 5 | known_standard_library=numpy,setuptools,mock 6 | skip=./datasets,docs 7 | skip_glob=*/__init__.py 8 | known_myself=detectron2 9 | known_third_party=fvcore,matplotlib,cv2,torch,torchvision,PIL,pycocotools,yacs,termcolor,cityscapesscripts,tabulate,tqdm,scipy,lvis,psutil,pkg_resources,caffe2,onnx 10 | no_lines_before=STDLIB,THIRDPARTY 11 | sections=FUTURE,STDLIB,THIRDPARTY,myself,FIRSTPARTY,LOCALFOLDER 12 | default_section=FIRSTPARTY 13 | 14 | [mypy] 15 | python_version=3.6 16 | ignore_missing_imports = True 17 | warn_unused_configs = True 18 | disallow_untyped_defs = True 19 | check_untyped_defs = True 20 | warn_unused_ignores = True 21 | warn_redundant_casts = True 22 | show_column_numbers = True 23 | follow_imports = silent 24 | allow_redefinition = True 25 | ; Require all functions to be annotated 26 | disallow_incomplete_defs = True 27 | --------------------------------------------------------------------------------