├── .gitignore ├── LICENSE ├── README.md ├── demo ├── config.py ├── demo.py ├── evaluate_pq.ipynb ├── fig.png ├── head_Latency_and_FLOPs.ipynb ├── neck_Latency_and_FLOPs.ipynb └── predictor.py ├── detectron2 ├── __init__.py ├── checkpoint │ ├── __init__.py │ ├── c2_model_loading.py │ ├── catalog.py │ └── detection_checkpoint.py ├── config │ ├── __init__.py │ ├── compat.py │ ├── config.py │ └── defaults.py ├── data │ ├── __init__.py │ ├── build.py │ ├── catalog.py │ ├── common.py │ ├── dataset_mapper.py │ ├── datasets │ │ ├── README.md │ │ ├── __init__.py │ │ ├── builtin.py │ │ ├── builtin_meta.py │ │ ├── cityscapes.py │ │ ├── cityscapes_panoptic.py │ │ ├── coco.py │ │ ├── coco_panoptic.py │ │ ├── crowdhuman.py │ │ ├── lvis.py │ │ ├── lvis_v0_5_categories.py │ │ ├── lvis_v1_categories.py │ │ ├── mot.py │ │ ├── pascal_voc.py │ │ └── register_coco.py │ ├── detection_utils.py │ ├── samplers │ │ ├── __init__.py │ │ ├── distributed_sampler.py │ │ └── grouped_batch_sampler.py │ └── transforms │ │ ├── __init__.py │ │ ├── augmentation.py │ │ ├── augmentation_impl.py │ │ └── transform.py ├── engine │ ├── __init__.py │ ├── defaults.py │ ├── hooks.py │ ├── launch.py │ └── train_loop.py ├── evaluation │ ├── __init__.py │ ├── cityscapes_evaluation.py │ ├── coco_evaluation.py │ ├── evaluator.py │ ├── fast_eval_api.py │ ├── lvis_evaluation.py │ ├── panoptic_evaluation.py │ ├── pascal_voc_evaluation.py │ ├── rotated_coco_evaluation.py │ ├── sem_seg_evaluation.py │ └── testing.py ├── export │ ├── README.md │ ├── __init__.py │ ├── api.py │ ├── c10.py │ ├── caffe2_export.py │ ├── caffe2_inference.py │ ├── caffe2_modeling.py │ ├── caffe2_patch.py │ ├── shared.py │ ├── torchscript.py │ └── torchscript_patch.py ├── layers │ ├── __init__.py │ ├── aspp.py │ ├── batch_norm.py │ ├── blocks.py │ ├── csrc │ │ ├── README.md │ │ ├── ROIAlign │ │ │ ├── ROIAlign.h │ │ │ ├── ROIAlign_cpu.cpp │ │ │ └── ROIAlign_cuda.cu │ │ ├── ROIAlignRotated │ │ │ ├── ROIAlignRotated.h │ │ │ ├── ROIAlignRotated_cpu.cpp │ │ │ └── ROIAlignRotated_cuda.cu │ │ ├── box_iou_rotated │ │ │ ├── box_iou_rotated.h │ │ │ ├── box_iou_rotated_cpu.cpp │ │ │ ├── box_iou_rotated_cuda.cu │ │ │ └── box_iou_rotated_utils.h │ │ ├── cocoeval │ │ │ ├── cocoeval.cpp │ │ │ └── cocoeval.h │ │ ├── cuda_version.cu │ │ ├── deformable │ │ │ ├── deform_conv.h │ │ │ ├── deform_conv_cuda.cu │ │ │ └── deform_conv_cuda_kernel.cu │ │ ├── nms_rotated │ │ │ ├── nms_rotated.h │ │ │ ├── nms_rotated_cpu.cpp │ │ │ └── nms_rotated_cuda.cu │ │ └── vision.cpp │ ├── deform_conv.py │ ├── mask_ops.py │ ├── nms.py │ ├── roi_align.py │ ├── roi_align_rotated.py │ ├── rotated_boxes.py │ ├── shape_spec.py │ └── wrappers.py ├── model_zoo │ ├── __init__.py │ └── model_zoo.py ├── modeling │ ├── __init__.py │ ├── anchor_generator.py │ ├── backbone │ │ ├── __init__.py │ │ ├── backbone.py │ │ ├── build.py │ │ ├── fpn.py │ │ └── resnet.py │ ├── box_regression.py │ ├── matcher.py │ ├── meta_arch │ │ ├── __init__.py │ │ ├── build.py │ │ ├── panoptic_fpn.py │ │ ├── rcnn.py │ │ ├── retinanet.py │ │ └── semantic_seg.py │ ├── poolers.py │ ├── postprocessing.py │ ├── proposal_generator │ │ ├── __init__.py │ │ ├── build.py │ │ ├── proposal_utils.py │ │ ├── rpn.py │ │ └── rrpn.py │ ├── roi_heads │ │ ├── __init__.py │ │ ├── box_head.py │ │ ├── cascade_rcnn.py │ │ ├── fast_rcnn.py │ │ ├── keypoint_head.py │ │ ├── mask_head.py │ │ ├── roi_heads.py │ │ └── rotated_fast_rcnn.py │ ├── sampling.py │ └── test_time_augmentation.py ├── projects │ ├── README.md │ └── __init__.py ├── solver │ ├── __init__.py │ ├── 
build.py │ └── lr_scheduler.py ├── structures │ ├── __init__.py │ ├── boxes.py │ ├── image_list.py │ ├── instances.py │ ├── keypoints.py │ ├── masks.py │ └── rotated_boxes.py └── utils │ ├── README.md │ ├── __init__.py │ ├── analysis.py │ ├── collect_env.py │ ├── colormap.py │ ├── comm.py │ ├── env.py │ ├── events.py │ ├── file_io.py │ ├── logger.py │ ├── memory.py │ ├── registry.py │ ├── serialize.py │ ├── testing.py │ ├── video_visualizer.py │ └── visualizer.py ├── projects └── YOSO │ ├── configs │ ├── ade20k │ │ └── panoptic-segmentation │ │ │ ├── Base-ADE20K-PanopticSegmentation.yaml │ │ │ └── YOSO-R50.yaml │ ├── cityscapes │ │ └── panoptic-segmentation │ │ │ ├── Base-Cityscapes-PanopticSegmentation.yaml │ │ │ └── YOSO-R50.yaml │ ├── coco │ │ └── panoptic-segmentation │ │ │ ├── Base-COCO-PanopticSegmentation.yaml │ │ │ └── YOSO-R50.yaml │ └── mapillary-vistas │ │ └── panoptic-segmentation │ │ ├── Base-MapillaryVistas-PanopticSegmentation.yaml │ │ └── YOSO-R50.yaml │ ├── train_net.py │ └── yoso │ ├── __init__.py │ ├── config.py │ ├── data │ ├── __init__.py │ ├── dataset_mappers │ │ ├── yoso_instance_dataset_mapper.py │ │ ├── yoso_instance_lsj_dataset_mapper.py │ │ ├── yoso_panoptic_dataset_mapper.py │ │ ├── yoso_panoptic_lsj_dataset_mapper.py │ │ └── yoso_semantic_dataset_mapper.py │ └── datasets │ │ ├── __init__.py │ │ ├── register_ade20k_full.py │ │ ├── register_ade20k_instance.py │ │ ├── register_ade20k_panoptic.py │ │ ├── register_coco_panoptic_annos_semseg.py │ │ ├── register_coco_stuff_10k.py │ │ ├── register_mapillary_vistas.py │ │ └── register_mapillary_vistas_panoptic.py │ ├── head.py │ ├── loss.py │ ├── neck.py │ ├── segmentator.py │ └── utils.py ├── setup.cfg └── setup.py /.gitignore: -------------------------------------------------------------------------------- 1 | # output dir 2 | output* 3 | 4 | 5 | *.png 6 | *.json 7 | *.diff 8 | *.jpg 9 | !/projects/DensePose/doc/images/*.jpg 10 | 11 | # compilation and distribution 12 | __pycache__ 13 | _ext 14 | *.pyc 15 | *.pyd 16 | *.so 17 | *.dll 18 | *.egg-info/ 19 | build/ 20 | dist/ 21 | wheels/ 22 | 23 | # pytorch/python/numpy formats 24 | *.pth 25 | *.pkl 26 | *.npy 27 | *.ts 28 | model_ts*.txt 29 | 30 | # ipython/jupyter notebooks 31 | # *.ipynb 32 | **/.ipynb_checkpoints/ 33 | 34 | # Editor temporaries 35 | *.swn 36 | *.swo 37 | *.swp 38 | *~ 39 | 40 | # editor settings 41 | .idea 42 | .vscode 43 | _darcs 44 | 45 | # project dirs 46 | # /detectron2/model_zoo/configs 47 | /datasets/* 48 | !/datasets/*.* 49 | /projects/*/datasets 50 | /models 51 | /snippet 52 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Jie Hu 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | This is the project page for paper: 2 | >[**You Only Segment Once: Towards Real-Time Panoptic Segmentation**](https://arxiv.org/abs/2303.14651), In CVPR 2023. 3 | 4 | 5 | 6 | ## Model Zoo 7 | 8 | On COCO validation set: 9 | | Backbone | Scale | PQ | FPS| GPU | Model 10 | |:---:|:---:|:---:|:---:|:---:|:---:| 11 | |R50|800,1333|48.4|23.6|V100| [model](https://github.com/hujiecpp/YOSO/releases/download/v0.1/yoso_res50_coco.pth) | 12 | |R50|512,800|46.4|45.6|V100|[model](https://github.com/hujiecpp/YOSO/releases/download/v0.1/yoso_res50_coco.pth)| 13 | 14 | On Cityscapes validation set: 15 | | Backbone | Scale | PQ | FPS| GPU | Model 16 | |:---:|:---:|:---:|:---:|:---:|:---:| 17 | |R50|1024,2048|59.7|11.1|V100|[model](https://github.com/hujiecpp/YOSO/releases/download/v0.1/yoso_res50_cityscapes.pth)| 18 | |R50|512,1024|52.5|22.6|V100|[model](https://github.com/hujiecpp/YOSO/releases/download/v0.1/yoso_res50_cityscapes.pth)| 19 | 20 | On ADE20k validation set: 21 | | Backbone | Scale | PQ | FPS| GPU | Model 22 | |:---:|:---:|:---:|:---:|:---:|:---:| 23 | |R50|640,2560|38.0|35.4|V100|[model](https://github.com/hujiecpp/YOSO/releases/download/v0.1/yoso_res50_ade20k.pth)| 24 | 25 | On Mapillary Vistas validation set: 26 | | Backbone | Scale | PQ | FPS| GPU | Model 27 | |:---:|:---:|:---:|:---:|:---:|:---:| 28 | |R50|2048,2048|34.1|7.1|A100|[model](https://github.com/hujiecpp/YOSO/releases/download/v0.1/yoso_res50_mapillary.pth)| 29 | 30 | ## Getting Started 31 | ### Installation 32 | We recommend to use [Anaconda](https://www.anaconda.com/) for installation. 33 | ```bash 34 | conda create -n YOSO python=3.8 -y 35 | conda activate YOSO 36 | conda install pytorch==1.10.1 torchvision==0.11.2 cudatoolkit=11.3 -c pytorch 37 | pip install pycocotools -i https://pypi.douban.com/simple 38 | pip install git+https://github.com/cocodataset/panopticapi.git 39 | git clone https://github.com/hujiecpp/YOSO.git 40 | cd YOSO 41 | python setup.py develop 42 | ``` 43 | 44 | ### Datasets Preparation 45 | See [Preparing Datasets for Mask2Former](https://github.com/facebookresearch/Mask2Former/tree/main/datasets). 46 | 47 | ### Training & Evaluation 48 | 49 | - Train YOSO (e.g., on COCO dataset with R50 backbone). 50 | ```bash 51 | python projects/YOSO/train_net.py --num-gpus 4 --config-file projects/YOSO/configs/coco/panoptic-segmentation/YOSO-R50.yaml 52 | ``` 53 | 54 | - Evaluate YOSO (e.g., on COCO dataset with R50 backbone). 55 | ```bash 56 | python projects/YOSO/train_net.py --num-gpus 4 --config-file projects/YOSO/configs/coco/panoptic-segmentation/YOSO-R50.yaml --eval-only MODEL.WEIGHTS ./model_zoo/yoso_res50_coco.pth 57 | ``` 58 | 59 | ### Inference on Custom Image or Video 60 | 61 | - Run YOSO demo (e.g., on video with R50 backbone). 
62 | ```bash 63 | python demo/demo.py --config-file projects/YOSO/configs/coco/panoptic-segmentation/YOSO-R50.yaml --video-input input_video.mp4 --output output_video.mp4 --opts MODEL.WEIGHTS ./model_zoo/yoso_res50_coco.pth 64 | ``` 65 | 66 | ## Acknowledgements 67 | 68 | - [Mask2Former](https://github.com/facebookresearch/Mask2Former) 69 | - [K-Net](https://github.com/ZwwWayne/K-Net) 70 | 71 | ## Citing YOSO 72 | 73 | If YOSO helps your research, please cite it in your publications: 74 | 75 | ```BibTeX 76 | @inproceedings{hu2023you, 77 | title={You Only Segment Once: Towards Real-Time Panoptic Segmentation}, 78 | author={Hu, Jie and Huang, Linyan and Ren, Tianhe and Zhang, Shengchuan and Ji, Rongrong and Cao, Liujuan}, 79 | booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition}, 80 | pages={17819--17829}, 81 | year={2023} 82 | } 83 | ``` 84 | -------------------------------------------------------------------------------- /demo/config.py: -------------------------------------------------------------------------------- 1 | from detectron2.config import CfgNode as CN 2 | 3 | def add_yoso_config(cfg): 4 | cfg.MODEL.YOSO = CN() 5 | cfg.MODEL.YOSO.SIZE_DIVISIBILITY = 32 6 | cfg.MODEL.YOSO.NUM_CLASSES = 133 7 | cfg.MODEL.YOSO.NUM_STAGES = 2 8 | 9 | cfg.MODEL.YOSO.IN_FEATURES = ["res2", "res3", "res4", "res5"] 10 | cfg.MODEL.YOSO.HIDDEN_DIM = 256 11 | cfg.MODEL.YOSO.AGG_DIM = 128 12 | cfg.MODEL.YOSO.NUM_PROPOSALS = 100 13 | cfg.MODEL.YOSO.CONV_KERNEL_SIZE_2D = 1 14 | cfg.MODEL.YOSO.CONV_KERNEL_SIZE_1D = 3 15 | cfg.MODEL.YOSO.NUM_CLS_FCS = 1 16 | cfg.MODEL.YOSO.NUM_MASK_FCS = 1 17 | 18 | cfg.MODEL.YOSO.NO_OBJECT_WEIGHT = 0.1 19 | cfg.MODEL.YOSO.CLASS_WEIGHT = 2.0 20 | cfg.MODEL.YOSO.MASK_WEIGHT = 5.0 21 | cfg.MODEL.YOSO.DICE_WEIGHT = 5.0 22 | cfg.MODEL.YOSO.TRAIN_NUM_POINTS = 112 * 112 23 | cfg.MODEL.YOSO.OVERSAMPLE_RATIO = 3.0 24 | cfg.MODEL.YOSO.IMPORTANCE_SAMPLE_RATIO = 0.75 25 | cfg.MODEL.YOSO.TEMPERATIRE = 0.1 26 | 27 | cfg.MODEL.YOSO.TEST = CN() 28 | cfg.MODEL.YOSO.TEST.SEMANTIC_ON = False 29 | cfg.MODEL.YOSO.TEST.INSTANCE_ON = False 30 | cfg.MODEL.YOSO.TEST.PANOPTIC_ON = False 31 | cfg.MODEL.YOSO.TEST.OBJECT_MASK_THRESHOLD = 0.0 32 | cfg.MODEL.YOSO.TEST.OVERLAP_THRESHOLD = 0.0 33 | cfg.MODEL.YOSO.TEST.SEM_SEG_POSTPROCESSING_BEFORE_INFERENCE = False 34 | 35 | cfg.SOLVER.OPTIMIZER = "ADAMW" 36 | cfg.SOLVER.BACKBONE_MULTIPLIER = 0.1 37 | cfg.SOLVER.WEIGHT_DECAY_EMBED = 0.0 38 | cfg.SOLVER.WEIGHT_DECAY_BIAS = None 39 | 40 | cfg.SOLVER.POLY_LR_POWER = 0.9 41 | cfg.SOLVER.POLY_LR_CONSTANT_ENDING = 0.0 42 | 43 | cfg.INPUT.DATASET_MAPPER_NAME = "yoso_panoptic_lsj" 44 | cfg.INPUT.SIZE_DIVISIBILITY = -1 45 | cfg.INPUT.COLOR_AUG_SSD = False 46 | cfg.INPUT.CROP.SINGLE_CATEGORY_MAX_AREA = 1.0 47 | 48 | cfg.INPUT.IMAGE_SIZE = 1024 49 | cfg.INPUT.MIN_SCALE = 0.1 50 | cfg.INPUT.MAX_SCALE = 2.0 51 | 52 | -------------------------------------------------------------------------------- /demo/fig.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hujiecpp/YOSO/04b898d395ffd8318aa3761b0b2b6d20b3514f26/demo/fig.png -------------------------------------------------------------------------------- /detectron2/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | 3 | from .utils.env import setup_environment 4 | 5 | setup_environment() 6 | 7 | 8 | # This line will be programatically read/write by setup.py. 
9 | # Leave them at the bottom of this file and don't touch them. 10 | __version__ = "0.3" 11 | -------------------------------------------------------------------------------- /detectron2/checkpoint/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright (c) Facebook, Inc. and its affiliates. 3 | # File: 4 | 5 | 6 | from . import catalog as _UNUSED # register the handler 7 | from .detection_checkpoint import DetectionCheckpointer 8 | from fvcore.common.checkpoint import Checkpointer, PeriodicCheckpointer 9 | 10 | __all__ = ["Checkpointer", "PeriodicCheckpointer", "DetectionCheckpointer"] 11 | -------------------------------------------------------------------------------- /detectron2/checkpoint/catalog.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | import logging 3 | 4 | from detectron2.utils.file_io import PathHandler, PathManager 5 | 6 | 7 | class ModelCatalog(object): 8 | """ 9 | Store mappings from names to third-party models. 10 | """ 11 | 12 | S3_C2_DETECTRON_PREFIX = "https://dl.fbaipublicfiles.com/detectron" 13 | 14 | # MSRA models have STRIDE_IN_1X1=True. False otherwise. 15 | # NOTE: all BN models here have fused BN into an affine layer. 16 | # As a result, you should only load them to a model with "FrozenBN". 17 | # Loading them to a model with regular BN or SyncBN is wrong. 18 | # Even when loaded to FrozenBN, it is still different from affine by an epsilon, 19 | # which should be negligible for training. 20 | # NOTE: all models here uses PIXEL_STD=[1,1,1] 21 | # NOTE: Most of the BN models here are no longer used. We use the 22 | # re-converted pre-trained models under detectron2 model zoo instead. 
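    # Example (for illustration only; it follows from _get_c2_imagenet_pretrained below):
    # ModelCatalog.get("ImageNetPretrained/MSRA/R-50") resolves to
    # "https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-50.pkl".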
23 | C2_IMAGENET_MODELS = { 24 | "MSRA/R-50": "ImageNetPretrained/MSRA/R-50.pkl", 25 | "MSRA/R-101": "ImageNetPretrained/MSRA/R-101.pkl", 26 | "FAIR/R-50-GN": "ImageNetPretrained/47261647/R-50-GN.pkl", 27 | "FAIR/R-101-GN": "ImageNetPretrained/47592356/R-101-GN.pkl", 28 | "FAIR/X-101-32x8d": "ImageNetPretrained/20171220/X-101-32x8d.pkl", 29 | "FAIR/X-101-64x4d": "ImageNetPretrained/FBResNeXt/X-101-64x4d.pkl", 30 | "FAIR/X-152-32x8d-IN5k": "ImageNetPretrained/25093814/X-152-32x8d-IN5k.pkl", 31 | } 32 | 33 | C2_DETECTRON_PATH_FORMAT = ( 34 | "{prefix}/{url}/output/train/{dataset}/{type}/model_final.pkl" # noqa B950 35 | ) 36 | 37 | C2_DATASET_COCO = "coco_2014_train%3Acoco_2014_valminusminival" 38 | C2_DATASET_COCO_KEYPOINTS = "keypoints_coco_2014_train%3Akeypoints_coco_2014_valminusminival" 39 | 40 | # format: {model_name} -> part of the url 41 | C2_DETECTRON_MODELS = { 42 | "35857197/e2e_faster_rcnn_R-50-C4_1x": "35857197/12_2017_baselines/e2e_faster_rcnn_R-50-C4_1x.yaml.01_33_49.iAX0mXvW", # noqa B950 43 | "35857345/e2e_faster_rcnn_R-50-FPN_1x": "35857345/12_2017_baselines/e2e_faster_rcnn_R-50-FPN_1x.yaml.01_36_30.cUF7QR7I", # noqa B950 44 | "35857890/e2e_faster_rcnn_R-101-FPN_1x": "35857890/12_2017_baselines/e2e_faster_rcnn_R-101-FPN_1x.yaml.01_38_50.sNxI7sX7", # noqa B950 45 | "36761737/e2e_faster_rcnn_X-101-32x8d-FPN_1x": "36761737/12_2017_baselines/e2e_faster_rcnn_X-101-32x8d-FPN_1x.yaml.06_31_39.5MIHi1fZ", # noqa B950 46 | "35858791/e2e_mask_rcnn_R-50-C4_1x": "35858791/12_2017_baselines/e2e_mask_rcnn_R-50-C4_1x.yaml.01_45_57.ZgkA7hPB", # noqa B950 47 | "35858933/e2e_mask_rcnn_R-50-FPN_1x": "35858933/12_2017_baselines/e2e_mask_rcnn_R-50-FPN_1x.yaml.01_48_14.DzEQe4wC", # noqa B950 48 | "35861795/e2e_mask_rcnn_R-101-FPN_1x": "35861795/12_2017_baselines/e2e_mask_rcnn_R-101-FPN_1x.yaml.02_31_37.KqyEK4tT", # noqa B950 49 | "36761843/e2e_mask_rcnn_X-101-32x8d-FPN_1x": "36761843/12_2017_baselines/e2e_mask_rcnn_X-101-32x8d-FPN_1x.yaml.06_35_59.RZotkLKI", # noqa B950 50 | "48616381/e2e_mask_rcnn_R-50-FPN_2x_gn": "GN/48616381/04_2018_gn_baselines/e2e_mask_rcnn_R-50-FPN_2x_gn_0416.13_23_38.bTlTI97Q", # noqa B950 51 | "37697547/e2e_keypoint_rcnn_R-50-FPN_1x": "37697547/12_2017_baselines/e2e_keypoint_rcnn_R-50-FPN_1x.yaml.08_42_54.kdzV35ao", # noqa B950 52 | "35998355/rpn_R-50-C4_1x": "35998355/12_2017_baselines/rpn_R-50-C4_1x.yaml.08_00_43.njH5oD9L", # noqa B950 53 | "35998814/rpn_R-50-FPN_1x": "35998814/12_2017_baselines/rpn_R-50-FPN_1x.yaml.08_06_03.Axg0r179", # noqa B950 54 | "36225147/fast_R-50-FPN_1x": "36225147/12_2017_baselines/fast_rcnn_R-50-FPN_1x.yaml.08_39_09.L3obSdQ2", # noqa B950 55 | } 56 | 57 | @staticmethod 58 | def get(name): 59 | if name.startswith("Caffe2Detectron/COCO"): 60 | return ModelCatalog._get_c2_detectron_baseline(name) 61 | if name.startswith("ImageNetPretrained/"): 62 | return ModelCatalog._get_c2_imagenet_pretrained(name) 63 | raise RuntimeError("model not present in the catalog: {}".format(name)) 64 | 65 | @staticmethod 66 | def _get_c2_imagenet_pretrained(name): 67 | prefix = ModelCatalog.S3_C2_DETECTRON_PREFIX 68 | name = name[len("ImageNetPretrained/") :] 69 | name = ModelCatalog.C2_IMAGENET_MODELS[name] 70 | url = "/".join([prefix, name]) 71 | return url 72 | 73 | @staticmethod 74 | def _get_c2_detectron_baseline(name): 75 | name = name[len("Caffe2Detectron/COCO/") :] 76 | url = ModelCatalog.C2_DETECTRON_MODELS[name] 77 | if "keypoint_rcnn" in name: 78 | dataset = ModelCatalog.C2_DATASET_COCO_KEYPOINTS 79 | else: 80 | dataset = ModelCatalog.C2_DATASET_COCO 81 | 
82 | if "35998355/rpn_R-50-C4_1x" in name: 83 | # this one model is somehow different from others .. 84 | type = "rpn" 85 | else: 86 | type = "generalized_rcnn" 87 | 88 | # Detectron C2 models are stored in the structure defined in `C2_DETECTRON_PATH_FORMAT`. 89 | url = ModelCatalog.C2_DETECTRON_PATH_FORMAT.format( 90 | prefix=ModelCatalog.S3_C2_DETECTRON_PREFIX, url=url, type=type, dataset=dataset 91 | ) 92 | return url 93 | 94 | 95 | class ModelCatalogHandler(PathHandler): 96 | """ 97 | Resolve URL like catalog://. 98 | """ 99 | 100 | PREFIX = "catalog://" 101 | 102 | def _get_supported_prefixes(self): 103 | return [self.PREFIX] 104 | 105 | def _get_local_path(self, path): 106 | logger = logging.getLogger(__name__) 107 | catalog_path = ModelCatalog.get(path[len(self.PREFIX) :]) 108 | logger.info("Catalog entry {} points to {}".format(path, catalog_path)) 109 | return PathManager.get_local_path(catalog_path) 110 | 111 | def _open(self, path, mode="r", **kwargs): 112 | return PathManager.open(self._get_local_path(path), mode, **kwargs) 113 | 114 | 115 | PathManager.register_handler(ModelCatalogHandler()) 116 | -------------------------------------------------------------------------------- /detectron2/checkpoint/detection_checkpoint.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | import pickle 3 | from fvcore.common.checkpoint import Checkpointer 4 | 5 | import detectron2.utils.comm as comm 6 | from detectron2.utils.file_io import PathManager 7 | 8 | from .c2_model_loading import align_and_update_state_dicts 9 | 10 | 11 | class DetectionCheckpointer(Checkpointer): 12 | """ 13 | Same as :class:`Checkpointer`, but is able to handle models in detectron & detectron2 14 | model zoo, and apply conversions for legacy models. 
15 | """ 16 | 17 | def __init__(self, model, save_dir="", *, save_to_disk=None, **checkpointables): 18 | is_main_process = comm.is_main_process() 19 | super().__init__( 20 | model, 21 | save_dir, 22 | save_to_disk=is_main_process if save_to_disk is None else save_to_disk, 23 | **checkpointables, 24 | ) 25 | if hasattr(self, "path_manager"): 26 | self.path_manager = PathManager 27 | else: 28 | # This could only happen for open source 29 | # TODO remove after upgrading fvcore 30 | from fvcore.common.file_io import PathManager as g_PathManager 31 | 32 | for handler in PathManager._path_handlers.values(): 33 | try: 34 | g_PathManager.register_handler(handler) 35 | except KeyError: 36 | pass 37 | 38 | def _load_file(self, filename): 39 | if filename.endswith(".pkl"): 40 | with PathManager.open(filename, "rb") as f: 41 | data = pickle.load(f, encoding="latin1") 42 | if "model" in data and "__author__" in data: 43 | # file is in Detectron2 model zoo format 44 | self.logger.info("Reading a file from '{}'".format(data["__author__"])) 45 | return data 46 | else: 47 | # assume file is from Caffe2 / Detectron1 model zoo 48 | if "blobs" in data: 49 | # Detection models have "blobs", but ImageNet models don't 50 | data = data["blobs"] 51 | data = {k: v for k, v in data.items() if not k.endswith("_momentum")} 52 | return {"model": data, "__author__": "Caffe2", "matching_heuristics": True} 53 | 54 | loaded = super()._load_file(filename) # load native pth checkpoint 55 | if "model" not in loaded: 56 | loaded = {"model": loaded} 57 | return loaded 58 | 59 | def _load_model(self, checkpoint): 60 | if checkpoint.get("matching_heuristics", False): 61 | self._convert_ndarray_to_tensor(checkpoint["model"]) 62 | # convert weights by name-matching heuristics 63 | model_state_dict = self.model.state_dict() 64 | align_and_update_state_dicts( 65 | model_state_dict, 66 | checkpoint["model"], 67 | c2_conversion=checkpoint.get("__author__", None) == "Caffe2", 68 | ) 69 | checkpoint["model"] = model_state_dict 70 | # for non-caffe2 models, use standard ways to load it 71 | incompatible = super()._load_model(checkpoint) 72 | if incompatible is None: # support older versions of fvcore 73 | return None 74 | 75 | model_buffers = dict(self.model.named_buffers(recurse=False)) 76 | for k in ["pixel_mean", "pixel_std"]: 77 | # Ignore missing key message about pixel_mean/std. 78 | # Though they may be missing in old checkpoints, they will be correctly 79 | # initialized from config anyway. 80 | if k in model_buffers: 81 | try: 82 | incompatible.missing_keys.remove(k) 83 | except ValueError: 84 | pass 85 | return incompatible 86 | -------------------------------------------------------------------------------- /detectron2/config/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | from .compat import downgrade_config, upgrade_config 3 | from .config import CfgNode, get_cfg, global_cfg, set_global_cfg, configurable 4 | 5 | __all__ = [ 6 | "CfgNode", 7 | "get_cfg", 8 | "global_cfg", 9 | "set_global_cfg", 10 | "downgrade_config", 11 | "upgrade_config", 12 | "configurable", 13 | ] 14 | -------------------------------------------------------------------------------- /detectron2/data/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | from . 
import transforms # isort:skip 3 | 4 | from .build import ( 5 | build_batch_data_loader, 6 | build_detection_test_loader, 7 | build_detection_train_loader, 8 | get_detection_dataset_dicts, 9 | load_proposals_into_dataset, 10 | print_instances_class_histogram, 11 | ) 12 | from .catalog import DatasetCatalog, MetadataCatalog, Metadata 13 | from .common import DatasetFromList, MapDataset 14 | from .dataset_mapper import DatasetMapper 15 | 16 | # ensure the builtin datasets are registered 17 | from . import datasets, samplers # isort:skip 18 | 19 | __all__ = [k for k in globals().keys() if not k.startswith("_")] 20 | -------------------------------------------------------------------------------- /detectron2/data/datasets/README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | ### Common Datasets 4 | 5 | The dataset implemented here do not need to load the data into the final format. 6 | It should provide the minimal data structure needed to use the dataset, so it can be very efficient. 7 | 8 | For example, for an image dataset, just provide the file names and labels, but don't read the images. 9 | Let the downstream decide how to read. 10 | -------------------------------------------------------------------------------- /detectron2/data/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | from .coco import load_coco_json, load_sem_seg, register_coco_instances 3 | from .coco_panoptic import register_coco_panoptic, register_coco_panoptic_separated 4 | from .lvis import load_lvis_json, register_lvis_instances, get_lvis_instances_meta 5 | from .pascal_voc import load_voc_instances, register_pascal_voc 6 | from . import builtin as _builtin # ensure the builtin datasets are registered 7 | 8 | 9 | __all__ = [k for k in globals().keys() if not k.startswith("_")] 10 | -------------------------------------------------------------------------------- /detectron2/data/datasets/pascal_voc.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright (c) Facebook, Inc. and its affiliates. 3 | 4 | import numpy as np 5 | import os 6 | import xml.etree.ElementTree as ET 7 | from typing import List, Tuple, Union 8 | 9 | from detectron2.data import DatasetCatalog, MetadataCatalog 10 | from detectron2.structures import BoxMode 11 | from detectron2.utils.file_io import PathManager 12 | 13 | __all__ = ["load_voc_instances", "register_pascal_voc"] 14 | 15 | 16 | # fmt: off 17 | CLASS_NAMES = ( 18 | "aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", 19 | "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", 20 | "pottedplant", "sheep", "sofa", "train", "tvmonitor" 21 | ) 22 | # fmt: on 23 | 24 | 25 | def load_voc_instances(dirname: str, split: str, class_names: Union[List[str], Tuple[str, ...]]): 26 | """ 27 | Load Pascal VOC detection annotations to Detectron2 format. 28 | 29 | Args: 30 | dirname: Contain "Annotations", "ImageSets", "JPEGImages" 31 | split (str): one of "train", "test", "val", "trainval" 32 | class_names: list or tuple of class names 33 | """ 34 | with PathManager.open(os.path.join(dirname, "ImageSets", "Main", split + ".txt")) as f: 35 | fileids = np.loadtxt(f, dtype=np.str) 36 | 37 | # Needs to read many small annotation files. 
Makes sense at local 38 | annotation_dirname = PathManager.get_local_path(os.path.join(dirname, "Annotations/")) 39 | dicts = [] 40 | for fileid in fileids: 41 | anno_file = os.path.join(annotation_dirname, fileid + ".xml") 42 | jpeg_file = os.path.join(dirname, "JPEGImages", fileid + ".jpg") 43 | 44 | with PathManager.open(anno_file) as f: 45 | tree = ET.parse(f) 46 | 47 | r = { 48 | "file_name": jpeg_file, 49 | "image_id": fileid, 50 | "height": int(tree.findall("./size/height")[0].text), 51 | "width": int(tree.findall("./size/width")[0].text), 52 | } 53 | instances = [] 54 | 55 | for obj in tree.findall("object"): 56 | cls = obj.find("name").text 57 | # We include "difficult" samples in training. 58 | # Based on limited experiments, they don't hurt accuracy. 59 | # difficult = int(obj.find("difficult").text) 60 | # if difficult == 1: 61 | # continue 62 | bbox = obj.find("bndbox") 63 | bbox = [float(bbox.find(x).text) for x in ["xmin", "ymin", "xmax", "ymax"]] 64 | # Original annotations are integers in the range [1, W or H] 65 | # Assuming they mean 1-based pixel indices (inclusive), 66 | # a box with annotation (xmin=1, xmax=W) covers the whole image. 67 | # In coordinate space this is represented by (xmin=0, xmax=W) 68 | bbox[0] -= 1.0 69 | bbox[1] -= 1.0 70 | instances.append( 71 | {"category_id": class_names.index(cls), "bbox": bbox, "bbox_mode": BoxMode.XYXY_ABS} 72 | ) 73 | r["annotations"] = instances 74 | dicts.append(r) 75 | return dicts 76 | 77 | 78 | def register_pascal_voc(name, dirname, split, year, class_names=CLASS_NAMES): 79 | DatasetCatalog.register(name, lambda: load_voc_instances(dirname, split, class_names)) 80 | MetadataCatalog.get(name).set( 81 | thing_classes=list(class_names), dirname=dirname, year=year, split=split 82 | ) 83 | -------------------------------------------------------------------------------- /detectron2/data/datasets/register_coco.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | from .coco import register_coco_instances # noqa 3 | from .coco_panoptic import register_coco_panoptic_separated # noqa 4 | -------------------------------------------------------------------------------- /detectron2/data/samplers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | from .distributed_sampler import InferenceSampler, RepeatFactorTrainingSampler, TrainingSampler, RandomSubsetTrainingSampler 3 | from .grouped_batch_sampler import GroupedBatchSampler 4 | 5 | __all__ = [ 6 | "GroupedBatchSampler", 7 | "TrainingSampler", 8 | "InferenceSampler", 9 | "RepeatFactorTrainingSampler", 10 | "RandomSubsetTrainingSampler" 11 | ] 12 | -------------------------------------------------------------------------------- /detectron2/data/samplers/grouped_batch_sampler.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | import numpy as np 3 | from torch.utils.data.sampler import BatchSampler, Sampler 4 | 5 | 6 | class GroupedBatchSampler(BatchSampler): 7 | """ 8 | Wraps another sampler to yield a mini-batch of indices. 9 | It enforces that the batch only contain elements from the same group. 10 | It also tries to provide mini-batches which follows an ordering which is 11 | as close as possible to the ordering from the original sampler. 
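
    Example (illustrative; the group ids below are made up, e.g. aspect-ratio buckets)::

        batch_sampler = GroupedBatchSampler(sampler, group_ids=[0, 1, 0, 1], batch_size=2)
        # every yielded mini-batch contains indices from a single group only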
12 | """ 13 | 14 | def __init__(self, sampler, group_ids, batch_size): 15 | """ 16 | Args: 17 | sampler (Sampler): Base sampler. 18 | group_ids (list[int]): If the sampler produces indices in range [0, N), 19 | `group_ids` must be a list of `N` ints which contains the group id of each sample. 20 | The group ids must be a set of integers in the range [0, num_groups). 21 | batch_size (int): Size of mini-batch. 22 | """ 23 | if not isinstance(sampler, Sampler): 24 | raise ValueError( 25 | "sampler should be an instance of " 26 | "torch.utils.data.Sampler, but got sampler={}".format(sampler) 27 | ) 28 | self.sampler = sampler 29 | self.group_ids = np.asarray(group_ids) 30 | assert self.group_ids.ndim == 1 31 | self.batch_size = batch_size 32 | groups = np.unique(self.group_ids).tolist() 33 | 34 | # buffer the indices of each group until batch size is reached 35 | self.buffer_per_group = {k: [] for k in groups} 36 | 37 | def __iter__(self): 38 | for idx in self.sampler: 39 | group_id = self.group_ids[idx] 40 | group_buffer = self.buffer_per_group[group_id] 41 | group_buffer.append(idx) 42 | if len(group_buffer) == self.batch_size: 43 | yield group_buffer[:] # yield a copy of the list 44 | del group_buffer[:] 45 | 46 | def __len__(self): 47 | raise NotImplementedError("len() of GroupedBatchSampler is not well-defined.") 48 | -------------------------------------------------------------------------------- /detectron2/data/transforms/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | from fvcore.transforms.transform import Transform, TransformList # order them first 3 | from fvcore.transforms.transform import * 4 | from .transform import * 5 | from .augmentation import * 6 | from .augmentation_impl import * 7 | 8 | __all__ = [k for k in globals().keys() if not k.startswith("_")] 9 | -------------------------------------------------------------------------------- /detectron2/engine/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | 3 | from .launch import * 4 | from .train_loop import * 5 | 6 | __all__ = [k for k in globals().keys() if not k.startswith("_")] 7 | 8 | 9 | # prefer to let hooks and defaults live in separate namespaces (therefore not in __all__) 10 | # but still make them available here 11 | from .hooks import * 12 | from .defaults import * 13 | -------------------------------------------------------------------------------- /detectron2/engine/launch.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | import logging 3 | import torch 4 | import torch.distributed as dist 5 | import torch.multiprocessing as mp 6 | 7 | from detectron2.utils import comm 8 | 9 | __all__ = ["launch"] 10 | 11 | 12 | def _find_free_port(): 13 | import socket 14 | 15 | sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) 16 | # Binding to port 0 will cause the OS to find an available port for us 17 | sock.bind(("", 0)) 18 | port = sock.getsockname()[1] 19 | sock.close() 20 | # NOTE: there is still a chance the port could be taken by other processes. 21 | return port 22 | 23 | 24 | def launch(main_func, num_gpus_per_machine, num_machines=1, machine_rank=0, dist_url=None, args=()): 25 | """ 26 | Launch multi-gpu or distributed training. 27 | This function must be called on all machines involved in the training. 
28 | It will spawn child processes (defined by ``num_gpus_per_machine``) on each machine. 29 | 30 | Args: 31 | main_func: a function that will be called by `main_func(*args)` 32 | num_gpus_per_machine (int): number of GPUs per machine 33 | num_machines (int): the total number of machines 34 | machine_rank (int): the rank of this machine 35 | dist_url (str): url to connect to for distributed jobs, including protocol 36 | e.g. "tcp://127.0.0.1:8686". 37 | Can be set to "auto" to automatically select a free port on localhost 38 | args (tuple): arguments passed to main_func 39 | """ 40 | world_size = num_machines * num_gpus_per_machine 41 | if world_size > 1: 42 | # https://github.com/pytorch/pytorch/pull/14391 43 | # TODO prctl in spawned processes 44 | 45 | if dist_url == "auto": 46 | assert num_machines == 1, "dist_url=auto not supported in multi-machine jobs." 47 | port = _find_free_port() 48 | dist_url = f"tcp://127.0.0.1:{port}" 49 | if num_machines > 1 and dist_url.startswith("file://"): 50 | logger = logging.getLogger(__name__) 51 | logger.warning( 52 | "file:// is not a reliable init_method in multi-machine jobs. Prefer tcp://" 53 | ) 54 | 55 | mp.spawn( 56 | _distributed_worker, 57 | nprocs=num_gpus_per_machine, 58 | args=(main_func, world_size, num_gpus_per_machine, machine_rank, dist_url, args), 59 | daemon=False, 60 | ) 61 | else: 62 | main_func(*args) 63 | 64 | 65 | def _distributed_worker( 66 | local_rank, main_func, world_size, num_gpus_per_machine, machine_rank, dist_url, args 67 | ): 68 | assert torch.cuda.is_available(), "cuda is not available. Please check your installation." 69 | global_rank = machine_rank * num_gpus_per_machine + local_rank 70 | try: 71 | dist.init_process_group( 72 | backend="NCCL", init_method=dist_url, world_size=world_size, rank=global_rank 73 | ) 74 | except Exception as e: 75 | logger = logging.getLogger(__name__) 76 | logger.error("Process group URL: {}".format(dist_url)) 77 | raise e 78 | # synchronize is needed here to prevent a possible timeout after calling init_process_group 79 | # See: https://github.com/facebookresearch/maskrcnn-benchmark/issues/172 80 | comm.synchronize() 81 | 82 | assert num_gpus_per_machine <= torch.cuda.device_count() 83 | torch.cuda.set_device(local_rank) 84 | 85 | # Setup the local process group (which contains ranks within the same machine) 86 | assert comm._LOCAL_PROCESS_GROUP is None 87 | num_machines = world_size // num_gpus_per_machine 88 | for i in range(num_machines): 89 | ranks_on_i = list(range(i * num_gpus_per_machine, (i + 1) * num_gpus_per_machine)) 90 | pg = dist.new_group(ranks_on_i) 91 | if i == machine_rank: 92 | comm._LOCAL_PROCESS_GROUP = pg 93 | 94 | main_func(*args) 95 | -------------------------------------------------------------------------------- /detectron2/evaluation/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
2 | from .cityscapes_evaluation import CityscapesInstanceEvaluator, CityscapesSemSegEvaluator 3 | from .coco_evaluation import COCOEvaluator 4 | from .rotated_coco_evaluation import RotatedCOCOEvaluator 5 | from .evaluator import DatasetEvaluator, DatasetEvaluators, inference_context, inference_on_dataset 6 | from .lvis_evaluation import LVISEvaluator 7 | from .panoptic_evaluation import COCOPanopticEvaluator 8 | from .pascal_voc_evaluation import PascalVOCDetectionEvaluator 9 | from .sem_seg_evaluation import SemSegEvaluator 10 | from .testing import print_csv_format, verify_results 11 | 12 | __all__ = [k for k in globals().keys() if not k.startswith("_")] 13 | -------------------------------------------------------------------------------- /detectron2/evaluation/fast_eval_api.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | import copy 3 | import numpy as np 4 | import time 5 | from pycocotools.cocoeval import COCOeval 6 | 7 | from detectron2 import _C 8 | 9 | 10 | class COCOeval_opt(COCOeval): 11 | """ 12 | This is a slightly modified version of the original COCO API, where the functions evaluateImg() 13 | and accumulate() are implemented in C++ to speedup evaluation 14 | """ 15 | 16 | def evaluate(self): 17 | """ 18 | Run per image evaluation on given images and store results in self.evalImgs_cpp, a 19 | datastructure that isn't readable from Python but is used by a c++ implementation of 20 | accumulate(). Unlike the original COCO PythonAPI, we don't populate the datastructure 21 | self.evalImgs because this datastructure is a computational bottleneck. 22 | :return: None 23 | """ 24 | tic = time.time() 25 | 26 | print("Running per image evaluation...") 27 | p = self.params 28 | # add backward compatibility if useSegm is specified in params 29 | if p.useSegm is not None: 30 | p.iouType = "segm" if p.useSegm == 1 else "bbox" 31 | print("useSegm (deprecated) is not None. 
Running {} evaluation".format(p.iouType)) 32 | print("Evaluate annotation type *{}*".format(p.iouType)) 33 | p.imgIds = list(np.unique(p.imgIds)) 34 | if p.useCats: 35 | p.catIds = list(np.unique(p.catIds)) 36 | p.maxDets = sorted(p.maxDets) 37 | self.params = p 38 | 39 | self._prepare() 40 | 41 | # loop through images, area range, max detection number 42 | catIds = p.catIds if p.useCats else [-1] 43 | 44 | if p.iouType == "segm" or p.iouType == "bbox": 45 | computeIoU = self.computeIoU 46 | elif p.iouType == "keypoints": 47 | computeIoU = self.computeOks 48 | self.ious = { 49 | (imgId, catId): computeIoU(imgId, catId) for imgId in p.imgIds for catId in catIds 50 | } 51 | 52 | maxDet = p.maxDets[-1] 53 | 54 | # <<<< Beginning of code differences with original COCO API 55 | def convert_instances_to_cpp(instances, is_det=False): 56 | # Convert annotations for a list of instances in an image to a format that's fast 57 | # to access in C++ 58 | instances_cpp = [] 59 | for instance in instances: 60 | instance_cpp = _C.InstanceAnnotation( 61 | int(instance["id"]), 62 | instance["score"] if is_det else instance.get("score", 0.0), 63 | instance["area"], 64 | bool(instance.get("iscrowd", 0)), 65 | bool(instance.get("ignore", 0)), 66 | ) 67 | instances_cpp.append(instance_cpp) 68 | return instances_cpp 69 | 70 | # Convert GT annotations, detections, and IOUs to a format that's fast to access in C++ 71 | ground_truth_instances = [ 72 | [convert_instances_to_cpp(self._gts[imgId, catId]) for catId in p.catIds] 73 | for imgId in p.imgIds 74 | ] 75 | detected_instances = [ 76 | [convert_instances_to_cpp(self._dts[imgId, catId], is_det=True) for catId in p.catIds] 77 | for imgId in p.imgIds 78 | ] 79 | ious = [[self.ious[imgId, catId] for catId in catIds] for imgId in p.imgIds] 80 | 81 | if not p.useCats: 82 | # For each image, flatten per-category lists into a single list 83 | ground_truth_instances = [[[o for c in i for o in c]] for i in ground_truth_instances] 84 | detected_instances = [[[o for c in i for o in c]] for i in detected_instances] 85 | 86 | # Call C++ implementation of self.evaluateImgs() 87 | self._evalImgs_cpp = _C.COCOevalEvaluateImages( 88 | p.areaRng, maxDet, p.iouThrs, ious, ground_truth_instances, detected_instances 89 | ) 90 | self._evalImgs = None 91 | 92 | self._paramsEval = copy.deepcopy(self.params) 93 | toc = time.time() 94 | print("COCOeval_opt.evaluate() finished in {:0.2f} seconds.".format(toc - tic)) 95 | # >>>> End of code differences with original COCO API 96 | 97 | def accumulate(self): 98 | """ 99 | Accumulate per image evaluation results and store the result in self.eval. 
Does not 100 | support changing parameter settings from those used by self.evaluate() 101 | """ 102 | print("Accumulating evaluation results...") 103 | tic = time.time() 104 | if not hasattr(self, "_evalImgs_cpp"): 105 | print("Please run evaluate() first") 106 | 107 | self.eval = _C.COCOevalAccumulate(self._paramsEval, self._evalImgs_cpp) 108 | 109 | # recall is num_iou_thresholds X num_categories X num_area_ranges X num_max_detections 110 | self.eval["recall"] = np.array(self.eval["recall"]).reshape( 111 | self.eval["counts"][:1] + self.eval["counts"][2:] 112 | ) 113 | 114 | # precision and scores are num_iou_thresholds X num_recall_thresholds X num_categories X 115 | # num_area_ranges X num_max_detections 116 | self.eval["precision"] = np.array(self.eval["precision"]).reshape(self.eval["counts"]) 117 | self.eval["scores"] = np.array(self.eval["scores"]).reshape(self.eval["counts"]) 118 | toc = time.time() 119 | print("COCOeval_opt.accumulate() finished in {:0.2f} seconds.".format(toc - tic)) 120 | -------------------------------------------------------------------------------- /detectron2/evaluation/testing.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | import logging 3 | import numpy as np 4 | import pprint 5 | import sys 6 | from collections import OrderedDict 7 | from collections.abc import Mapping 8 | 9 | 10 | def print_csv_format(results): 11 | """ 12 | Print main metrics in a format similar to Detectron, 13 | so that they are easy to copypaste into a spreadsheet. 14 | 15 | Args: 16 | results (OrderedDict[dict]): task_name -> {metric -> score} 17 | """ 18 | assert isinstance(results, OrderedDict), results # unordered results cannot be properly printed 19 | logger = logging.getLogger(__name__) 20 | for task, res in results.items(): 21 | # Don't print "AP-category" metrics since they are usually not tracked. 22 | important_res = [(k, v) for k, v in res.items() if "-" not in k] 23 | logger.info("copypaste: Task: {}".format(task)) 24 | logger.info("copypaste: " + ",".join([k[0] for k in important_res])) 25 | logger.info("copypaste: " + ",".join(["{0:.4f}".format(k[1]) for k in important_res])) 26 | 27 | 28 | def verify_results(cfg, results): 29 | """ 30 | Args: 31 | results (OrderedDict[dict]): task_name -> {metric -> score} 32 | 33 | Returns: 34 | bool: whether the verification succeeds or not 35 | """ 36 | expected_results = cfg.TEST.EXPECTED_RESULTS 37 | if not len(expected_results): 38 | return True 39 | 40 | ok = True 41 | for task, metric, expected, tolerance in expected_results: 42 | actual = results[task].get(metric, None) 43 | if actual is None: 44 | ok = False 45 | continue 46 | if not np.isfinite(actual): 47 | ok = False 48 | continue 49 | diff = abs(actual - expected) 50 | if diff > tolerance: 51 | ok = False 52 | 53 | logger = logging.getLogger(__name__) 54 | if not ok: 55 | logger.error("Result verification failed!") 56 | logger.error("Expected Results: " + str(expected_results)) 57 | logger.error("Actual Results: " + pprint.pformat(results)) 58 | 59 | sys.exit(1) 60 | else: 61 | logger.info("Results verification passed.") 62 | return ok 63 | 64 | 65 | def flatten_results_dict(results): 66 | """ 67 | Expand a hierarchical dict of scalars into a flat dict of scalars. 68 | If results[k1][k2][k3] = v, the returned dict will have the entry 69 | {"k1/k2/k3": v}. 
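    For example (with illustrative metric names), ``{"bbox": {"AP": 40.0}, "segm": {"AP": 35.0}}``
    flattens to ``{"bbox/AP": 40.0, "segm/AP": 35.0}``.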
70 | 71 | Args: 72 | results (dict): 73 | """ 74 | r = {} 75 | for k, v in results.items(): 76 | if isinstance(v, Mapping): 77 | v = flatten_results_dict(v) 78 | for kk, vv in v.items(): 79 | r[k + "/" + kk] = vv 80 | else: 81 | r[k] = v 82 | return r 83 | -------------------------------------------------------------------------------- /detectron2/export/README.md: -------------------------------------------------------------------------------- 1 | 2 | This directory contains code to prepare a detectron2 model for deployment. 3 | Currently it supports exporting a detectron2 model to Caffe2 format through ONNX. 4 | 5 | Please see [documentation](https://detectron2.readthedocs.io/tutorials/deployment.html) for its usage. 6 | 7 | 8 | ### Acknowledgements 9 | 10 | Thanks to Mobile Vision team at Facebook for developing the Caffe2 conversion tools. 11 | 12 | Thanks to Computing Platform Department - PAI team at Alibaba Group (@bddpqq, @chenbohua3) who 13 | help export Detectron2 models to TorchScript. 14 | -------------------------------------------------------------------------------- /detectron2/export/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from .api import * 4 | 5 | __all__ = [k for k in globals().keys() if not k.startswith("_")] 6 | -------------------------------------------------------------------------------- /detectron2/export/caffe2_inference.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | 3 | import logging 4 | import numpy as np 5 | from itertools import count 6 | import torch 7 | from caffe2.proto import caffe2_pb2 8 | from caffe2.python import core 9 | 10 | from .caffe2_modeling import META_ARCH_CAFFE2_EXPORT_TYPE_MAP, convert_batched_inputs_to_c2_format 11 | from .shared import ScopedWS, get_pb_arg_vali, get_pb_arg_vals, infer_device_type 12 | 13 | logger = logging.getLogger(__name__) 14 | 15 | 16 | # ===== ref: mobile-vision's 'Caffe2Wrapper' class ====== 17 | class ProtobufModel(torch.nn.Module): 18 | """ 19 | Wrapper of a caffe2's protobuf model. 20 | It works just like nn.Module, but running caffe2 under the hood. 21 | Input/Output are Dict[str, tensor] whose keys are in external_input/output. 
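    A minimal usage sketch (illustrative; the two nets are assumed to be already built)::

        model = ProtobufModel(predict_net, init_net)
        outputs = model((image_tensor,))  # dict: external_output blob name -> torch.Tensor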
22 | """ 23 | 24 | _ids = count(0) 25 | 26 | def __init__(self, predict_net, init_net): 27 | logger.info(f"Initializing ProtobufModel for: {predict_net.name} ...") 28 | super().__init__() 29 | assert isinstance(predict_net, caffe2_pb2.NetDef) 30 | assert isinstance(init_net, caffe2_pb2.NetDef) 31 | # create unique temporary workspace for each instance 32 | self.ws_name = "__tmp_ProtobufModel_{}__".format(next(self._ids)) 33 | self.net = core.Net(predict_net) 34 | 35 | logger.info("Running init_net once to fill the parameters ...") 36 | with ScopedWS(self.ws_name, is_reset=True, is_cleanup=False) as ws: 37 | ws.RunNetOnce(init_net) 38 | uninitialized_external_input = [] 39 | for blob in self.net.Proto().external_input: 40 | if blob not in ws.Blobs(): 41 | uninitialized_external_input.append(blob) 42 | ws.CreateBlob(blob) 43 | ws.CreateNet(self.net) 44 | 45 | self._error_msgs = set() 46 | self._input_blobs = uninitialized_external_input 47 | 48 | def _infer_output_devices(self, inputs): 49 | """ 50 | Returns: 51 | list[str]: list of device for each external output 52 | """ 53 | 54 | def _get_device_type(torch_tensor): 55 | assert torch_tensor.device.type in ["cpu", "cuda"] 56 | assert torch_tensor.device.index == 0 57 | return torch_tensor.device.type 58 | 59 | predict_net = self.net.Proto() 60 | input_device_types = { 61 | (name, 0): _get_device_type(tensor) for name, tensor in zip(self._input_blobs, inputs) 62 | } 63 | device_type_map = infer_device_type( 64 | predict_net, known_status=input_device_types, device_name_style="pytorch" 65 | ) 66 | ssa, versions = core.get_ssa(predict_net) 67 | versioned_outputs = [(name, versions[name]) for name in predict_net.external_output] 68 | output_devices = [device_type_map[outp] for outp in versioned_outputs] 69 | return output_devices 70 | 71 | def forward(self, inputs): 72 | """ 73 | Args: 74 | inputs (tuple[torch.Tensor]) 75 | 76 | Returns: 77 | dict[str, torch.Tensor] 78 | """ 79 | assert len(inputs) == len(self._input_blobs), ( 80 | f"Length of inputs ({len(inputs)}) " 81 | f"doesn't match the required input blobs: {self._input_blobs}" 82 | ) 83 | 84 | with ScopedWS(self.ws_name, is_reset=False, is_cleanup=False) as ws: 85 | for b, tensor in zip(self._input_blobs, inputs): 86 | ws.FeedBlob(b, tensor) 87 | 88 | try: 89 | ws.RunNet(self.net.Proto().name) 90 | except RuntimeError as e: 91 | if not str(e) in self._error_msgs: 92 | self._error_msgs.add(str(e)) 93 | logger.warning("Encountered new RuntimeError: \n{}".format(str(e))) 94 | logger.warning("Catch the error and use partial results.") 95 | 96 | c2_outputs = [ws.FetchBlob(b) for b in self.net.Proto().external_output] 97 | # Remove outputs of current run, this is necessary in order to 98 | # prevent fetching the result from previous run if the model fails 99 | # in the middle. 100 | for b in self.net.Proto().external_output: 101 | # Needs to create uninitialized blob to make the net runable. 102 | # This is "equivalent" to: ws.RemoveBlob(b) then ws.CreateBlob(b), 103 | # but there'no such API. 
104 | ws.FeedBlob(b, f"{b}, a C++ native class of type nullptr (uninitialized).") 105 | 106 | # Cast output to torch.Tensor on the desired device 107 | output_devices = ( 108 | self._infer_output_devices(inputs) 109 | if any(t.device.type != "cpu" for t in inputs) 110 | else ["cpu" for _ in self.net.Proto().external_output] 111 | ) 112 | 113 | outputs = [] 114 | for name, c2_output, device in zip( 115 | self.net.Proto().external_output, c2_outputs, output_devices 116 | ): 117 | if not isinstance(c2_output, np.ndarray): 118 | raise RuntimeError( 119 | "Invalid output for blob {}, received: {}".format(name, c2_output) 120 | ) 121 | outputs.append(torch.Tensor(c2_output).to(device=device)) 122 | # TODO change to tuple in the future 123 | return dict(zip(self.net.Proto().external_output, outputs)) 124 | 125 | 126 | class ProtobufDetectionModel(torch.nn.Module): 127 | """ 128 | A class works just like a pytorch meta arch in terms of inference, but running 129 | caffe2 model under the hood. 130 | """ 131 | 132 | def __init__(self, predict_net, init_net, *, convert_outputs=None): 133 | """ 134 | Args: 135 | predict_net, init_net (core.Net): caffe2 nets 136 | convert_outptus (callable): a function that converts caffe2 137 | outputs to the same format of the original pytorch model. 138 | By default, use the one defined in the caffe2 meta_arch. 139 | """ 140 | super().__init__() 141 | self.protobuf_model = ProtobufModel(predict_net, init_net) 142 | self.size_divisibility = get_pb_arg_vali(predict_net, "size_divisibility", 0) 143 | self.device = get_pb_arg_vals(predict_net, "device", b"cpu").decode("ascii") 144 | 145 | if convert_outputs is None: 146 | meta_arch = get_pb_arg_vals(predict_net, "meta_architecture", b"GeneralizedRCNN") 147 | meta_arch = META_ARCH_CAFFE2_EXPORT_TYPE_MAP[meta_arch.decode("ascii")] 148 | self._convert_outputs = meta_arch.get_outputs_converter(predict_net, init_net) 149 | else: 150 | self._convert_outputs = convert_outputs 151 | 152 | def _convert_inputs(self, batched_inputs): 153 | # currently all models convert inputs in the same way 154 | return convert_batched_inputs_to_c2_format( 155 | batched_inputs, self.size_divisibility, self.device 156 | ) 157 | 158 | def forward(self, batched_inputs): 159 | c2_inputs = self._convert_inputs(batched_inputs) 160 | c2_results = self.protobuf_model(c2_inputs) 161 | return self._convert_outputs(batched_inputs, c2_inputs, c2_results) 162 | -------------------------------------------------------------------------------- /detectron2/export/caffe2_patch.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | 3 | import contextlib 4 | from unittest import mock 5 | import torch 6 | 7 | from detectron2.modeling import poolers 8 | from detectron2.modeling.proposal_generator import rpn 9 | from detectron2.modeling.roi_heads import keypoint_head, mask_head 10 | from detectron2.modeling.roi_heads.fast_rcnn import FastRCNNOutputLayers 11 | 12 | from .c10 import ( 13 | Caffe2Compatible, 14 | Caffe2FastRCNNOutputsInference, 15 | Caffe2KeypointRCNNInference, 16 | Caffe2MaskRCNNInference, 17 | Caffe2ROIPooler, 18 | Caffe2RPN, 19 | ) 20 | 21 | 22 | class GenericMixin(object): 23 | pass 24 | 25 | 26 | class Caffe2CompatibleConverter(object): 27 | """ 28 | A GenericUpdater which implements the `create_from` interface, by modifying 29 | module object and assign it with another class replaceCls. 
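
    Example (illustrative; mirrors the use in ``patch_generalized_rcnn`` below)::

        model = patch(model, rpn.RPN, Caffe2CompatibleConverter(Caffe2RPN))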
30 | """ 31 | 32 | def __init__(self, replaceCls): 33 | self.replaceCls = replaceCls 34 | 35 | def create_from(self, module): 36 | # update module's class to the new class 37 | assert isinstance(module, torch.nn.Module) 38 | if issubclass(self.replaceCls, GenericMixin): 39 | # replaceCls should act as mixin, create a new class on-the-fly 40 | new_class = type( 41 | "{}MixedWith{}".format(self.replaceCls.__name__, module.__class__.__name__), 42 | (self.replaceCls, module.__class__), 43 | {}, # {"new_method": lambda self: ...}, 44 | ) 45 | module.__class__ = new_class 46 | else: 47 | # replaceCls is complete class, this allow arbitrary class swap 48 | module.__class__ = self.replaceCls 49 | 50 | # initialize Caffe2Compatible 51 | if isinstance(module, Caffe2Compatible): 52 | module.tensor_mode = False 53 | 54 | return module 55 | 56 | 57 | def patch(model, target, updater, *args, **kwargs): 58 | """ 59 | recursively (post-order) update all modules with the target type and its 60 | subclasses, make a initialization/composition/inheritance/... via the 61 | updater.create_from. 62 | """ 63 | for name, module in model.named_children(): 64 | model._modules[name] = patch(module, target, updater, *args, **kwargs) 65 | if isinstance(model, target): 66 | return updater.create_from(model, *args, **kwargs) 67 | return model 68 | 69 | 70 | def patch_generalized_rcnn(model): 71 | ccc = Caffe2CompatibleConverter 72 | model = patch(model, rpn.RPN, ccc(Caffe2RPN)) 73 | model = patch(model, poolers.ROIPooler, ccc(Caffe2ROIPooler)) 74 | 75 | return model 76 | 77 | 78 | @contextlib.contextmanager 79 | def mock_fastrcnn_outputs_inference( 80 | tensor_mode, check=True, box_predictor_type=FastRCNNOutputLayers 81 | ): 82 | with mock.patch.object( 83 | box_predictor_type, 84 | "inference", 85 | autospec=True, 86 | side_effect=Caffe2FastRCNNOutputsInference(tensor_mode), 87 | ) as mocked_func: 88 | yield 89 | if check: 90 | assert mocked_func.call_count > 0 91 | 92 | 93 | @contextlib.contextmanager 94 | def mock_mask_rcnn_inference(tensor_mode, patched_module, check=True): 95 | with mock.patch( 96 | "{}.mask_rcnn_inference".format(patched_module), side_effect=Caffe2MaskRCNNInference() 97 | ) as mocked_func: 98 | yield 99 | if check: 100 | assert mocked_func.call_count > 0 101 | 102 | 103 | @contextlib.contextmanager 104 | def mock_keypoint_rcnn_inference(tensor_mode, patched_module, use_heatmap_max_keypoint, check=True): 105 | with mock.patch( 106 | "{}.keypoint_rcnn_inference".format(patched_module), 107 | side_effect=Caffe2KeypointRCNNInference(use_heatmap_max_keypoint), 108 | ) as mocked_func: 109 | yield 110 | if check: 111 | assert mocked_func.call_count > 0 112 | 113 | 114 | class ROIHeadsPatcher: 115 | def __init__(self, heads, use_heatmap_max_keypoint): 116 | self.heads = heads 117 | self.use_heatmap_max_keypoint = use_heatmap_max_keypoint 118 | 119 | @contextlib.contextmanager 120 | def mock_roi_heads(self, tensor_mode=True): 121 | """ 122 | Patching several inference functions inside ROIHeads and its subclasses 123 | 124 | Args: 125 | tensor_mode (bool): whether the inputs/outputs are caffe2's tensor 126 | format or not. Default to True. 127 | """ 128 | # NOTE: this requries the `keypoint_rcnn_inference` and `mask_rcnn_inference` 129 | # are called inside the same file as BaseXxxHead due to using mock.patch. 
130 | kpt_heads_mod = keypoint_head.BaseKeypointRCNNHead.__module__ 131 | mask_head_mod = mask_head.BaseMaskRCNNHead.__module__ 132 | 133 | mock_ctx_managers = [ 134 | mock_fastrcnn_outputs_inference( 135 | tensor_mode=tensor_mode, 136 | check=True, 137 | box_predictor_type=type(self.heads.box_predictor), 138 | ) 139 | ] 140 | if getattr(self.heads, "keypoint_on", False): 141 | mock_ctx_managers += [ 142 | mock_keypoint_rcnn_inference( 143 | tensor_mode, kpt_heads_mod, self.use_heatmap_max_keypoint 144 | ) 145 | ] 146 | if getattr(self.heads, "mask_on", False): 147 | mock_ctx_managers += [mock_mask_rcnn_inference(tensor_mode, mask_head_mod)] 148 | 149 | with contextlib.ExitStack() as stack: # python 3.3+ 150 | for mgr in mock_ctx_managers: 151 | stack.enter_context(mgr) 152 | yield 153 | -------------------------------------------------------------------------------- /detectron2/export/torchscript.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | 3 | import os 4 | import torch 5 | 6 | from detectron2.utils.file_io import PathManager 7 | 8 | from .torchscript_patch import patch_instances, patch_nonscriptable_classes 9 | 10 | 11 | def export_torchscript_with_instances(model, fields): 12 | """ 13 | Run :func:`torch.jit.script` on a model that uses the :class:`Instances` class. Since 14 | attributes of :class:`Instances` are "dynamically" added in eager mode,it is difficult 15 | for torchscript to support it out of the box. This function is made to support scripting 16 | a model that uses :class:`Instances`. It does the following: 17 | 18 | 1. Create a scriptable ``new_Instances`` class which behaves similarly to ``Instances``, 19 | but with all attributes been "static". 20 | The attributes need to be statically declared in the ``fields`` argument. 21 | 2. Register ``new_Instances`` to torchscript, and force torchscript to 22 | use it when trying to compile ``Instances``. 23 | 24 | After this function, the process will be reverted. User should be able to script another model 25 | using different fields. 26 | 27 | Example: 28 | Assume that ``Instances`` in the model consist of two attributes named 29 | ``proposal_boxes`` and ``objectness_logits`` with type :class:`Boxes` and 30 | :class:`Tensor` respectively during inference. You can call this function like: 31 | 32 | :: 33 | fields = {"proposal_boxes": Boxes, "objectness_logits": torch.Tensor} 34 | torchscipt_model = export_torchscript_with_instances(model, fields) 35 | 36 | Note: 37 | Currently we only support models in evaluation mode. 38 | 39 | Args: 40 | model (nn.Module): The input model to be exported to torchscript. 41 | fields (Dict[str, type]): Attribute names and corresponding type that 42 | ``Instances`` will use in the model. Note that all attributes used in ``Instances`` 43 | need to be added, regarldess of whether they are inputs/outputs of the model. 44 | Data type not defined in detectron2 is not supported for now. 45 | 46 | Returns: 47 | torch.jit.ScriptModule: the input model in torchscript format 48 | """ 49 | patch_nonscriptable_classes() 50 | 51 | assert ( 52 | not model.training 53 | ), "Currently we only support exporting models in evaluation mode to torchscript" 54 | 55 | with patch_instances(fields): 56 | scripted_model = torch.jit.script(model) 57 | return scripted_model 58 | 59 | 60 | def dump_torchscript_IR(model, dir): 61 | """ 62 | Dump IR of a TracedModule/ScriptModule at various levels. 63 | Useful for debugging. 
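A hedged usage sketch combining the two export helpers defined in this file; it assumes `model` is a detectron2 model already switched to eval mode, and the field names mirror the docstring example above. The output directory is an arbitrary choice.

import torch
from detectron2.structures import Boxes
from detectron2.export.torchscript import export_torchscript_with_instances, dump_torchscript_IR

fields = {"proposal_boxes": Boxes, "objectness_logits": torch.Tensor}
ts_model = export_torchscript_with_instances(model, fields)   # model.training must be False
dump_torchscript_IR(ts_model, "./ts_dump")                     # writes model_ts_code.txt, model_ts_IR*.txt, model.txt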
64 | 65 | Args: 66 | model (TracedModule or ScriptModule): traced or scripted module 67 | dir (str): output directory to dump files. 68 | """ 69 | PathManager.mkdirs(dir) 70 | 71 | def _get_script_mod(mod): 72 | if isinstance(mod, torch.jit.TracedModule): 73 | return mod._actual_script_module 74 | return mod 75 | 76 | # Dump pretty-printed code: https://pytorch.org/docs/stable/jit.html#inspecting-code 77 | with PathManager.open(os.path.join(dir, "model_ts_code.txt"), "w") as f: 78 | 79 | def get_code(mod): 80 | # Try a few ways to get code using private attributes. 81 | try: 82 | # This contains more information than just `mod.code` 83 | return _get_script_mod(mod)._c.code 84 | except AttributeError: 85 | pass 86 | try: 87 | return mod.code 88 | except AttributeError: 89 | return None 90 | 91 | def dump_code(prefix, mod): 92 | code = get_code(mod) 93 | name = prefix or "root model" 94 | if code is None: 95 | f.write(f"Could not found code for {name} (type={mod.original_name})\n") 96 | f.write("\n") 97 | else: 98 | f.write(f"\nCode for {name}, type={mod.original_name}:\n") 99 | f.write(code) 100 | f.write("\n") 101 | f.write("-" * 80) 102 | 103 | for name, m in mod.named_children(): 104 | dump_code(prefix + "." + name, m) 105 | 106 | dump_code("", model) 107 | 108 | # Recursively dump IR of all modules 109 | with PathManager.open(os.path.join(dir, "model_ts_IR.txt"), "w") as f: 110 | try: 111 | f.write(_get_script_mod(model)._c.dump_to_str(True, False, False)) 112 | except AttributeError: 113 | pass 114 | 115 | # Dump IR of the entire graph (all submodules inlined) 116 | with PathManager.open(os.path.join(dir, "model_ts_IR_inlined.txt"), "w") as f: 117 | f.write(str(model.inlined_graph)) 118 | 119 | # Dump the model structure in pytorch style 120 | with PathManager.open(os.path.join(dir, "model.txt"), "w") as f: 121 | f.write(str(model)) 122 | -------------------------------------------------------------------------------- /detectron2/layers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | from .batch_norm import FrozenBatchNorm2d, get_norm, NaiveSyncBatchNorm 3 | from .deform_conv import DeformConv, ModulatedDeformConv 4 | from .mask_ops import paste_masks_in_image 5 | from .nms import batched_nms, batched_nms_rotated, nms, nms_rotated 6 | from .roi_align import ROIAlign, roi_align 7 | from .roi_align_rotated import ROIAlignRotated, roi_align_rotated 8 | from .shape_spec import ShapeSpec 9 | from .wrappers import BatchNorm2d, Conv2d, ConvTranspose2d, cat, interpolate, Linear, nonzero_tuple 10 | from .blocks import CNNBlockBase, DepthwiseSeparableConv2d 11 | from .aspp import ASPP 12 | 13 | __all__ = [k for k in globals().keys() if not k.startswith("_")] 14 | -------------------------------------------------------------------------------- /detectron2/layers/aspp.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | 3 | from copy import deepcopy 4 | import fvcore.nn.weight_init as weight_init 5 | import torch 6 | from torch import nn 7 | from torch.nn import functional as F 8 | 9 | from .batch_norm import get_norm 10 | from .wrappers import Conv2d 11 | 12 | 13 | class ASPP(nn.Module): 14 | """ 15 | Atrous Spatial Pyramid Pooling (ASPP). 
16 | """ 17 | 18 | def __init__( 19 | self, 20 | in_channels, 21 | out_channels, 22 | dilations, 23 | *, 24 | norm, 25 | activation, 26 | pool_kernel_size=None, 27 | dropout: float = 0.0, 28 | ): 29 | """ 30 | Args: 31 | in_channels (int): number of input channels for ASPP. 32 | out_channels (int): number of output channels. 33 | dilations (list): a list of 3 dilations in ASPP. 34 | norm (str or callable): normalization for all conv layers. 35 | See :func:`layers.get_norm` for supported format. norm is 36 | applied to all conv layers except the conv following 37 | global average pooling. 38 | activation (callable): activation function. 39 | pool_kernel_size (tuple, list): the average pooling size (kh, kw) 40 | for image pooling layer in ASPP. If set to None, it always 41 | performs global average pooling. If not None, it must be 42 | divisible by the shape of inputs in forward(). It is recommended 43 | to use a fixed input feature size in training, and set this 44 | option to match this size, so that it performs global average 45 | pooling in training, and the size of the pooling window stays 46 | consistent in inference. 47 | dropout (float): apply dropout on the output of ASPP. It is used in 48 | the official DeepLab implementation with a rate of 0.1: 49 | https://github.com/tensorflow/models/blob/21b73d22f3ed05b650e85ac50849408dd36de32e/research/deeplab/model.py#L532 # noqa 50 | """ 51 | super(ASPP, self).__init__() 52 | assert len(dilations) == 3, "ASPP expects 3 dilations, got {}".format(len(dilations)) 53 | self.pool_kernel_size = pool_kernel_size 54 | self.dropout = dropout 55 | use_bias = norm == "" 56 | self.convs = nn.ModuleList() 57 | # conv 1x1 58 | self.convs.append( 59 | Conv2d( 60 | in_channels, 61 | out_channels, 62 | kernel_size=1, 63 | bias=use_bias, 64 | norm=get_norm(norm, out_channels), 65 | activation=deepcopy(activation), 66 | ) 67 | ) 68 | weight_init.c2_xavier_fill(self.convs[-1]) 69 | # atrous convs 70 | for dilation in dilations: 71 | self.convs.append( 72 | Conv2d( 73 | in_channels, 74 | out_channels, 75 | kernel_size=3, 76 | padding=dilation, 77 | dilation=dilation, 78 | bias=use_bias, 79 | norm=get_norm(norm, out_channels), 80 | activation=deepcopy(activation), 81 | ) 82 | ) 83 | weight_init.c2_xavier_fill(self.convs[-1]) 84 | # image pooling 85 | # We do not add BatchNorm because the spatial resolution is 1x1, 86 | # the original TF implementation has BatchNorm. 87 | if pool_kernel_size is None: 88 | image_pooling = nn.Sequential( 89 | nn.AdaptiveAvgPool2d(1), 90 | Conv2d(in_channels, out_channels, 1, bias=True, activation=deepcopy(activation)), 91 | ) 92 | else: 93 | image_pooling = nn.Sequential( 94 | nn.AvgPool2d(kernel_size=pool_kernel_size, stride=1), 95 | Conv2d(in_channels, out_channels, 1, bias=True, activation=deepcopy(activation)), 96 | ) 97 | weight_init.c2_xavier_fill(image_pooling[1]) 98 | self.convs.append(image_pooling) 99 | 100 | self.project = Conv2d( 101 | 5 * out_channels, 102 | out_channels, 103 | kernel_size=1, 104 | bias=use_bias, 105 | norm=get_norm(norm, out_channels), 106 | activation=deepcopy(activation), 107 | ) 108 | weight_init.c2_xavier_fill(self.project) 109 | 110 | def forward(self, x): 111 | size = x.shape[-2:] 112 | if self.pool_kernel_size is not None: 113 | if size[0] % self.pool_kernel_size[0] or size[1] % self.pool_kernel_size[1]: 114 | raise ValueError( 115 | "`pool_kernel_size` must be divisible by the shape of inputs. 
" 116 | "Input size: {} `pool_kernel_size`: {}".format(size, self.pool_kernel_size) 117 | ) 118 | res = [] 119 | for conv in self.convs: 120 | res.append(conv(x)) 121 | res[-1] = F.interpolate(res[-1], size=size, mode="bilinear", align_corners=False) 122 | res = torch.cat(res, dim=1) 123 | res = self.project(res) 124 | res = F.dropout(res, self.dropout, training=self.training) if self.dropout > 0 else res 125 | return res 126 | -------------------------------------------------------------------------------- /detectron2/layers/blocks.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright (c) Facebook, Inc. and its affiliates. 3 | 4 | import fvcore.nn.weight_init as weight_init 5 | from torch import nn 6 | 7 | from .batch_norm import FrozenBatchNorm2d, get_norm 8 | from .wrappers import Conv2d 9 | 10 | 11 | """ 12 | CNN building blocks. 13 | """ 14 | 15 | 16 | class CNNBlockBase(nn.Module): 17 | """ 18 | A CNN block is assumed to have input channels, output channels and a stride. 19 | The input and output of `forward()` method must be NCHW tensors. 20 | The method can perform arbitrary computation but must match the given 21 | channels and stride specification. 22 | 23 | Attribute: 24 | in_channels (int): 25 | out_channels (int): 26 | stride (int): 27 | """ 28 | 29 | def __init__(self, in_channels, out_channels, stride): 30 | """ 31 | The `__init__` method of any subclass should also contain these arguments. 32 | 33 | Args: 34 | in_channels (int): 35 | out_channels (int): 36 | stride (int): 37 | """ 38 | super().__init__() 39 | self.in_channels = in_channels 40 | self.out_channels = out_channels 41 | self.stride = stride 42 | 43 | def freeze(self): 44 | """ 45 | Make this block not trainable. 46 | This method sets all parameters to `requires_grad=False`, 47 | and convert all BatchNorm layers to FrozenBatchNorm 48 | 49 | Returns: 50 | the block itself 51 | """ 52 | for p in self.parameters(): 53 | p.requires_grad = False 54 | FrozenBatchNorm2d.convert_frozen_batchnorm(self) 55 | return self 56 | 57 | 58 | class DepthwiseSeparableConv2d(nn.Module): 59 | """ 60 | A kxk depthwise convolution + a 1x1 convolution. 61 | 62 | In :paper:`xception`, norm & activation are applied on the second conv. 63 | :paper:`mobilenet` uses norm & activation on both convs. 64 | """ 65 | 66 | def __init__( 67 | self, 68 | in_channels, 69 | out_channels, 70 | kernel_size=3, 71 | padding=1, 72 | *, 73 | norm1=None, 74 | activation1=None, 75 | norm2=None, 76 | activation2=None, 77 | ): 78 | """ 79 | Args: 80 | norm1, norm2 (str or callable): normalization for the two conv layers. 81 | activation1, activation2 (callable(Tensor) -> Tensor): activation 82 | function for the two conv layers. 
83 | """ 84 | super().__init__() 85 | self.depthwise = Conv2d( 86 | in_channels, 87 | in_channels, 88 | kernel_size=kernel_size, 89 | padding=padding, 90 | groups=in_channels, 91 | bias=not norm1, 92 | norm=get_norm(norm1, in_channels), 93 | activation=activation1, 94 | ) 95 | self.pointwise = Conv2d( 96 | in_channels, 97 | out_channels, 98 | kernel_size=1, 99 | bias=not norm2, 100 | norm=get_norm(norm2, out_channels), 101 | activation=activation2, 102 | ) 103 | 104 | # default initialization 105 | weight_init.c2_msra_fill(self.depthwise) 106 | weight_init.c2_msra_fill(self.pointwise) 107 | 108 | def forward(self, x): 109 | return self.pointwise(self.depthwise(x)) 110 | -------------------------------------------------------------------------------- /detectron2/layers/csrc/README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | To add a new Op: 4 | 5 | 1. Create a new directory 6 | 2. Implement new ops there 7 | 3. Delcare its Python interface in `vision.cpp`. 8 | -------------------------------------------------------------------------------- /detectron2/layers/csrc/ROIAlign/ROIAlign.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. 2 | #pragma once 3 | #include 4 | 5 | namespace detectron2 { 6 | 7 | at::Tensor ROIAlign_forward_cpu( 8 | const at::Tensor& input, 9 | const at::Tensor& rois, 10 | const float spatial_scale, 11 | const int pooled_height, 12 | const int pooled_width, 13 | const int sampling_ratio, 14 | bool aligned); 15 | 16 | at::Tensor ROIAlign_backward_cpu( 17 | const at::Tensor& grad, 18 | const at::Tensor& rois, 19 | const float spatial_scale, 20 | const int pooled_height, 21 | const int pooled_width, 22 | const int batch_size, 23 | const int channels, 24 | const int height, 25 | const int width, 26 | const int sampling_ratio, 27 | bool aligned); 28 | 29 | #if defined(WITH_CUDA) || defined(WITH_HIP) 30 | at::Tensor ROIAlign_forward_cuda( 31 | const at::Tensor& input, 32 | const at::Tensor& rois, 33 | const float spatial_scale, 34 | const int pooled_height, 35 | const int pooled_width, 36 | const int sampling_ratio, 37 | bool aligned); 38 | 39 | at::Tensor ROIAlign_backward_cuda( 40 | const at::Tensor& grad, 41 | const at::Tensor& rois, 42 | const float spatial_scale, 43 | const int pooled_height, 44 | const int pooled_width, 45 | const int batch_size, 46 | const int channels, 47 | const int height, 48 | const int width, 49 | const int sampling_ratio, 50 | bool aligned); 51 | #endif 52 | 53 | // Interface for Python 54 | inline at::Tensor ROIAlign_forward( 55 | const at::Tensor& input, 56 | const at::Tensor& rois, 57 | const float spatial_scale, 58 | const int pooled_height, 59 | const int pooled_width, 60 | const int sampling_ratio, 61 | bool aligned) { 62 | if (input.is_cuda()) { 63 | #if defined(WITH_CUDA) || defined(WITH_HIP) 64 | return ROIAlign_forward_cuda( 65 | input, 66 | rois, 67 | spatial_scale, 68 | pooled_height, 69 | pooled_width, 70 | sampling_ratio, 71 | aligned); 72 | #else 73 | AT_ERROR("Not compiled with GPU support"); 74 | #endif 75 | } 76 | return ROIAlign_forward_cpu( 77 | input, 78 | rois, 79 | spatial_scale, 80 | pooled_height, 81 | pooled_width, 82 | sampling_ratio, 83 | aligned); 84 | } 85 | 86 | inline at::Tensor ROIAlign_backward( 87 | const at::Tensor& grad, 88 | const at::Tensor& rois, 89 | const float spatial_scale, 90 | const int pooled_height, 91 | const int pooled_width, 92 | const int batch_size, 93 | 
const int channels, 94 | const int height, 95 | const int width, 96 | const int sampling_ratio, 97 | bool aligned) { 98 | if (grad.is_cuda()) { 99 | #if defined(WITH_CUDA) || defined(WITH_HIP) 100 | return ROIAlign_backward_cuda( 101 | grad, 102 | rois, 103 | spatial_scale, 104 | pooled_height, 105 | pooled_width, 106 | batch_size, 107 | channels, 108 | height, 109 | width, 110 | sampling_ratio, 111 | aligned); 112 | #else 113 | AT_ERROR("Not compiled with GPU support"); 114 | #endif 115 | } 116 | return ROIAlign_backward_cpu( 117 | grad, 118 | rois, 119 | spatial_scale, 120 | pooled_height, 121 | pooled_width, 122 | batch_size, 123 | channels, 124 | height, 125 | width, 126 | sampling_ratio, 127 | aligned); 128 | } 129 | 130 | } // namespace detectron2 131 | -------------------------------------------------------------------------------- /detectron2/layers/csrc/ROIAlignRotated/ROIAlignRotated.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. 2 | #pragma once 3 | #include 4 | 5 | namespace detectron2 { 6 | 7 | at::Tensor ROIAlignRotated_forward_cpu( 8 | const at::Tensor& input, 9 | const at::Tensor& rois, 10 | const float spatial_scale, 11 | const int pooled_height, 12 | const int pooled_width, 13 | const int sampling_ratio); 14 | 15 | at::Tensor ROIAlignRotated_backward_cpu( 16 | const at::Tensor& grad, 17 | const at::Tensor& rois, 18 | const float spatial_scale, 19 | const int pooled_height, 20 | const int pooled_width, 21 | const int batch_size, 22 | const int channels, 23 | const int height, 24 | const int width, 25 | const int sampling_ratio); 26 | 27 | #if defined(WITH_CUDA) || defined(WITH_HIP) 28 | at::Tensor ROIAlignRotated_forward_cuda( 29 | const at::Tensor& input, 30 | const at::Tensor& rois, 31 | const float spatial_scale, 32 | const int pooled_height, 33 | const int pooled_width, 34 | const int sampling_ratio); 35 | 36 | at::Tensor ROIAlignRotated_backward_cuda( 37 | const at::Tensor& grad, 38 | const at::Tensor& rois, 39 | const float spatial_scale, 40 | const int pooled_height, 41 | const int pooled_width, 42 | const int batch_size, 43 | const int channels, 44 | const int height, 45 | const int width, 46 | const int sampling_ratio); 47 | #endif 48 | 49 | // Interface for Python 50 | inline at::Tensor ROIAlignRotated_forward( 51 | const at::Tensor& input, 52 | const at::Tensor& rois, 53 | const float spatial_scale, 54 | const int pooled_height, 55 | const int pooled_width, 56 | const int sampling_ratio) { 57 | if (input.is_cuda()) { 58 | #if defined(WITH_CUDA) || defined(WITH_HIP) 59 | return ROIAlignRotated_forward_cuda( 60 | input, 61 | rois, 62 | spatial_scale, 63 | pooled_height, 64 | pooled_width, 65 | sampling_ratio); 66 | #else 67 | AT_ERROR("Not compiled with GPU support"); 68 | #endif 69 | } 70 | return ROIAlignRotated_forward_cpu( 71 | input, rois, spatial_scale, pooled_height, pooled_width, sampling_ratio); 72 | } 73 | 74 | inline at::Tensor ROIAlignRotated_backward( 75 | const at::Tensor& grad, 76 | const at::Tensor& rois, 77 | const float spatial_scale, 78 | const int pooled_height, 79 | const int pooled_width, 80 | const int batch_size, 81 | const int channels, 82 | const int height, 83 | const int width, 84 | const int sampling_ratio) { 85 | if (grad.is_cuda()) { 86 | #if defined(WITH_CUDA) || defined(WITH_HIP) 87 | return ROIAlignRotated_backward_cuda( 88 | grad, 89 | rois, 90 | spatial_scale, 91 | pooled_height, 92 | pooled_width, 93 | batch_size, 94 | channels, 95 | 
height, 96 | width, 97 | sampling_ratio); 98 | #else 99 | AT_ERROR("Not compiled with GPU support"); 100 | #endif 101 | } 102 | return ROIAlignRotated_backward_cpu( 103 | grad, 104 | rois, 105 | spatial_scale, 106 | pooled_height, 107 | pooled_width, 108 | batch_size, 109 | channels, 110 | height, 111 | width, 112 | sampling_ratio); 113 | } 114 | 115 | } // namespace detectron2 116 | -------------------------------------------------------------------------------- /detectron2/layers/csrc/box_iou_rotated/box_iou_rotated.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. 2 | #pragma once 3 | #include 4 | 5 | namespace detectron2 { 6 | 7 | at::Tensor box_iou_rotated_cpu( 8 | const at::Tensor& boxes1, 9 | const at::Tensor& boxes2); 10 | 11 | #if defined(WITH_CUDA) || defined(WITH_HIP) 12 | at::Tensor box_iou_rotated_cuda( 13 | const at::Tensor& boxes1, 14 | const at::Tensor& boxes2); 15 | #endif 16 | 17 | // Interface for Python 18 | // inline is needed to prevent multiple function definitions when this header is 19 | // included by different cpps 20 | inline at::Tensor box_iou_rotated( 21 | const at::Tensor& boxes1, 22 | const at::Tensor& boxes2) { 23 | assert(boxes1.device().is_cuda() == boxes2.device().is_cuda()); 24 | if (boxes1.device().is_cuda()) { 25 | #if defined(WITH_CUDA) || defined(WITH_HIP) 26 | return box_iou_rotated_cuda(boxes1.contiguous(), boxes2.contiguous()); 27 | #else 28 | AT_ERROR("Not compiled with GPU support"); 29 | #endif 30 | } 31 | 32 | return box_iou_rotated_cpu(boxes1.contiguous(), boxes2.contiguous()); 33 | } 34 | 35 | } // namespace detectron2 36 | -------------------------------------------------------------------------------- /detectron2/layers/csrc/box_iou_rotated/box_iou_rotated_cpu.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. 2 | #include "box_iou_rotated.h" 3 | #include "box_iou_rotated_utils.h" 4 | 5 | namespace detectron2 { 6 | 7 | template 8 | void box_iou_rotated_cpu_kernel( 9 | const at::Tensor& boxes1, 10 | const at::Tensor& boxes2, 11 | at::Tensor& ious) { 12 | auto num_boxes1 = boxes1.size(0); 13 | auto num_boxes2 = boxes2.size(0); 14 | 15 | for (int i = 0; i < num_boxes1; i++) { 16 | for (int j = 0; j < num_boxes2; j++) { 17 | ious[i * num_boxes2 + j] = single_box_iou_rotated( 18 | boxes1[i].data_ptr(), boxes2[j].data_ptr()); 19 | } 20 | } 21 | } 22 | 23 | at::Tensor box_iou_rotated_cpu( 24 | // input must be contiguous: 25 | const at::Tensor& boxes1, 26 | const at::Tensor& boxes2) { 27 | auto num_boxes1 = boxes1.size(0); 28 | auto num_boxes2 = boxes2.size(0); 29 | at::Tensor ious = 30 | at::empty({num_boxes1 * num_boxes2}, boxes1.options().dtype(at::kFloat)); 31 | 32 | box_iou_rotated_cpu_kernel(boxes1, boxes2, ious); 33 | 34 | // reshape from 1d array to 2d array 35 | auto shape = std::vector{num_boxes1, num_boxes2}; 36 | return ious.reshape(shape); 37 | } 38 | 39 | } // namespace detectron2 40 | -------------------------------------------------------------------------------- /detectron2/layers/csrc/box_iou_rotated/box_iou_rotated_cuda.cu: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. 
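The `box_iou_rotated` interface declared in the header above is what the Python wrapper `pairwise_iou_rotated` (defined further below in detectron2/layers/rotated_boxes.py) dispatches to. A hedged sketch of calling it with the (x_center, y_center, width, height, angle_degrees) box format; it assumes detectron2 was built with its C extensions, and the box values are arbitrary.

import torch
from detectron2.layers.rotated_boxes import pairwise_iou_rotated

boxes1 = torch.tensor([[10.0, 10.0, 8.0, 4.0, 0.0]])     # (x_ctr, y_ctr, w, h, angle in degrees)
boxes2 = torch.tensor([[10.0, 10.0, 8.0, 4.0, 90.0],
                       [10.0, 10.0, 8.0, 4.0, 0.0]])
ious = pairwise_iou_rotated(boxes1, boxes2)               # shape (1, 2); the second entry is ~1.0 (identical boxes)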
2 | #include 3 | #include 4 | #include 5 | #include 6 | #include "box_iou_rotated_utils.h" 7 | 8 | namespace detectron2 { 9 | 10 | // 2D block with 32 * 16 = 512 threads per block 11 | const int BLOCK_DIM_X = 32; 12 | const int BLOCK_DIM_Y = 16; 13 | 14 | template 15 | __global__ void box_iou_rotated_cuda_kernel( 16 | const int n_boxes1, 17 | const int n_boxes2, 18 | const T* dev_boxes1, 19 | const T* dev_boxes2, 20 | T* dev_ious) { 21 | const int row_start = blockIdx.x * blockDim.x; 22 | const int col_start = blockIdx.y * blockDim.y; 23 | 24 | const int row_size = min(n_boxes1 - row_start, blockDim.x); 25 | const int col_size = min(n_boxes2 - col_start, blockDim.y); 26 | 27 | __shared__ float block_boxes1[BLOCK_DIM_X * 5]; 28 | __shared__ float block_boxes2[BLOCK_DIM_Y * 5]; 29 | 30 | // It's safe to copy using threadIdx.x since BLOCK_DIM_X >= BLOCK_DIM_Y 31 | if (threadIdx.x < row_size && threadIdx.y == 0) { 32 | block_boxes1[threadIdx.x * 5 + 0] = 33 | dev_boxes1[(row_start + threadIdx.x) * 5 + 0]; 34 | block_boxes1[threadIdx.x * 5 + 1] = 35 | dev_boxes1[(row_start + threadIdx.x) * 5 + 1]; 36 | block_boxes1[threadIdx.x * 5 + 2] = 37 | dev_boxes1[(row_start + threadIdx.x) * 5 + 2]; 38 | block_boxes1[threadIdx.x * 5 + 3] = 39 | dev_boxes1[(row_start + threadIdx.x) * 5 + 3]; 40 | block_boxes1[threadIdx.x * 5 + 4] = 41 | dev_boxes1[(row_start + threadIdx.x) * 5 + 4]; 42 | } 43 | 44 | if (threadIdx.x < col_size && threadIdx.y == 0) { 45 | block_boxes2[threadIdx.x * 5 + 0] = 46 | dev_boxes2[(col_start + threadIdx.x) * 5 + 0]; 47 | block_boxes2[threadIdx.x * 5 + 1] = 48 | dev_boxes2[(col_start + threadIdx.x) * 5 + 1]; 49 | block_boxes2[threadIdx.x * 5 + 2] = 50 | dev_boxes2[(col_start + threadIdx.x) * 5 + 2]; 51 | block_boxes2[threadIdx.x * 5 + 3] = 52 | dev_boxes2[(col_start + threadIdx.x) * 5 + 3]; 53 | block_boxes2[threadIdx.x * 5 + 4] = 54 | dev_boxes2[(col_start + threadIdx.x) * 5 + 4]; 55 | } 56 | __syncthreads(); 57 | 58 | if (threadIdx.x < row_size && threadIdx.y < col_size) { 59 | int offset = (row_start + threadIdx.x) * n_boxes2 + col_start + threadIdx.y; 60 | dev_ious[offset] = single_box_iou_rotated( 61 | block_boxes1 + threadIdx.x * 5, block_boxes2 + threadIdx.y * 5); 62 | } 63 | } 64 | 65 | at::Tensor box_iou_rotated_cuda( 66 | // input must be contiguous 67 | const at::Tensor& boxes1, 68 | const at::Tensor& boxes2) { 69 | using scalar_t = float; 70 | AT_ASSERTM( 71 | boxes1.scalar_type() == at::kFloat, "boxes1 must be a float tensor"); 72 | AT_ASSERTM( 73 | boxes2.scalar_type() == at::kFloat, "boxes2 must be a float tensor"); 74 | AT_ASSERTM(boxes1.is_cuda(), "boxes1 must be a CUDA tensor"); 75 | AT_ASSERTM(boxes2.is_cuda(), "boxes2 must be a CUDA tensor"); 76 | at::cuda::CUDAGuard device_guard(boxes1.device()); 77 | 78 | auto num_boxes1 = boxes1.size(0); 79 | auto num_boxes2 = boxes2.size(0); 80 | 81 | at::Tensor ious = 82 | at::empty({num_boxes1 * num_boxes2}, boxes1.options().dtype(at::kFloat)); 83 | 84 | bool transpose = false; 85 | if (num_boxes1 > 0 && num_boxes2 > 0) { 86 | scalar_t *data1 = boxes1.data_ptr(), 87 | *data2 = boxes2.data_ptr(); 88 | 89 | if (num_boxes2 > 65535 * BLOCK_DIM_Y) { 90 | AT_ASSERTM( 91 | num_boxes1 <= 65535 * BLOCK_DIM_Y, 92 | "Too many boxes for box_iou_rotated_cuda!"); 93 | // x dim is allowed to be large, but y dim cannot, 94 | // so we transpose the two to avoid "invalid configuration argument" 95 | // error. We assume one of them is small. Otherwise the result is hard to 96 | // fit in memory anyway. 
97 | std::swap(num_boxes1, num_boxes2); 98 | std::swap(data1, data2); 99 | transpose = true; 100 | } 101 | 102 | const int blocks_x = 103 | at::cuda::ATenCeilDiv(static_cast(num_boxes1), BLOCK_DIM_X); 104 | const int blocks_y = 105 | at::cuda::ATenCeilDiv(static_cast(num_boxes2), BLOCK_DIM_Y); 106 | 107 | dim3 blocks(blocks_x, blocks_y); 108 | dim3 threads(BLOCK_DIM_X, BLOCK_DIM_Y); 109 | cudaStream_t stream = at::cuda::getCurrentCUDAStream(); 110 | 111 | box_iou_rotated_cuda_kernel<<>>( 112 | num_boxes1, 113 | num_boxes2, 114 | data1, 115 | data2, 116 | (scalar_t*)ious.data_ptr()); 117 | 118 | AT_CUDA_CHECK(cudaGetLastError()); 119 | } 120 | 121 | // reshape from 1d array to 2d array 122 | auto shape = std::vector{num_boxes1, num_boxes2}; 123 | if (transpose) { 124 | return ious.view(shape).t(); 125 | } else { 126 | return ious.view(shape); 127 | } 128 | } 129 | 130 | } // namespace detectron2 131 | -------------------------------------------------------------------------------- /detectron2/layers/csrc/cocoeval/cocoeval.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. 2 | #pragma once 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | namespace py = pybind11; 11 | 12 | namespace detectron2 { 13 | 14 | namespace COCOeval { 15 | 16 | // Annotation data for a single object instance in an image 17 | struct InstanceAnnotation { 18 | InstanceAnnotation( 19 | uint64_t id, 20 | double score, 21 | double area, 22 | bool is_crowd, 23 | bool ignore) 24 | : id{id}, score{score}, area{area}, is_crowd{is_crowd}, ignore{ignore} {} 25 | uint64_t id; 26 | double score = 0.; 27 | double area = 0.; 28 | bool is_crowd = false; 29 | bool ignore = false; 30 | }; 31 | 32 | // Stores intermediate results for evaluating detection results for a single 33 | // image that has D detected instances and G ground truth instances. This stores 34 | // matches between detected and ground truth instances 35 | struct ImageEvaluation { 36 | // For each of the D detected instances, the id of the matched ground truth 37 | // instance, or 0 if unmatched 38 | std::vector detection_matches; 39 | 40 | // The detection score of each of the D detected instances 41 | std::vector detection_scores; 42 | 43 | // Marks whether or not each of G instances was ignored from evaluation (e.g., 44 | // because it's outside area_range) 45 | std::vector ground_truth_ignores; 46 | 47 | // Marks whether or not each of D instances was ignored from evaluation (e.g., 48 | // because it's outside aRng) 49 | std::vector detection_ignores; 50 | }; 51 | 52 | template 53 | using ImageCategoryInstances = std::vector>>; 54 | 55 | // C++ implementation of COCO API cocoeval.py::COCOeval.evaluateImg(). For each 56 | // combination of image, category, area range settings, and IOU thresholds to 57 | // evaluate, it matches detected instances to ground truth instances and stores 58 | // the results into a vector of ImageEvaluation results, which will be 59 | // interpreted by the COCOeval::Accumulate() function to produce precion-recall 60 | // curves. 
The parameters of nested vectors have the following semantics: 61 | // image_category_ious[i][c][d][g] is the intersection over union of the d'th 62 | // detected instance and g'th ground truth instance of 63 | // category category_ids[c] in image image_ids[i] 64 | // image_category_ground_truth_instances[i][c] is a vector of ground truth 65 | // instances in image image_ids[i] of category category_ids[c] 66 | // image_category_detection_instances[i][c] is a vector of detected 67 | // instances in image image_ids[i] of category category_ids[c] 68 | std::vector EvaluateImages( 69 | const std::vector>& area_ranges, // vector of 2-tuples 70 | int max_detections, 71 | const std::vector& iou_thresholds, 72 | const ImageCategoryInstances>& image_category_ious, 73 | const ImageCategoryInstances& 74 | image_category_ground_truth_instances, 75 | const ImageCategoryInstances& 76 | image_category_detection_instances); 77 | 78 | // C++ implementation of COCOeval.accumulate(), which generates precision 79 | // recall curves for each set of category, IOU threshold, detection area range, 80 | // and max number of detections parameters. It is assumed that the parameter 81 | // evaluations is the return value of the functon COCOeval::EvaluateImages(), 82 | // which was called with the same parameter settings params 83 | py::dict Accumulate( 84 | const py::object& params, 85 | const std::vector& evalutations); 86 | 87 | } // namespace COCOeval 88 | } // namespace detectron2 89 | -------------------------------------------------------------------------------- /detectron2/layers/csrc/cuda_version.cu: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. 2 | 3 | #include 4 | 5 | namespace detectron2 { 6 | int get_cudart_version() { 7 | // Not a ROCM platform: Either HIP is not used, or 8 | // it is used, but platform is not ROCM (i.e. it is CUDA) 9 | #if !defined(__HIP_PLATFORM_HCC__) 10 | return CUDART_VERSION; 11 | #else 12 | int version = 0; 13 | 14 | #if HIP_VERSION_MAJOR != 0 15 | // Create a convention similar to that of CUDA, as assumed by other 16 | // parts of the code. 17 | 18 | version = HIP_VERSION_MINOR; 19 | version += (HIP_VERSION_MAJOR * 100); 20 | #else 21 | hipRuntimeGetVersion(&version); 22 | #endif 23 | return version; 24 | #endif 25 | } 26 | } // namespace detectron2 27 | -------------------------------------------------------------------------------- /detectron2/layers/csrc/nms_rotated/nms_rotated.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. 
2 | #pragma once 3 | #include 4 | 5 | namespace detectron2 { 6 | 7 | at::Tensor nms_rotated_cpu( 8 | const at::Tensor& dets, 9 | const at::Tensor& scores, 10 | const float iou_threshold); 11 | 12 | #if defined(WITH_CUDA) || defined(WITH_HIP) 13 | at::Tensor nms_rotated_cuda( 14 | const at::Tensor& dets, 15 | const at::Tensor& scores, 16 | const float iou_threshold); 17 | #endif 18 | 19 | // Interface for Python 20 | // inline is needed to prevent multiple function definitions when this header is 21 | // included by different cpps 22 | inline at::Tensor nms_rotated( 23 | const at::Tensor& dets, 24 | const at::Tensor& scores, 25 | const float iou_threshold) { 26 | assert(dets.device().is_cuda() == scores.device().is_cuda()); 27 | if (dets.device().is_cuda()) { 28 | #if defined(WITH_CUDA) || defined(WITH_HIP) 29 | return nms_rotated_cuda( 30 | dets.contiguous(), scores.contiguous(), iou_threshold); 31 | #else 32 | AT_ERROR("Not compiled with GPU support"); 33 | #endif 34 | } 35 | 36 | return nms_rotated_cpu(dets.contiguous(), scores.contiguous(), iou_threshold); 37 | } 38 | 39 | } // namespace detectron2 40 | -------------------------------------------------------------------------------- /detectron2/layers/csrc/nms_rotated/nms_rotated_cpu.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. 2 | #include "../box_iou_rotated/box_iou_rotated_utils.h" 3 | #include "nms_rotated.h" 4 | 5 | namespace detectron2 { 6 | 7 | template 8 | at::Tensor nms_rotated_cpu_kernel( 9 | const at::Tensor& dets, 10 | const at::Tensor& scores, 11 | const float iou_threshold) { 12 | // nms_rotated_cpu_kernel is modified from torchvision's nms_cpu_kernel, 13 | // however, the code in this function is much shorter because 14 | // we delegate the IoU computation for rotated boxes to 15 | // the single_box_iou_rotated function in box_iou_rotated_utils.h 16 | AT_ASSERTM(dets.device().is_cpu(), "dets must be a CPU tensor"); 17 | AT_ASSERTM(scores.device().is_cpu(), "scores must be a CPU tensor"); 18 | AT_ASSERTM( 19 | dets.scalar_type() == scores.scalar_type(), 20 | "dets should have the same type as scores"); 21 | 22 | if (dets.numel() == 0) { 23 | return at::empty({0}, dets.options().dtype(at::kLong)); 24 | } 25 | 26 | auto order_t = std::get<1>(scores.sort(0, /* descending=*/true)); 27 | 28 | auto ndets = dets.size(0); 29 | at::Tensor suppressed_t = at::zeros({ndets}, dets.options().dtype(at::kByte)); 30 | at::Tensor keep_t = at::zeros({ndets}, dets.options().dtype(at::kLong)); 31 | 32 | auto suppressed = suppressed_t.data_ptr(); 33 | auto keep = keep_t.data_ptr(); 34 | auto order = order_t.data_ptr(); 35 | 36 | int64_t num_to_keep = 0; 37 | 38 | for (int64_t _i = 0; _i < ndets; _i++) { 39 | auto i = order[_i]; 40 | if (suppressed[i] == 1) { 41 | continue; 42 | } 43 | 44 | keep[num_to_keep++] = i; 45 | 46 | for (int64_t _j = _i + 1; _j < ndets; _j++) { 47 | auto j = order[_j]; 48 | if (suppressed[j] == 1) { 49 | continue; 50 | } 51 | 52 | auto ovr = single_box_iou_rotated( 53 | dets[i].data_ptr(), dets[j].data_ptr()); 54 | if (ovr >= iou_threshold) { 55 | suppressed[j] = 1; 56 | } 57 | } 58 | } 59 | return keep_t.narrow(/*dim=*/0, /*start=*/0, /*length=*/num_to_keep); 60 | } 61 | 62 | at::Tensor nms_rotated_cpu( 63 | // input must be contiguous 64 | const at::Tensor& dets, 65 | const at::Tensor& scores, 66 | const float iou_threshold) { 67 | auto result = at::empty({0}, dets.options()); 68 | 69 | 
AT_DISPATCH_FLOATING_TYPES(dets.scalar_type(), "nms_rotated", [&] { 70 | result = nms_rotated_cpu_kernel(dets, scores, iou_threshold); 71 | }); 72 | return result; 73 | } 74 | 75 | } // namespace detectron2 76 | -------------------------------------------------------------------------------- /detectron2/layers/csrc/nms_rotated/nms_rotated_cuda.cu: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. 2 | #include 3 | #include 4 | #include 5 | #include 6 | #ifdef WITH_CUDA 7 | #include "../box_iou_rotated/box_iou_rotated_utils.h" 8 | #endif 9 | // TODO avoid this when pytorch supports "same directory" hipification 10 | #ifdef WITH_HIP 11 | #include "box_iou_rotated/box_iou_rotated_utils.h" 12 | #endif 13 | 14 | using namespace detectron2; 15 | 16 | namespace { 17 | int const threadsPerBlock = sizeof(unsigned long long) * 8; 18 | } 19 | 20 | template 21 | __global__ void nms_rotated_cuda_kernel( 22 | const int n_boxes, 23 | const float iou_threshold, 24 | const T* dev_boxes, 25 | unsigned long long* dev_mask) { 26 | // nms_rotated_cuda_kernel is modified from torchvision's nms_cuda_kernel 27 | 28 | const int row_start = blockIdx.y; 29 | const int col_start = blockIdx.x; 30 | 31 | // if (row_start > col_start) return; 32 | 33 | const int row_size = 34 | min(n_boxes - row_start * threadsPerBlock, threadsPerBlock); 35 | const int col_size = 36 | min(n_boxes - col_start * threadsPerBlock, threadsPerBlock); 37 | 38 | // Compared to nms_cuda_kernel, where each box is represented with 4 values 39 | // (x1, y1, x2, y2), each rotated box is represented with 5 values 40 | // (x_center, y_center, width, height, angle_degrees) here. 41 | __shared__ T block_boxes[threadsPerBlock * 5]; 42 | if (threadIdx.x < col_size) { 43 | block_boxes[threadIdx.x * 5 + 0] = 44 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0]; 45 | block_boxes[threadIdx.x * 5 + 1] = 46 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1]; 47 | block_boxes[threadIdx.x * 5 + 2] = 48 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2]; 49 | block_boxes[threadIdx.x * 5 + 3] = 50 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 3]; 51 | block_boxes[threadIdx.x * 5 + 4] = 52 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4]; 53 | } 54 | __syncthreads(); 55 | 56 | if (threadIdx.x < row_size) { 57 | const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x; 58 | const T* cur_box = dev_boxes + cur_box_idx * 5; 59 | int i = 0; 60 | unsigned long long t = 0; 61 | int start = 0; 62 | if (row_start == col_start) { 63 | start = threadIdx.x + 1; 64 | } 65 | for (i = start; i < col_size; i++) { 66 | // Instead of devIoU used by original horizontal nms, here 67 | // we use the single_box_iou_rotated function from box_iou_rotated_utils.h 68 | if (single_box_iou_rotated(cur_box, block_boxes + i * 5) > 69 | iou_threshold) { 70 | t |= 1ULL << i; 71 | } 72 | } 73 | const int col_blocks = at::cuda::ATenCeilDiv(n_boxes, threadsPerBlock); 74 | dev_mask[cur_box_idx * col_blocks + col_start] = t; 75 | } 76 | } 77 | 78 | namespace detectron2 { 79 | 80 | at::Tensor nms_rotated_cuda( 81 | // input must be contiguous 82 | const at::Tensor& dets, 83 | const at::Tensor& scores, 84 | float iou_threshold) { 85 | // using scalar_t = float; 86 | AT_ASSERTM(dets.is_cuda(), "dets must be a CUDA tensor"); 87 | AT_ASSERTM(scores.is_cuda(), "scores must be a CUDA tensor"); 88 | at::cuda::CUDAGuard 
device_guard(dets.device()); 89 | 90 | auto order_t = std::get<1>(scores.sort(0, /* descending=*/true)); 91 | auto dets_sorted = dets.index_select(0, order_t); 92 | 93 | auto dets_num = dets.size(0); 94 | 95 | const int col_blocks = 96 | at::cuda::ATenCeilDiv(static_cast(dets_num), threadsPerBlock); 97 | 98 | at::Tensor mask = 99 | at::empty({dets_num * col_blocks}, dets.options().dtype(at::kLong)); 100 | 101 | dim3 blocks(col_blocks, col_blocks); 102 | dim3 threads(threadsPerBlock); 103 | cudaStream_t stream = at::cuda::getCurrentCUDAStream(); 104 | 105 | AT_DISPATCH_FLOATING_TYPES( 106 | dets_sorted.scalar_type(), "nms_rotated_kernel_cuda", [&] { 107 | nms_rotated_cuda_kernel<<>>( 108 | dets_num, 109 | iou_threshold, 110 | dets_sorted.data_ptr(), 111 | (unsigned long long*)mask.data_ptr()); 112 | }); 113 | 114 | at::Tensor mask_cpu = mask.to(at::kCPU); 115 | unsigned long long* mask_host = 116 | (unsigned long long*)mask_cpu.data_ptr(); 117 | 118 | std::vector remv(col_blocks); 119 | memset(&remv[0], 0, sizeof(unsigned long long) * col_blocks); 120 | 121 | at::Tensor keep = 122 | at::empty({dets_num}, dets.options().dtype(at::kLong).device(at::kCPU)); 123 | int64_t* keep_out = keep.data_ptr(); 124 | 125 | int num_to_keep = 0; 126 | for (int i = 0; i < dets_num; i++) { 127 | int nblock = i / threadsPerBlock; 128 | int inblock = i % threadsPerBlock; 129 | 130 | if (!(remv[nblock] & (1ULL << inblock))) { 131 | keep_out[num_to_keep++] = i; 132 | unsigned long long* p = mask_host + i * col_blocks; 133 | for (int j = nblock; j < col_blocks; j++) { 134 | remv[j] |= p[j]; 135 | } 136 | } 137 | } 138 | 139 | AT_CUDA_CHECK(cudaGetLastError()); 140 | return order_t.index( 141 | {keep.narrow(/*dim=*/0, /*start=*/0, /*length=*/num_to_keep) 142 | .to(order_t.device(), keep.scalar_type())}); 143 | } 144 | 145 | } // namespace detectron2 146 | -------------------------------------------------------------------------------- /detectron2/layers/csrc/vision.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. 2 | 3 | #include 4 | #include "ROIAlign/ROIAlign.h" 5 | #include "ROIAlignRotated/ROIAlignRotated.h" 6 | #include "box_iou_rotated/box_iou_rotated.h" 7 | #include "cocoeval/cocoeval.h" 8 | #include "deformable/deform_conv.h" 9 | #include "nms_rotated/nms_rotated.h" 10 | 11 | namespace detectron2 { 12 | 13 | #if defined(WITH_CUDA) || defined(WITH_HIP) 14 | extern int get_cudart_version(); 15 | #endif 16 | 17 | std::string get_cuda_version() { 18 | #if defined(WITH_CUDA) || defined(WITH_HIP) 19 | std::ostringstream oss; 20 | 21 | #if defined(WITH_CUDA) 22 | oss << "CUDA "; 23 | #else 24 | oss << "HIP "; 25 | #endif 26 | 27 | // copied from 28 | // https://github.com/pytorch/pytorch/blob/master/aten/src/ATen/cuda/detail/CUDAHooks.cpp#L231 29 | auto printCudaStyleVersion = [&](int v) { 30 | oss << (v / 1000) << "." << (v / 10 % 100); 31 | if (v % 10 != 0) { 32 | oss << "." << (v % 10); 33 | } 34 | }; 35 | printCudaStyleVersion(get_cudart_version()); 36 | return oss.str(); 37 | #else // neither CUDA nor HIP 38 | return std::string("not available"); 39 | #endif 40 | } 41 | 42 | // similar to 43 | // https://github.com/pytorch/pytorch/blob/master/aten/src/ATen/Version.cpp 44 | std::string get_compiler_version() { 45 | std::ostringstream ss; 46 | #if defined(__GNUC__) 47 | #ifndef __clang__ 48 | 49 | #if ((__GNUC__ <= 4) && (__GNUC_MINOR__ <= 8)) 50 | #error "GCC >= 4.9 is required!" 
51 | #endif 52 | 53 | { ss << "GCC " << __GNUC__ << "." << __GNUC_MINOR__; } 54 | #endif 55 | #endif 56 | 57 | #if defined(__clang_major__) 58 | { 59 | ss << "clang " << __clang_major__ << "." << __clang_minor__ << "." 60 | << __clang_patchlevel__; 61 | } 62 | #endif 63 | 64 | #if defined(_MSC_VER) 65 | { ss << "MSVC " << _MSC_FULL_VER; } 66 | #endif 67 | return ss.str(); 68 | } 69 | 70 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 71 | m.def("get_compiler_version", &get_compiler_version, "get_compiler_version"); 72 | m.def("get_cuda_version", &get_cuda_version, "get_cuda_version"); 73 | 74 | m.def("box_iou_rotated", &box_iou_rotated, "IoU for rotated boxes"); 75 | 76 | m.def("deform_conv_forward", &deform_conv_forward, "deform_conv_forward"); 77 | m.def( 78 | "deform_conv_backward_input", 79 | &deform_conv_backward_input, 80 | "deform_conv_backward_input"); 81 | m.def( 82 | "deform_conv_backward_filter", 83 | &deform_conv_backward_filter, 84 | "deform_conv_backward_filter"); 85 | m.def( 86 | "modulated_deform_conv_forward", 87 | &modulated_deform_conv_forward, 88 | "modulated_deform_conv_forward"); 89 | m.def( 90 | "modulated_deform_conv_backward", 91 | &modulated_deform_conv_backward, 92 | "modulated_deform_conv_backward"); 93 | 94 | m.def("nms_rotated", &nms_rotated, "NMS for rotated boxes"); 95 | 96 | m.def("roi_align_forward", &ROIAlign_forward, "ROIAlign_forward"); 97 | m.def("roi_align_backward", &ROIAlign_backward, "ROIAlign_backward"); 98 | 99 | m.def( 100 | "roi_align_rotated_forward", 101 | &ROIAlignRotated_forward, 102 | "Forward pass for Rotated ROI-Align Operator"); 103 | m.def( 104 | "roi_align_rotated_backward", 105 | &ROIAlignRotated_backward, 106 | "Backward pass for Rotated ROI-Align Operator"); 107 | 108 | m.def("COCOevalAccumulate", &COCOeval::Accumulate, "COCOeval::Accumulate"); 109 | m.def( 110 | "COCOevalEvaluateImages", 111 | &COCOeval::EvaluateImages, 112 | "COCOeval::EvaluateImages"); 113 | pybind11::class_(m, "InstanceAnnotation") 114 | .def(pybind11::init()); 115 | pybind11::class_(m, "ImageEvaluation") 116 | .def(pybind11::init<>()); 117 | } 118 | } // namespace detectron2 119 | -------------------------------------------------------------------------------- /detectron2/layers/roi_align.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
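For reference, the rotated-NMS op bound in vision.cpp above is reachable from Python through `detectron2.layers` (see layers/__init__.py earlier in this tree). A hedged sketch, assuming the compiled _C extension is available; box and score values are arbitrary.

import torch
from detectron2.layers import nms_rotated

boxes = torch.tensor([[50.0, 50.0, 20.0, 10.0, 0.0],
                      [50.0, 50.0, 20.0, 10.0, 5.0],      # heavy overlap with the first box
                      [120.0, 40.0, 30.0, 12.0, 45.0]])
scores = torch.tensor([0.9, 0.8, 0.7])
keep = nms_rotated(boxes, scores, 0.5)                    # indices of kept boxes, ordered by descending score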
2 | from torch import nn 3 | from torchvision.ops import roi_align as tv_roi_align 4 | 5 | try: 6 | from torchvision import __version__ 7 | 8 | version = tuple(int(x) for x in __version__.split(".")[:2]) 9 | USE_TORCHVISION = version >= (0, 7) # https://github.com/pytorch/vision/pull/2438 10 | except ImportError: # only open source torchvision has __version__ 11 | USE_TORCHVISION = True 12 | 13 | 14 | if USE_TORCHVISION: 15 | roi_align = tv_roi_align 16 | else: 17 | from torch.nn.modules.utils import _pair 18 | from torch.autograd import Function 19 | from torch.autograd.function import once_differentiable 20 | from detectron2 import _C 21 | 22 | class _ROIAlign(Function): 23 | @staticmethod 24 | def forward(ctx, input, roi, output_size, spatial_scale, sampling_ratio, aligned): 25 | ctx.save_for_backward(roi) 26 | ctx.output_size = _pair(output_size) 27 | ctx.spatial_scale = spatial_scale 28 | ctx.sampling_ratio = sampling_ratio 29 | ctx.input_shape = input.size() 30 | ctx.aligned = aligned 31 | output = _C.roi_align_forward( 32 | input, roi, spatial_scale, output_size[0], output_size[1], sampling_ratio, aligned 33 | ) 34 | return output 35 | 36 | @staticmethod 37 | @once_differentiable 38 | def backward(ctx, grad_output): 39 | (rois,) = ctx.saved_tensors 40 | output_size = ctx.output_size 41 | spatial_scale = ctx.spatial_scale 42 | sampling_ratio = ctx.sampling_ratio 43 | bs, ch, h, w = ctx.input_shape 44 | grad_input = _C.roi_align_backward( 45 | grad_output, 46 | rois, 47 | spatial_scale, 48 | output_size[0], 49 | output_size[1], 50 | bs, 51 | ch, 52 | h, 53 | w, 54 | sampling_ratio, 55 | ctx.aligned, 56 | ) 57 | return grad_input, None, None, None, None, None 58 | 59 | roi_align = _ROIAlign.apply 60 | 61 | 62 | # NOTE: torchvision's RoIAlign has a different default aligned=False 63 | class ROIAlign(nn.Module): 64 | def __init__(self, output_size, spatial_scale, sampling_ratio, aligned=True): 65 | """ 66 | Args: 67 | output_size (tuple): h, w 68 | spatial_scale (float): scale the input boxes by this number 69 | sampling_ratio (int): number of inputs samples to take for each output 70 | sample. 0 to take samples densely. 71 | aligned (bool): if False, use the legacy implementation in 72 | Detectron. If True, align the results more perfectly. 73 | 74 | Note: 75 | The meaning of aligned=True: 76 | 77 | Given a continuous coordinate c, its two neighboring pixel indices (in our 78 | pixel model) are computed by floor(c - 0.5) and ceil(c - 0.5). For example, 79 | c=1.3 has pixel neighbors with discrete indices [0] and [1] (which are sampled 80 | from the underlying signal at continuous coordinates 0.5 and 1.5). But the original 81 | roi_align (aligned=False) does not subtract the 0.5 when computing neighboring 82 | pixel indices and therefore it uses pixels with a slightly incorrect alignment 83 | (relative to our pixel model) when performing bilinear interpolation. 84 | 85 | With `aligned=True`, 86 | we first appropriately scale the ROI and then shift it by -0.5 87 | prior to calling roi_align. This produces the correct neighbors; see 88 | detectron2/tests/test_roi_align.py for verification. 89 | 90 | The difference does not make a difference to the model's performance if 91 | ROIAlign is used together with conv layers. 
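A hedged construction sketch for the ROIAlign module this note documents; the feature shape, spatial scale, and box coordinates are illustrative assumptions.

import torch
from detectron2.layers import ROIAlign

pooler = ROIAlign(output_size=(7, 7), spatial_scale=1.0 / 16, sampling_ratio=0, aligned=True)
features = torch.randn(2, 256, 64, 64)                    # NCHW feature map at stride 16
rois = torch.tensor([[0.0, 32.0, 48.0, 256.0, 320.0],     # (batch_index, x1, y1, x2, y2) in image coordinates
                     [1.0, 0.0, 0.0, 128.0, 128.0]])
out = pooler(features, rois)                              # shape (2, 256, 7, 7)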
92 | """ 93 | super(ROIAlign, self).__init__() 94 | self.output_size = output_size 95 | self.spatial_scale = spatial_scale 96 | self.sampling_ratio = sampling_ratio 97 | self.aligned = aligned 98 | 99 | def forward(self, input, rois): 100 | """ 101 | Args: 102 | input: NCHW images 103 | rois: Bx5 boxes. First column is the index into N. The other 4 columns are xyxy. 104 | """ 105 | assert rois.dim() == 2 and rois.size(1) == 5 106 | return roi_align( 107 | input, 108 | rois.to(dtype=input.dtype), 109 | self.output_size, 110 | self.spatial_scale, 111 | self.sampling_ratio, 112 | self.aligned, 113 | ) 114 | 115 | def __repr__(self): 116 | tmpstr = self.__class__.__name__ + "(" 117 | tmpstr += "output_size=" + str(self.output_size) 118 | tmpstr += ", spatial_scale=" + str(self.spatial_scale) 119 | tmpstr += ", sampling_ratio=" + str(self.sampling_ratio) 120 | tmpstr += ", aligned=" + str(self.aligned) 121 | tmpstr += ")" 122 | return tmpstr 123 | -------------------------------------------------------------------------------- /detectron2/layers/roi_align_rotated.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | import torch 3 | from torch import nn 4 | from torch.autograd import Function 5 | from torch.autograd.function import once_differentiable 6 | from torch.nn.modules.utils import _pair 7 | 8 | from detectron2 import _C 9 | 10 | 11 | class _ROIAlignRotated(Function): 12 | @staticmethod 13 | def forward(ctx, input, roi, output_size, spatial_scale, sampling_ratio): 14 | ctx.save_for_backward(roi) 15 | ctx.output_size = _pair(output_size) 16 | ctx.spatial_scale = spatial_scale 17 | ctx.sampling_ratio = sampling_ratio 18 | ctx.input_shape = input.size() 19 | output = _C.roi_align_rotated_forward( 20 | input, roi, spatial_scale, output_size[0], output_size[1], sampling_ratio 21 | ) 22 | return output 23 | 24 | @staticmethod 25 | @once_differentiable 26 | def backward(ctx, grad_output): 27 | (rois,) = ctx.saved_tensors 28 | output_size = ctx.output_size 29 | spatial_scale = ctx.spatial_scale 30 | sampling_ratio = ctx.sampling_ratio 31 | bs, ch, h, w = ctx.input_shape 32 | grad_input = _C.roi_align_rotated_backward( 33 | grad_output, 34 | rois, 35 | spatial_scale, 36 | output_size[0], 37 | output_size[1], 38 | bs, 39 | ch, 40 | h, 41 | w, 42 | sampling_ratio, 43 | ) 44 | return grad_input, None, None, None, None, None 45 | 46 | 47 | roi_align_rotated = _ROIAlignRotated.apply 48 | 49 | 50 | class ROIAlignRotated(nn.Module): 51 | def __init__(self, output_size, spatial_scale, sampling_ratio): 52 | """ 53 | Args: 54 | output_size (tuple): h, w 55 | spatial_scale (float): scale the input boxes by this number 56 | sampling_ratio (int): number of inputs samples to take for each output 57 | sample. 0 to take samples densely. 58 | 59 | Note: 60 | ROIAlignRotated supports continuous coordinate by default: 61 | Given a continuous coordinate c, its two neighboring pixel indices (in our 62 | pixel model) are computed by floor(c - 0.5) and ceil(c - 0.5). For example, 63 | c=1.3 has pixel neighbors with discrete indices [0] and [1] (which are sampled 64 | from the underlying signal at continuous coordinates 0.5 and 1.5). 65 | """ 66 | super(ROIAlignRotated, self).__init__() 67 | self.output_size = output_size 68 | self.spatial_scale = spatial_scale 69 | self.sampling_ratio = sampling_ratio 70 | 71 | def forward(self, input, rois): 72 | """ 73 | Args: 74 | input: NCHW images 75 | rois: Bx6 boxes. 
First column is the index into N. 76 | The other 5 columns are (x_ctr, y_ctr, width, height, angle_degrees). 77 | """ 78 | assert rois.dim() == 2 and rois.size(1) == 6 79 | orig_dtype = input.dtype 80 | if orig_dtype == torch.float16: 81 | input = input.float() 82 | rois = rois.float() 83 | return roi_align_rotated( 84 | input, rois, self.output_size, self.spatial_scale, self.sampling_ratio 85 | ).to(dtype=orig_dtype) 86 | 87 | def __repr__(self): 88 | tmpstr = self.__class__.__name__ + "(" 89 | tmpstr += "output_size=" + str(self.output_size) 90 | tmpstr += ", spatial_scale=" + str(self.spatial_scale) 91 | tmpstr += ", sampling_ratio=" + str(self.sampling_ratio) 92 | tmpstr += ")" 93 | return tmpstr 94 | -------------------------------------------------------------------------------- /detectron2/layers/rotated_boxes.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | from __future__ import absolute_import, division, print_function, unicode_literals 3 | 4 | from detectron2 import _C 5 | 6 | 7 | def pairwise_iou_rotated(boxes1, boxes2): 8 | """ 9 | Return intersection-over-union (Jaccard index) of boxes. 10 | 11 | Both sets of boxes are expected to be in 12 | (x_center, y_center, width, height, angle) format. 13 | 14 | Arguments: 15 | boxes1 (Tensor[N, 5]) 16 | boxes2 (Tensor[M, 5]) 17 | 18 | Returns: 19 | iou (Tensor[N, M]): the NxM matrix containing the pairwise 20 | IoU values for every element in boxes1 and boxes2 21 | """ 22 | return _C.box_iou_rotated(boxes1, boxes2) 23 | -------------------------------------------------------------------------------- /detectron2/layers/shape_spec.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright (c) Facebook, Inc. and its affiliates. 3 | from collections import namedtuple 4 | 5 | 6 | class ShapeSpec(namedtuple("_ShapeSpec", ["channels", "height", "width", "stride"])): 7 | """ 8 | A simple structure that contains basic shape specification about a tensor. 9 | It is often used as the auxiliary inputs/outputs of models, 10 | to complement the lack of shape inference ability among pytorch modules. 11 | 12 | Attributes: 13 | channels: 14 | height: 15 | width: 16 | stride: 17 | """ 18 | 19 | def __new__(cls, *, channels=None, height=None, width=None, stride=None): 20 | return super().__new__(cls, channels, height, width, stride) 21 | -------------------------------------------------------------------------------- /detectron2/layers/wrappers.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | """ 3 | Wrappers around on some nn functions, mainly to support empty tensors. 4 | 5 | Ideally, add support directly in PyTorch to empty tensors in those functions. 
6 | 7 | These can be removed once https://github.com/pytorch/pytorch/issues/12013 8 | is implemented 9 | """ 10 | 11 | from typing import List 12 | import torch 13 | from torch.nn import functional as F 14 | 15 | from detectron2.utils.env import TORCH_VERSION 16 | 17 | 18 | def cat(tensors: List[torch.Tensor], dim: int = 0): 19 | """ 20 | Efficient version of torch.cat that avoids a copy if there is only a single element in a list 21 | """ 22 | assert isinstance(tensors, (list, tuple)) 23 | if len(tensors) == 1: 24 | return tensors[0] 25 | return torch.cat(tensors, dim) 26 | 27 | 28 | class _NewEmptyTensorOp(torch.autograd.Function): 29 | @staticmethod 30 | def forward(ctx, x, new_shape): 31 | ctx.shape = x.shape 32 | return x.new_empty(new_shape) 33 | 34 | @staticmethod 35 | def backward(ctx, grad): 36 | shape = ctx.shape 37 | return _NewEmptyTensorOp.apply(grad, shape), None 38 | 39 | 40 | class Conv2d(torch.nn.Conv2d): 41 | """ 42 | A wrapper around :class:`torch.nn.Conv2d` to support empty inputs and more features. 43 | """ 44 | 45 | def __init__(self, *args, **kwargs): 46 | """ 47 | Extra keyword arguments supported in addition to those in `torch.nn.Conv2d`: 48 | 49 | Args: 50 | norm (nn.Module, optional): a normalization layer 51 | activation (callable(Tensor) -> Tensor): a callable activation function 52 | 53 | It assumes that norm layer is used before activation. 54 | """ 55 | norm = kwargs.pop("norm", None) 56 | activation = kwargs.pop("activation", None) 57 | super().__init__(*args, **kwargs) 58 | 59 | self.norm = norm 60 | self.activation = activation 61 | 62 | def forward(self, x): 63 | # torchscript does not support SyncBatchNorm yet 64 | # https://github.com/pytorch/pytorch/issues/40507 65 | # and we skip these codes in torchscript since: 66 | # 1. currently we only support torchscript in evaluation mode 67 | # 2. features needed by exporting module to torchscript are added in PyTorch 1.6 or 68 | # later version, `Conv2d` in these PyTorch versions has already supported empty inputs. 69 | if not torch.jit.is_scripting(): 70 | if x.numel() == 0 and self.training: 71 | # https://github.com/pytorch/pytorch/issues/12013 72 | assert not isinstance( 73 | self.norm, torch.nn.SyncBatchNorm 74 | ), "SyncBatchNorm does not support empty inputs!" 75 | 76 | x = F.conv2d( 77 | x, self.weight, self.bias, self.stride, self.padding, self.dilation, self.groups 78 | ) 79 | if self.norm is not None: 80 | x = self.norm(x) 81 | if self.activation is not None: 82 | x = self.activation(x) 83 | return x 84 | 85 | 86 | ConvTranspose2d = torch.nn.ConvTranspose2d 87 | BatchNorm2d = torch.nn.BatchNorm2d 88 | interpolate = F.interpolate 89 | 90 | 91 | if TORCH_VERSION > (1, 5): 92 | Linear = torch.nn.Linear 93 | else: 94 | 95 | class Linear(torch.nn.Linear): 96 | """ 97 | A wrapper around :class:`torch.nn.Linear` to support empty inputs and more features. 98 | Because of https://github.com/pytorch/pytorch/issues/34202 99 | """ 100 | 101 | def forward(self, x): 102 | if x.numel() == 0: 103 | output_shape = [x.shape[0], self.weight.shape[0]] 104 | 105 | empty = _NewEmptyTensorOp.apply(x, output_shape) 106 | if self.training: 107 | # This is to make DDP happy. 108 | # DDP expects all workers to have gradient w.r.t the same set of parameters. 
109 | _dummy = sum(x.view(-1)[0] for x in self.parameters()) * 0.0 110 | return empty + _dummy 111 | else: 112 | return empty 113 | 114 | x = super().forward(x) 115 | return x 116 | 117 | 118 | def nonzero_tuple(x): 119 | """ 120 | A 'as_tuple=True' version of torch.nonzero to support torchscript. 121 | because of https://github.com/pytorch/pytorch/issues/38718 122 | """ 123 | if torch.jit.is_scripting(): 124 | if x.dim() == 0: 125 | return x.unsqueeze(0).nonzero().unbind(1) 126 | return x.nonzero().unbind(1) 127 | else: 128 | return x.nonzero(as_tuple=True) 129 | -------------------------------------------------------------------------------- /detectron2/model_zoo/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | """ 3 | Model Zoo API for Detectron2: a collection of functions to create common model architectures and 4 | optionally load pre-trained weights as released in 5 | `MODEL_ZOO.md `_. 6 | """ 7 | from .model_zoo import get, get_config_file, get_checkpoint_url, get_config 8 | 9 | __all__ = ["get_checkpoint_url", "get", "get_config_file", "get_config"] 10 | -------------------------------------------------------------------------------- /detectron2/modeling/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | from detectron2.layers import ShapeSpec 3 | 4 | from .anchor_generator import build_anchor_generator, ANCHOR_GENERATOR_REGISTRY 5 | from .backbone import ( 6 | BACKBONE_REGISTRY, 7 | FPN, 8 | Backbone, 9 | ResNet, 10 | ResNetBlockBase, 11 | build_backbone, 12 | build_resnet_backbone, 13 | make_stage, 14 | ) 15 | from .meta_arch import ( 16 | META_ARCH_REGISTRY, 17 | SEM_SEG_HEADS_REGISTRY, 18 | GeneralizedRCNN, 19 | PanopticFPN, 20 | ProposalNetwork, 21 | RetinaNet, 22 | SemanticSegmentor, 23 | build_model, 24 | build_sem_seg_head, 25 | ) 26 | from .postprocessing import detector_postprocess 27 | from .proposal_generator import ( 28 | PROPOSAL_GENERATOR_REGISTRY, 29 | build_proposal_generator, 30 | RPN_HEAD_REGISTRY, 31 | build_rpn_head, 32 | ) 33 | from .roi_heads import ( 34 | ROI_BOX_HEAD_REGISTRY, 35 | ROI_HEADS_REGISTRY, 36 | ROI_KEYPOINT_HEAD_REGISTRY, 37 | ROI_MASK_HEAD_REGISTRY, 38 | ROIHeads, 39 | StandardROIHeads, 40 | BaseMaskRCNNHead, 41 | BaseKeypointRCNNHead, 42 | FastRCNNOutputLayers, 43 | build_box_head, 44 | build_keypoint_head, 45 | build_mask_head, 46 | build_roi_heads, 47 | ) 48 | from .test_time_augmentation import DatasetMapperTTA, GeneralizedRCNNWithTTA 49 | 50 | _EXCLUDE = {"ShapeSpec"} 51 | __all__ = [k for k in globals().keys() if k not in _EXCLUDE and not k.startswith("_")] 52 | -------------------------------------------------------------------------------- /detectron2/modeling/backbone/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
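Before the backbone package imports continue below, here is a hedged usage sketch for the `Conv2d` wrapper defined in `wrappers.py` above. It is an editorial illustration only; `get_norm` is the helper exported from `detectron2.layers`, as used later in `box_head.py`:

import torch
from torch import nn
from detectron2.layers import Conv2d, get_norm

# The extra `norm` and `activation` keyword arguments are applied after the
# convolution, in that order, exactly as in Conv2d.forward above.
conv = Conv2d(
    64, 128, kernel_size=3, padding=1, bias=False,
    norm=get_norm("GN", 128), activation=nn.ReLU(),
)
y = conv(torch.randn(2, 64, 32, 32))  # y has shape (2, 128, 32, 32)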
2 | from .build import build_backbone, BACKBONE_REGISTRY # noqa F401 isort:skip 3 | 4 | from .backbone import Backbone 5 | from .fpn import FPN 6 | from .resnet import ResNet, ResNetBlockBase, build_resnet_backbone, make_stage 7 | 8 | __all__ = [k for k in globals().keys() if not k.startswith("_")] 9 | # TODO can expose more resnet blocks after careful consideration 10 | -------------------------------------------------------------------------------- /detectron2/modeling/backbone/backbone.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | from abc import ABCMeta, abstractmethod 3 | import torch.nn as nn 4 | 5 | from detectron2.layers import ShapeSpec 6 | 7 | __all__ = ["Backbone"] 8 | 9 | 10 | class Backbone(nn.Module, metaclass=ABCMeta): 11 | """ 12 | Abstract base class for network backbones. 13 | """ 14 | 15 | def __init__(self): 16 | """ 17 | The `__init__` method of any subclass can specify its own set of arguments. 18 | """ 19 | super().__init__() 20 | 21 | @abstractmethod 22 | def forward(self): 23 | """ 24 | Subclasses must override this method, but adhere to the same return type. 25 | 26 | Returns: 27 | dict[str->Tensor]: mapping from feature name (e.g., "res2") to tensor 28 | """ 29 | pass 30 | 31 | @property 32 | def size_divisibility(self) -> int: 33 | """ 34 | Some backbones require the input height and width to be divisible by a 35 | specific integer. This is typically true for encoder / decoder type networks 36 | with lateral connection (e.g., FPN) for which feature maps need to match 37 | dimension in the "bottom up" and "top down" paths. Set to 0 if no specific 38 | input size divisibility is required. 39 | """ 40 | return 0 41 | 42 | def output_shape(self): 43 | """ 44 | Returns: 45 | dict[str->ShapeSpec] 46 | """ 47 | # this is a backward-compatible default 48 | return { 49 | name: ShapeSpec( 50 | channels=self._out_feature_channels[name], stride=self._out_feature_strides[name] 51 | ) 52 | for name in self._out_features 53 | } 54 | -------------------------------------------------------------------------------- /detectron2/modeling/backbone/build.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | from detectron2.layers import ShapeSpec 3 | from detectron2.utils.registry import Registry 4 | 5 | from .backbone import Backbone 6 | 7 | BACKBONE_REGISTRY = Registry("BACKBONE") 8 | BACKBONE_REGISTRY.__doc__ = """ 9 | Registry for backbones, which extract feature maps from images 10 | 11 | The registered object must be a callable that accepts two arguments: 12 | 13 | 1. A :class:`detectron2.config.CfgNode` 14 | 2. A :class:`detectron2.layers.ShapeSpec`, which contains the input shape specification. 15 | 16 | Registered object must return instance of :class:`Backbone`. 17 | """ 18 | 19 | 20 | def build_backbone(cfg, input_shape=None): 21 | """ 22 | Build a backbone from `cfg.MODEL.BACKBONE.NAME`. 
23 | 24 | Returns: 25 | an instance of :class:`Backbone` 26 | """ 27 | if input_shape is None: 28 | input_shape = ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN)) 29 | 30 | backbone_name = cfg.MODEL.BACKBONE.NAME 31 | backbone = BACKBONE_REGISTRY.get(backbone_name)(cfg, input_shape) 32 | assert isinstance(backbone, Backbone) 33 | return backbone 34 | -------------------------------------------------------------------------------- /detectron2/modeling/matcher.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | from typing import List 3 | import torch 4 | 5 | from detectron2.layers import nonzero_tuple 6 | 7 | 8 | class Matcher(object): 9 | """ 10 | This class assigns to each predicted "element" (e.g., a box) a ground-truth 11 | element. Each predicted element will have exactly zero or one matches; each 12 | ground-truth element may be matched to zero or more predicted elements. 13 | 14 | The matching is determined by the MxN match_quality_matrix, which characterizes 15 | how well each (ground-truth, prediction) pair matches the other. For example, 16 | if the elements are boxes, this matrix may contain box intersection-over-union 17 | overlap values. 18 | 19 | The matcher returns (a) a vector of length N containing the index of the 20 | ground-truth element m in [0, M) that matches to prediction n in [0, N). 21 | (b) a vector of length N containing the labels for each prediction. 22 | """ 23 | 24 | def __init__( 25 | self, thresholds: List[float], labels: List[int], allow_low_quality_matches: bool = False 26 | ): 27 | """ 28 | Args: 29 | thresholds (list): a list of thresholds used to stratify predictions 30 | into levels. 31 | labels (list): a list of values to label predictions belonging to 32 | each level. A label can be one of {-1, 0, 1} signifying 33 | {ignore, negative class, positive class}, respectively. 34 | allow_low_quality_matches (bool): if True, produce additional matches 35 | for predictions with maximum match quality lower than high_threshold. 36 | See set_low_quality_matches_ for more details. 37 | 38 | For example, 39 | thresholds = [0.3, 0.5] 40 | labels = [0, -1, 1] 41 | All predictions with iou < 0.3 will be marked with 0 and 42 | thus will be considered as false positives while training. 43 | All predictions with 0.3 <= iou < 0.5 will be marked with -1 and 44 | thus will be ignored. 45 | All predictions with 0.5 <= iou will be marked with 1 and 46 | thus will be considered as true positives. 47 | """ 48 | # Add -inf and +inf to first and last position in thresholds 49 | thresholds = thresholds[:] 50 | assert thresholds[0] > 0 51 | thresholds.insert(0, -float("inf")) 52 | thresholds.append(float("inf")) 53 | # Currently torchscript does not support all + generator 54 | assert all([low <= high for (low, high) in zip(thresholds[:-1], thresholds[1:])]) 55 | assert all([l in [-1, 0, 1] for l in labels]) 56 | assert len(labels) == len(thresholds) - 1 57 | self.thresholds = thresholds 58 | self.labels = labels 59 | self.allow_low_quality_matches = allow_low_quality_matches 60 | 61 | def __call__(self, match_quality_matrix): 62 | """ 63 | Args: 64 | match_quality_matrix (Tensor[float]): an MxN tensor, containing the 65 | pairwise quality between M ground-truth elements and N predicted 66 | elements. All elements must be >= 0 (due to the use of `torch.nonzero` 67 | for selecting indices in :meth:`set_low_quality_matches_`).
68 | 69 | Returns: 70 | matches (Tensor[int64]): a vector of length N, where matches[i] is a matched 71 | ground-truth index in [0, M) 72 | match_labels (Tensor[int8]): a vector of length N, where pred_labels[i] indicates 73 | whether a prediction is a true or false positive or ignored 74 | """ 75 | assert match_quality_matrix.dim() == 2 76 | if match_quality_matrix.numel() == 0: 77 | default_matches = match_quality_matrix.new_full( 78 | (match_quality_matrix.size(1),), 0, dtype=torch.int64 79 | ) 80 | # When no gt boxes exist, we define IOU = 0 and therefore set labels 81 | # to `self.labels[0]`, which usually defaults to background class 0 82 | # To choose to ignore instead, can make labels=[-1,0,-1,1] + set appropriate thresholds 83 | default_match_labels = match_quality_matrix.new_full( 84 | (match_quality_matrix.size(1),), self.labels[0], dtype=torch.int8 85 | ) 86 | return default_matches, default_match_labels 87 | 88 | assert torch.all(match_quality_matrix >= 0) 89 | 90 | # match_quality_matrix is M (gt) x N (predicted) 91 | # Max over gt elements (dim 0) to find best gt candidate for each prediction 92 | matched_vals, matches = match_quality_matrix.max(dim=0) 93 | 94 | match_labels = matches.new_full(matches.size(), 1, dtype=torch.int8) 95 | 96 | for (l, low, high) in zip(self.labels, self.thresholds[:-1], self.thresholds[1:]): 97 | low_high = (matched_vals >= low) & (matched_vals < high) 98 | match_labels[low_high] = l 99 | 100 | if self.allow_low_quality_matches: 101 | self.set_low_quality_matches_(match_labels, match_quality_matrix) 102 | 103 | return matches, match_labels 104 | 105 | def set_low_quality_matches_(self, match_labels, match_quality_matrix): 106 | """ 107 | Produce additional matches for predictions that have only low-quality matches. 108 | Specifically, for each ground-truth G find the set of predictions that have 109 | maximum overlap with it (including ties); for each prediction in that set, if 110 | it is unmatched, then match it to the ground-truth G. 111 | 112 | This function implements the RPN assignment case (i) in Sec. 3.1.2 of 113 | :paper:`Faster R-CNN`. 114 | """ 115 | # For each gt, find the prediction with which it has highest quality 116 | highest_quality_foreach_gt, _ = match_quality_matrix.max(dim=1) 117 | # Find the highest quality match available, even if it is low, including ties. 118 | # Note that the matches qualities must be positive due to the use of 119 | # `torch.nonzero`. 120 | _, pred_inds_with_highest_quality = nonzero_tuple( 121 | match_quality_matrix == highest_quality_foreach_gt[:, None] 122 | ) 123 | # If an anchor was labeled positive only due to a low-quality match 124 | # with gt_A, but it has larger overlap with gt_B, it's matched index will still be gt_B. 125 | # This follows the implementation in Detectron, and is found to have no significant impact. 126 | match_labels[pred_inds_with_highest_quality] = 1 127 | -------------------------------------------------------------------------------- /detectron2/modeling/meta_arch/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright (c) Facebook, Inc. and its affiliates. 
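Before the meta-architecture package continues below, here is a hedged sketch (editorial, not from the source) of how the `Matcher` above assigns labels, using the thresholds and labels from its docstring:

import torch
from detectron2.modeling.matcher import Matcher

# Pairwise IoU between M=2 ground-truth boxes (rows) and N=4 predictions (columns).
iou = torch.tensor([[0.9, 0.4, 0.1, 0.05],
                    [0.2, 0.6, 0.3, 0.02]])
matcher = Matcher(thresholds=[0.3, 0.5], labels=[0, -1, 1])
matches, match_labels = matcher(iou)
# matches      -> tensor([0, 1, 1, 0]): the best ground-truth index for each prediction
# match_labels -> tensor([1, 1, -1, 0]): positive, positive, ignored, negative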
3 | 4 | from .build import META_ARCH_REGISTRY, build_model # isort:skip 5 | 6 | from .panoptic_fpn import PanopticFPN 7 | 8 | # import all the meta_arch, so they will be registered 9 | from .rcnn import GeneralizedRCNN, ProposalNetwork 10 | from .retinanet import RetinaNet 11 | from .semantic_seg import SEM_SEG_HEADS_REGISTRY, SemanticSegmentor, build_sem_seg_head 12 | 13 | 14 | __all__ = list(globals().keys()) 15 | -------------------------------------------------------------------------------- /detectron2/modeling/meta_arch/build.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | import torch 3 | 4 | from detectron2.utils.registry import Registry 5 | 6 | META_ARCH_REGISTRY = Registry("META_ARCH") # noqa F401 isort:skip 7 | META_ARCH_REGISTRY.__doc__ = """ 8 | Registry for meta-architectures, i.e. the whole model. 9 | 10 | The registered object will be called with `obj(cfg)` 11 | and expected to return a `nn.Module` object. 12 | """ 13 | 14 | 15 | def build_model(cfg): 16 | """ 17 | Build the whole model architecture, defined by ``cfg.MODEL.META_ARCHITECTURE``. 18 | Note that it does not load any weights from ``cfg``. 19 | """ 20 | meta_arch = cfg.MODEL.META_ARCHITECTURE 21 | model = META_ARCH_REGISTRY.get(meta_arch)(cfg) 22 | model.to(torch.device(cfg.MODEL.DEVICE)) 23 | return model 24 | -------------------------------------------------------------------------------- /detectron2/modeling/postprocessing.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | import torch 3 | from torch.nn import functional as F 4 | 5 | from detectron2.layers import paste_masks_in_image 6 | from detectron2.structures import Instances 7 | from detectron2.utils.memory import retry_if_cuda_oom 8 | 9 | 10 | # perhaps should rename to "resize_instance" 11 | def detector_postprocess( 12 | results: Instances, output_height: int, output_width: int, mask_threshold: float = 0.5 13 | ): 14 | """ 15 | Resize the output instances. 16 | The input images are often resized when entering an object detector. 17 | As a result, we often need the outputs of the detector in a different 18 | resolution from its inputs. 19 | 20 | This function will resize the raw outputs of an R-CNN detector 21 | to produce outputs according to the desired output resolution. 22 | 23 | Args: 24 | results (Instances): the raw outputs from the detector. 25 | `results.image_size` contains the input image resolution the detector sees. 26 | This object might be modified in-place. 27 | output_height, output_width: the desired output resolution. 28 | 29 | Returns: 30 | Instances: the resized output from the model, based on the output resolution 31 | """ 32 | # Change to 'if is_tracing' after PT1.7 33 | if isinstance(output_height, torch.Tensor): 34 | # Converts integer tensors to float temporaries to ensure true 35 | # division is performed when computing scale_x and scale_y. 
36 | output_width_tmp = output_width.float() 37 | output_height_tmp = output_height.float() 38 | new_size = torch.stack([output_height, output_width]) 39 | else: 40 | new_size = (output_height, output_width) 41 | output_width_tmp = output_width 42 | output_height_tmp = output_height 43 | 44 | scale_x, scale_y = ( 45 | output_width_tmp / results.image_size[1], 46 | output_height_tmp / results.image_size[0], 47 | ) 48 | results = Instances(new_size, **results.get_fields()) 49 | 50 | if results.has("pred_boxes"): 51 | output_boxes = results.pred_boxes 52 | elif results.has("proposal_boxes"): 53 | output_boxes = results.proposal_boxes 54 | else: 55 | output_boxes = None 56 | assert output_boxes is not None, "Predictions must contain boxes!" 57 | 58 | output_boxes.scale(scale_x, scale_y) 59 | output_boxes.clip(results.image_size) 60 | 61 | results = results[output_boxes.nonempty()] 62 | 63 | if results.has("pred_masks"): 64 | results.pred_masks = retry_if_cuda_oom(paste_masks_in_image)( 65 | results.pred_masks[:, 0, :, :], # N, 1, M, M 66 | results.pred_boxes, 67 | results.image_size, 68 | threshold=mask_threshold, 69 | ) 70 | 71 | if results.has("pred_keypoints"): 72 | results.pred_keypoints[:, :, 0] *= scale_x 73 | results.pred_keypoints[:, :, 1] *= scale_y 74 | 75 | return results 76 | 77 | 78 | def sem_seg_postprocess(result, img_size, output_height, output_width): 79 | """ 80 | Return semantic segmentation predictions in the original resolution. 81 | 82 | The input images are often resized when entering the semantic segmentor. Moreover, in some 83 | cases, they are also padded inside the segmentor to be divisible by the maximum network stride. 84 | As a result, we often need the predictions of the segmentor in a different 85 | resolution from its inputs. 86 | 87 | Args: 88 | result (Tensor): semantic segmentation prediction logits. A tensor of shape (C, H, W), 89 | where C is the number of classes, and H, W are the height and width of the prediction. 90 | img_size (tuple): image size that the segmentor takes as input. 91 | output_height, output_width: the desired output resolution. 92 | 93 | Returns: 94 | semantic segmentation prediction (Tensor): A tensor of the shape 95 | (C, output_height, output_width) that contains per-pixel soft predictions. 96 | """ 97 | result = result[:, : img_size[0], : img_size[1]].expand(1, -1, -1, -1) 98 | result = F.interpolate( 99 | result, size=(output_height, output_width), mode="bilinear", align_corners=False 100 | )[0] 101 | return result 102 | -------------------------------------------------------------------------------- /detectron2/modeling/proposal_generator/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | from .build import PROPOSAL_GENERATOR_REGISTRY, build_proposal_generator 3 | from .rpn import RPN_HEAD_REGISTRY, build_rpn_head, RPN 4 | 5 | __all__ = list(globals().keys()) 6 | -------------------------------------------------------------------------------- /detectron2/modeling/proposal_generator/build.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | from detectron2.utils.registry import Registry 3 | 4 | PROPOSAL_GENERATOR_REGISTRY = Registry("PROPOSAL_GENERATOR") 5 | PROPOSAL_GENERATOR_REGISTRY.__doc__ = """ 6 | Registry for proposal generator, which produces object proposals from feature maps.
7 | 8 | The registered object will be called with `obj(cfg, input_shape)`. 9 | The call should return a `nn.Module` object. 10 | """ 11 | 12 | from . import rpn, rrpn # noqa F401 isort:skip 13 | 14 | 15 | def build_proposal_generator(cfg, input_shape): 16 | """ 17 | Build a proposal generator from `cfg.MODEL.PROPOSAL_GENERATOR.NAME`. 18 | The name can be "PrecomputedProposals" to use no proposal generator. 19 | """ 20 | name = cfg.MODEL.PROPOSAL_GENERATOR.NAME 21 | if name == "PrecomputedProposals": 22 | return None 23 | 24 | return PROPOSAL_GENERATOR_REGISTRY.get(name)(cfg, input_shape) 25 | -------------------------------------------------------------------------------- /detectron2/modeling/roi_heads/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | from .box_head import ROI_BOX_HEAD_REGISTRY, build_box_head, FastRCNNConvFCHead 3 | from .keypoint_head import ( 4 | ROI_KEYPOINT_HEAD_REGISTRY, 5 | build_keypoint_head, 6 | BaseKeypointRCNNHead, 7 | KRCNNConvDeconvUpsampleHead, 8 | ) 9 | from .mask_head import ( 10 | ROI_MASK_HEAD_REGISTRY, 11 | build_mask_head, 12 | BaseMaskRCNNHead, 13 | MaskRCNNConvUpsampleHead, 14 | ) 15 | from .roi_heads import ( 16 | ROI_HEADS_REGISTRY, 17 | ROIHeads, 18 | Res5ROIHeads, 19 | StandardROIHeads, 20 | build_roi_heads, 21 | select_foreground_proposals, 22 | ) 23 | from .rotated_fast_rcnn import RROIHeads 24 | from .fast_rcnn import FastRCNNOutputLayers 25 | 26 | from . import cascade_rcnn # isort:skip 27 | 28 | __all__ = list(globals().keys()) 29 | -------------------------------------------------------------------------------- /detectron2/modeling/roi_heads/box_head.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | import numpy as np 3 | from typing import List 4 | import fvcore.nn.weight_init as weight_init 5 | import torch 6 | from torch import nn 7 | 8 | from detectron2.config import configurable 9 | from detectron2.layers import Conv2d, Linear, ShapeSpec, get_norm 10 | from detectron2.utils.registry import Registry 11 | 12 | __all__ = ["FastRCNNConvFCHead", "build_box_head", "ROI_BOX_HEAD_REGISTRY"] 13 | 14 | ROI_BOX_HEAD_REGISTRY = Registry("ROI_BOX_HEAD") 15 | ROI_BOX_HEAD_REGISTRY.__doc__ = """ 16 | Registry for box heads, which make box predictions from per-region features. 17 | 18 | The registered object will be called with `obj(cfg, input_shape)`. 19 | """ 20 | 21 | 22 | # To get torchscript support, we make the head a subclass of `nn.Sequential`. 23 | # Therefore, to add new layers in this head class, please make sure they are 24 | # added in the order they will be used in forward(). 25 | @ROI_BOX_HEAD_REGISTRY.register() 26 | class FastRCNNConvFCHead(nn.Sequential): 27 | """ 28 | A head with several 3x3 conv layers (each followed by norm & relu) and then 29 | several fc layers (each followed by relu). 30 | """ 31 | 32 | @configurable 33 | def __init__( 34 | self, input_shape: ShapeSpec, *, conv_dims: List[int], fc_dims: List[int], conv_norm="" 35 | ): 36 | """ 37 | NOTE: this interface is experimental. 38 | 39 | Args: 40 | input_shape (ShapeSpec): shape of the input feature. 41 | conv_dims (list[int]): the output dimensions of the conv layers 42 | fc_dims (list[int]): the output dimensions of the fc layers 43 | conv_norm (str or callable): normalization for the conv layers. 44 | See :func:`detectron2.layers.get_norm` for supported types. 
45 | """ 46 | super().__init__() 47 | assert len(conv_dims) + len(fc_dims) > 0 48 | 49 | self._output_size = (input_shape.channels, input_shape.height, input_shape.width) 50 | 51 | self.conv_norm_relus = [] 52 | for k, conv_dim in enumerate(conv_dims): 53 | conv = Conv2d( 54 | self._output_size[0], 55 | conv_dim, 56 | kernel_size=3, 57 | padding=1, 58 | bias=not conv_norm, 59 | norm=get_norm(conv_norm, conv_dim), 60 | activation=nn.ReLU(), 61 | ) 62 | self.add_module("conv{}".format(k + 1), conv) 63 | self.conv_norm_relus.append(conv) 64 | self._output_size = (conv_dim, self._output_size[1], self._output_size[2]) 65 | 66 | self.fcs = [] 67 | for k, fc_dim in enumerate(fc_dims): 68 | if k == 0: 69 | self.add_module("flatten", nn.Flatten()) 70 | fc = Linear(int(np.prod(self._output_size)), fc_dim) 71 | self.add_module("fc{}".format(k + 1), fc) 72 | self.add_module("fc_relu{}".format(k + 1), nn.ReLU()) 73 | self.fcs.append(fc) 74 | self._output_size = fc_dim 75 | 76 | for layer in self.conv_norm_relus: 77 | weight_init.c2_msra_fill(layer) 78 | for layer in self.fcs: 79 | weight_init.c2_xavier_fill(layer) 80 | 81 | @classmethod 82 | def from_config(cls, cfg, input_shape): 83 | num_conv = cfg.MODEL.ROI_BOX_HEAD.NUM_CONV 84 | conv_dim = cfg.MODEL.ROI_BOX_HEAD.CONV_DIM 85 | num_fc = cfg.MODEL.ROI_BOX_HEAD.NUM_FC 86 | fc_dim = cfg.MODEL.ROI_BOX_HEAD.FC_DIM 87 | return { 88 | "input_shape": input_shape, 89 | "conv_dims": [conv_dim] * num_conv, 90 | "fc_dims": [fc_dim] * num_fc, 91 | "conv_norm": cfg.MODEL.ROI_BOX_HEAD.NORM, 92 | } 93 | 94 | def forward(self, x): 95 | for layer in self: 96 | x = layer(x) 97 | return x 98 | 99 | @property 100 | @torch.jit.unused 101 | def output_shape(self): 102 | """ 103 | Returns: 104 | ShapeSpec: the output feature shape 105 | """ 106 | o = self._output_size 107 | if isinstance(o, int): 108 | return ShapeSpec(channels=o) 109 | else: 110 | return ShapeSpec(channels=o[0], height=o[1], width=o[2]) 111 | 112 | 113 | def build_box_head(cfg, input_shape): 114 | """ 115 | Build a box head defined by `cfg.MODEL.ROI_BOX_HEAD.NAME`. 116 | """ 117 | name = cfg.MODEL.ROI_BOX_HEAD.NAME 118 | return ROI_BOX_HEAD_REGISTRY.get(name)(cfg, input_shape) 119 | -------------------------------------------------------------------------------- /detectron2/modeling/sampling.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | import torch 3 | 4 | from detectron2.layers import nonzero_tuple 5 | 6 | __all__ = ["subsample_labels"] 7 | 8 | 9 | def subsample_labels( 10 | labels: torch.Tensor, num_samples: int, positive_fraction: float, bg_label: int 11 | ): 12 | """ 13 | Return `num_samples` (or fewer, if not enough found) 14 | random samples from `labels` which is a mixture of positives & negatives. 15 | It will try to return as many positives as possible without 16 | exceeding `positive_fraction * num_samples`, and then try to 17 | fill the remaining slots with negatives. 18 | 19 | Args: 20 | labels (Tensor): (N, ) label vector with values: 21 | * -1: ignore 22 | * bg_label: background ("negative") class 23 | * otherwise: one or more foreground ("positive") classes 24 | num_samples (int): The total number of labels with value >= 0 to return. 25 | Values that are not sampled will be filled with -1 (ignore). 26 | positive_fraction (float): The number of subsampled labels with values > 0 27 | is `min(num_positives, int(positive_fraction * num_samples))`. 
The number 28 | of negatives sampled is `min(num_negatives, num_samples - num_positives_sampled)`. 29 | In other words, if there are not enough positives, the sample is filled with 30 | negatives. If there are also not enough negatives, then as many elements are 31 | sampled as possible. 32 | bg_label (int): label index of background ("negative") class. 33 | 34 | Returns: 35 | pos_idx, neg_idx (Tensor): 36 | 1D vector of indices. The total length of both is `num_samples` or fewer. 37 | """ 38 | positive = nonzero_tuple((labels != -1) & (labels != bg_label))[0] 39 | negative = nonzero_tuple(labels == bg_label)[0] 40 | 41 | num_pos = int(num_samples * positive_fraction) 42 | # protect against not enough positive examples 43 | num_pos = min(positive.numel(), num_pos) 44 | num_neg = num_samples - num_pos 45 | # protect against not enough negative examples 46 | num_neg = min(negative.numel(), num_neg) 47 | 48 | # randomly select positive and negative examples 49 | perm1 = torch.randperm(positive.numel(), device=positive.device)[:num_pos] 50 | perm2 = torch.randperm(negative.numel(), device=negative.device)[:num_neg] 51 | 52 | pos_idx = positive[perm1] 53 | neg_idx = negative[perm2] 54 | return pos_idx, neg_idx 55 | -------------------------------------------------------------------------------- /detectron2/projects/README.md: -------------------------------------------------------------------------------- 1 | 2 | Projects live in the [`projects` directory](../../projects) under the root of this repository, but not here. 3 | -------------------------------------------------------------------------------- /detectron2/projects/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | import importlib 3 | from pathlib import Path 4 | 5 | _PROJECTS = { 6 | "point_rend": "PointRend", 7 | "deeplab": "DeepLab", 8 | "panoptic_deeplab": "Panoptic-DeepLab", 9 | } 10 | _PROJECT_ROOT = Path(__file__).parent.parent.parent / "projects" 11 | 12 | if _PROJECT_ROOT.is_dir(): 13 | # This is true only for in-place installation (pip install -e, setup.py develop), 14 | # where setup(package_dir=) does not work: https://github.com/pypa/setuptools/issues/230 15 | 16 | class _D2ProjectsFinder(importlib.abc.MetaPathFinder): 17 | def find_spec(self, name, path, target=None): 18 | if not name.startswith("detectron2.projects."): 19 | return 20 | project_name = name.split(".")[-1] 21 | project_dir = _PROJECTS.get(project_name) 22 | if not project_dir: 23 | return 24 | target_file = _PROJECT_ROOT / f"{project_dir}/{project_name}/__init__.py" 25 | if not target_file.is_file(): 26 | return 27 | return importlib.util.spec_from_file_location(name, target_file) 28 | 29 | import sys 30 | 31 | sys.meta_path.append(_D2ProjectsFinder()) 32 | -------------------------------------------------------------------------------- /detectron2/solver/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates.
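Before the solver package imports continue below, here is a hedged usage sketch for `subsample_labels` above (an editorial illustration with made-up labels):

import torch
from detectron2.modeling.sampling import subsample_labels

# -1 = ignore, 0 = background (bg_label here), anything else = foreground
labels = torch.tensor([-1, 0, 0, 0, 1, 2, 0, 1])
pos_idx, neg_idx = subsample_labels(labels, num_samples=4, positive_fraction=0.5, bg_label=0)
# pos_idx holds at most 2 foreground indices (drawn from {4, 5, 7});
# neg_idx fills the remaining slots with background indices (drawn from {1, 2, 3, 6}).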
2 | from .build import build_lr_scheduler, build_optimizer, get_default_optimizer_params 3 | from .lr_scheduler import WarmupCosineLR, WarmupMultiStepLR 4 | 5 | __all__ = [k for k in globals().keys() if not k.startswith("_")] 6 | -------------------------------------------------------------------------------- /detectron2/solver/lr_scheduler.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | import math 3 | from bisect import bisect_right 4 | from typing import List 5 | import torch 6 | 7 | # NOTE: PyTorch's LR scheduler interface uses names that assume the LR changes 8 | # only on epoch boundaries. We typically use iteration based schedules instead. 9 | # As a result, "epoch" (e.g., as in self.last_epoch) should be understood to mean 10 | # "iteration" instead. 11 | 12 | # FIXME: ideally this would be achieved with a CombinedLRScheduler, separating 13 | # MultiStepLR with WarmupLR but the current LRScheduler design doesn't allow it. 14 | 15 | 16 | class WarmupMultiStepLR(torch.optim.lr_scheduler._LRScheduler): 17 | def __init__( 18 | self, 19 | optimizer: torch.optim.Optimizer, 20 | milestones: List[int], 21 | gamma: float = 0.1, 22 | warmup_factor: float = 0.001, 23 | warmup_iters: int = 1000, 24 | warmup_method: str = "linear", 25 | last_epoch: int = -1, 26 | ): 27 | if not list(milestones) == sorted(milestones): 28 | raise ValueError( 29 | "Milestones should be a list of" " increasing integers. Got {}", milestones 30 | ) 31 | self.milestones = milestones 32 | self.gamma = gamma 33 | self.warmup_factor = warmup_factor 34 | self.warmup_iters = warmup_iters 35 | self.warmup_method = warmup_method 36 | super().__init__(optimizer, last_epoch) 37 | 38 | def get_lr(self) -> List[float]: 39 | warmup_factor = _get_warmup_factor_at_iter( 40 | self.warmup_method, self.last_epoch, self.warmup_iters, self.warmup_factor 41 | ) 42 | return [ 43 | base_lr * warmup_factor * self.gamma ** bisect_right(self.milestones, self.last_epoch) 44 | for base_lr in self.base_lrs 45 | ] 46 | 47 | def _compute_values(self) -> List[float]: 48 | # The new interface 49 | return self.get_lr() 50 | 51 | 52 | class WarmupCosineLR(torch.optim.lr_scheduler._LRScheduler): 53 | def __init__( 54 | self, 55 | optimizer: torch.optim.Optimizer, 56 | max_iters: int, 57 | warmup_factor: float = 0.001, 58 | warmup_iters: int = 1000, 59 | warmup_method: str = "linear", 60 | last_epoch: int = -1, 61 | ): 62 | self.max_iters = max_iters 63 | self.warmup_factor = warmup_factor 64 | self.warmup_iters = warmup_iters 65 | self.warmup_method = warmup_method 66 | super().__init__(optimizer, last_epoch) 67 | 68 | def get_lr(self) -> List[float]: 69 | warmup_factor = _get_warmup_factor_at_iter( 70 | self.warmup_method, self.last_epoch, self.warmup_iters, self.warmup_factor 71 | ) 72 | # Different definitions of half-cosine with warmup are possible. For 73 | # simplicity we multiply the standard half-cosine schedule by the warmup 74 | # factor. An alternative is to start the period of the cosine at warmup_iters 75 | # instead of at 0. In the case that warmup_iters << max_iters the two are 76 | # very close to each other. 
77 | return [ 78 | base_lr 79 | * warmup_factor 80 | * 0.5 81 | * (1.0 + math.cos(math.pi * self.last_epoch / self.max_iters)) 82 | for base_lr in self.base_lrs 83 | ] 84 | 85 | def _compute_values(self) -> List[float]: 86 | # The new interface 87 | return self.get_lr() 88 | 89 | 90 | def _get_warmup_factor_at_iter( 91 | method: str, iter: int, warmup_iters: int, warmup_factor: float 92 | ) -> float: 93 | """ 94 | Return the learning rate warmup factor at a specific iteration. 95 | See :paper:`ImageNet in 1h` for more details. 96 | 97 | Args: 98 | method (str): warmup method; either "constant" or "linear". 99 | iter (int): iteration at which to calculate the warmup factor. 100 | warmup_iters (int): the number of warmup iterations. 101 | warmup_factor (float): the base warmup factor (the meaning changes according 102 | to the method used). 103 | 104 | Returns: 105 | float: the effective warmup factor at the given iteration. 106 | """ 107 | if iter >= warmup_iters: 108 | return 1.0 109 | 110 | if method == "constant": 111 | return warmup_factor 112 | elif method == "linear": 113 | alpha = iter / warmup_iters 114 | return warmup_factor * (1 - alpha) + alpha 115 | else: 116 | raise ValueError("Unknown warmup method: {}".format(method)) 117 | -------------------------------------------------------------------------------- /detectron2/structures/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | from .boxes import Boxes, BoxMode, pairwise_iou, pairwise_ioa 3 | from .image_list import ImageList 4 | 5 | from .instances import Instances 6 | from .keypoints import Keypoints, heatmaps_to_keypoints 7 | from .masks import BitMasks, PolygonMasks, rasterize_polygons_within_box, polygons_to_bitmask 8 | from .rotated_boxes import RotatedBoxes 9 | from .rotated_boxes import pairwise_iou as pairwise_iou_rotated 10 | 11 | __all__ = [k for k in globals().keys() if not k.startswith("_")] 12 | -------------------------------------------------------------------------------- /detectron2/structures/image_list.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | from __future__ import division 3 | from typing import Any, List, Tuple 4 | import torch 5 | from torch import device 6 | from torch.nn import functional as F 7 | 8 | from detectron2.utils.env import TORCH_VERSION 9 | 10 | 11 | class ImageList(object): 12 | """ 13 | Structure that holds a list of images (of possibly 14 | varying sizes) as a single tensor. 15 | This works by padding the images to the same size, 16 | and storing in a field the original sizes of each image 17 | 18 | Attributes: 19 | image_sizes (list[tuple[int, int]]): each tuple is (h, w) 20 | """ 21 | 22 | def __init__(self, tensor: torch.Tensor, image_sizes: List[Tuple[int, int]]): 23 | """ 24 | Arguments: 25 | tensor (Tensor): of shape (N, H, W) or (N, C_1, ..., C_K, H, W) where K >= 1 26 | image_sizes (list[tuple[int, int]]): Each tuple is (h, w). It can 27 | be smaller than (H, W) due to padding. 28 | """ 29 | self.tensor = tensor 30 | self.image_sizes = image_sizes 31 | 32 | def __len__(self) -> int: 33 | return len(self.image_sizes) 34 | 35 | def __getitem__(self, idx) -> torch.Tensor: 36 | """ 37 | Access the individual image in its original size. 
38 | 39 | Args: 40 | idx: int or slice 41 | 42 | Returns: 43 | Tensor: an image of shape (H, W) or (C_1, ..., C_K, H, W) where K >= 1 44 | """ 45 | size = self.image_sizes[idx] 46 | return self.tensor[idx, ..., : size[0], : size[1]] 47 | 48 | @torch.jit.unused 49 | def to(self, *args: Any, **kwargs: Any) -> "ImageList": 50 | cast_tensor = self.tensor.to(*args, **kwargs) 51 | return ImageList(cast_tensor, self.image_sizes) 52 | 53 | @property 54 | def device(self) -> device: 55 | return self.tensor.device 56 | 57 | @staticmethod 58 | def from_tensors( 59 | tensors: List[torch.Tensor], size_divisibility: int = 0, pad_value: float = 0.0 60 | ) -> "ImageList": 61 | """ 62 | Args: 63 | tensors: a tuple or list of `torch.Tensors`, each of shape (Hi, Wi) or 64 | (C_1, ..., C_K, Hi, Wi) where K >= 1. The Tensors will be padded 65 | to the same shape with `pad_value`. 66 | size_divisibility (int): If `size_divisibility > 0`, add padding to ensure 67 | the common height and width is divisible by `size_divisibility`. 68 | This depends on the model and many models need a divisibility of 32. 69 | pad_value (float): value to pad 70 | 71 | Returns: 72 | an `ImageList`. 73 | """ 74 | assert len(tensors) > 0 75 | assert isinstance(tensors, (tuple, list)) 76 | for t in tensors: 77 | assert isinstance(t, torch.Tensor), type(t) 78 | assert t.shape[1:-2] == tensors[0].shape[1:-2], t.shape 79 | 80 | # Magic code below that handles dynamic shapes for both scripting and tracing ... 81 | 82 | image_sizes = [(im.shape[-2], im.shape[-1]) for im in tensors] 83 | 84 | if torch.jit.is_scripting(): 85 | max_size = torch.stack([torch.as_tensor(x) for x in image_sizes]).max(0).values 86 | if size_divisibility > 1: 87 | stride = size_divisibility 88 | # the last two dims are H,W, both subject to divisibility requirement 89 | max_size = (max_size + (stride - 1)) // stride * stride 90 | 91 | max_size: List[int] = max_size.to(dtype=torch.long).tolist() 92 | else: 93 | # https://github.com/pytorch/pytorch/issues/42448 94 | if TORCH_VERSION >= (1, 7) and torch.jit.is_tracing(): 95 | # In tracing mode, x.shape[i] is a scalar Tensor, and should not be converted 96 | # to int: this will cause the traced graph to have hard-coded shapes. 97 | # Instead we convert each shape to a vector with a stack() 98 | image_sizes = [torch.stack(x) for x in image_sizes] 99 | 100 | # maximum (H, W) for the last two dims 101 | # find the maximum in a tracable way 102 | max_size = torch.stack(image_sizes).max(0).values 103 | else: 104 | # Original eager logic here -- not scripting, not tracing: 105 | # (can be unified with scripting after 106 | # https://github.com/pytorch/pytorch/issues/47379) 107 | max_size = torch.as_tensor( 108 | [max(s) for s in zip(*[img.shape[-2:] for img in tensors])] 109 | ) 110 | 111 | if size_divisibility > 1: 112 | stride = size_divisibility 113 | # the last two dims are H,W, both subject to divisibility requirement 114 | max_size = (max_size + (stride - 1)) // stride * stride 115 | 116 | if len(tensors) == 1: 117 | # This seems slightly (2%) faster. 
118 | # TODO: check whether it's faster for multiple images as well 119 | image_size = image_sizes[0] 120 | padding_size = [0, max_size[-1] - image_size[1], 0, max_size[-2] - image_size[0]] 121 | batched_imgs = F.pad(tensors[0], padding_size, value=pad_value).unsqueeze_(0) 122 | else: 123 | # max_size can be a tensor in tracing mode, therefore convert to list 124 | batch_shape = [len(tensors)] + list(tensors[0].shape[:-2]) + list(max_size) 125 | batched_imgs = tensors[0].new_full(batch_shape, pad_value) 126 | for img, pad_img in zip(tensors, batched_imgs): 127 | pad_img[..., : img.shape[-2], : img.shape[-1]].copy_(img) 128 | 129 | return ImageList(batched_imgs.contiguous(), image_sizes) 130 | -------------------------------------------------------------------------------- /detectron2/structures/instances.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | import itertools 3 | from typing import Any, Dict, List, Tuple, Union 4 | import torch 5 | 6 | 7 | class Instances: 8 | """ 9 | This class represents a list of instances in an image. 10 | It stores the attributes of instances (e.g., boxes, masks, labels, scores) as "fields". 11 | All fields must have the same ``__len__`` which is the number of instances. 12 | 13 | All other (non-field) attributes of this class are considered private: 14 | they must start with '_' and are not modifiable by a user. 15 | 16 | Some basic usage: 17 | 18 | 1. Set/get/check a field: 19 | 20 | .. code-block:: python 21 | 22 | instances.gt_boxes = Boxes(...) 23 | print(instances.pred_masks) # a tensor of shape (N, H, W) 24 | print('gt_masks' in instances) 25 | 26 | 2. ``len(instances)`` returns the number of instances 27 | 3. Indexing: ``instances[indices]`` will apply the indexing on all the fields 28 | and returns a new :class:`Instances`. 29 | Typically, ``indices`` is a integer vector of indices, 30 | or a binary mask of length ``num_instances`` 31 | 32 | .. code-block:: python 33 | 34 | category_3_detections = instances[instances.pred_classes == 3] 35 | confident_detections = instances[instances.scores > 0.9] 36 | """ 37 | 38 | def __init__(self, image_size: Tuple[int, int], **kwargs: Any): 39 | """ 40 | Args: 41 | image_size (height, width): the spatial size of the image. 42 | kwargs: fields to add to this `Instances`. 43 | """ 44 | self._image_size = image_size 45 | self._fields: Dict[str, Any] = {} 46 | for k, v in kwargs.items(): 47 | self.set(k, v) 48 | 49 | @property 50 | def image_size(self) -> Tuple[int, int]: 51 | """ 52 | Returns: 53 | tuple: height, width 54 | """ 55 | return self._image_size 56 | 57 | def __setattr__(self, name: str, val: Any) -> None: 58 | if name.startswith("_"): 59 | super().__setattr__(name, val) 60 | else: 61 | self.set(name, val) 62 | 63 | def __getattr__(self, name: str) -> Any: 64 | if name == "_fields" or name not in self._fields: 65 | raise AttributeError("Cannot find field '{}' in the given Instances!".format(name)) 66 | return self._fields[name] 67 | 68 | def set(self, name: str, value: Any) -> None: 69 | """ 70 | Set the field named `name` to `value`. 71 | The length of `value` must be the number of instances, 72 | and must agree with other existing fields in this object. 
73 | """ 74 | data_len = len(value) 75 | if len(self._fields): 76 | assert ( 77 | len(self) == data_len 78 | ), "Adding a field of length {} to a Instances of length {}".format(data_len, len(self)) 79 | self._fields[name] = value 80 | 81 | def has(self, name: str) -> bool: 82 | """ 83 | Returns: 84 | bool: whether the field called `name` exists. 85 | """ 86 | return name in self._fields 87 | 88 | def remove(self, name: str) -> None: 89 | """ 90 | Remove the field called `name`. 91 | """ 92 | del self._fields[name] 93 | 94 | def get(self, name: str) -> Any: 95 | """ 96 | Returns the field called `name`. 97 | """ 98 | return self._fields[name] 99 | 100 | def get_fields(self) -> Dict[str, Any]: 101 | """ 102 | Returns: 103 | dict: a dict which maps names (str) to data of the fields 104 | 105 | Modifying the returned dict will modify this instance. 106 | """ 107 | return self._fields 108 | 109 | # Tensor-like methods 110 | def to(self, *args: Any, **kwargs: Any) -> "Instances": 111 | """ 112 | Returns: 113 | Instances: all fields are called with a `to(device)`, if the field has this method. 114 | """ 115 | ret = Instances(self._image_size) 116 | for k, v in self._fields.items(): 117 | if hasattr(v, "to"): 118 | v = v.to(*args, **kwargs) 119 | ret.set(k, v) 120 | return ret 121 | 122 | def __getitem__(self, item: Union[int, slice, torch.BoolTensor]) -> "Instances": 123 | """ 124 | Args: 125 | item: an index-like object and will be used to index all the fields. 126 | 127 | Returns: 128 | If `item` is a string, return the data in the corresponding field. 129 | Otherwise, returns an `Instances` where all fields are indexed by `item`. 130 | """ 131 | if type(item) == int: 132 | if item >= len(self) or item < -len(self): 133 | raise IndexError("Instances index out of range!") 134 | else: 135 | item = slice(item, None, len(self)) 136 | 137 | ret = Instances(self._image_size) 138 | for k, v in self._fields.items(): 139 | ret.set(k, v[item]) 140 | return ret 141 | 142 | def __len__(self) -> int: 143 | for v in self._fields.values(): 144 | # use __len__ because len() has to be int and is not friendly to tracing 145 | return v.__len__() 146 | raise NotImplementedError("Empty Instances does not support __len__!") 147 | 148 | def __iter__(self): 149 | raise NotImplementedError("`Instances` object is not iterable!") 150 | 151 | @staticmethod 152 | def cat(instance_lists: List["Instances"]) -> "Instances": 153 | """ 154 | Args: 155 | instance_lists (list[Instances]) 156 | 157 | Returns: 158 | Instances 159 | """ 160 | assert all(isinstance(i, Instances) for i in instance_lists) 161 | assert len(instance_lists) > 0 162 | if len(instance_lists) == 1: 163 | return instance_lists[0] 164 | 165 | image_size = instance_lists[0].image_size 166 | for i in instance_lists[1:]: 167 | assert i.image_size == image_size 168 | ret = Instances(image_size) 169 | for k in instance_lists[0]._fields.keys(): 170 | values = [i.get(k) for i in instance_lists] 171 | v0 = values[0] 172 | if isinstance(v0, torch.Tensor): 173 | values = torch.cat(values, dim=0) 174 | elif isinstance(v0, list): 175 | values = list(itertools.chain(*values)) 176 | elif hasattr(type(v0), "cat"): 177 | values = type(v0).cat(values) 178 | else: 179 | raise ValueError("Unsupported type {} for concatenation".format(type(v0))) 180 | ret.set(k, values) 181 | return ret 182 | 183 | def __str__(self) -> str: 184 | s = self.__class__.__name__ + "(" 185 | s += "num_instances={}, ".format(len(self)) 186 | s += "image_height={}, ".format(self._image_size[0]) 187 | s 
+= "image_width={}, ".format(self._image_size[1]) 188 | s += "fields=[{}])".format(", ".join((f"{k}: {v}" for k, v in self._fields.items()))) 189 | return s 190 | 191 | __repr__ = __str__ 192 | -------------------------------------------------------------------------------- /detectron2/utils/README.md: -------------------------------------------------------------------------------- 1 | # Utility functions 2 | 3 | This folder contain utility functions that are not used in the 4 | core library, but are useful for building models or training 5 | code using the config system. 6 | -------------------------------------------------------------------------------- /detectron2/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | -------------------------------------------------------------------------------- /detectron2/utils/analysis.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # -*- coding: utf-8 -*- 3 | 4 | import logging 5 | import typing 6 | import torch 7 | from fvcore.nn import activation_count, flop_count, parameter_count, parameter_count_table 8 | from torch import nn 9 | 10 | from detectron2.structures import BitMasks, Boxes, ImageList, Instances 11 | 12 | from .logger import log_first_n 13 | 14 | __all__ = [ 15 | "activation_count_operators", 16 | "flop_count_operators", 17 | "parameter_count_table", 18 | "parameter_count", 19 | ] 20 | 21 | FLOPS_MODE = "flops" 22 | ACTIVATIONS_MODE = "activations" 23 | 24 | 25 | # some extra ops to ignore from counting. 26 | _IGNORED_OPS = { 27 | "aten::add", 28 | "aten::add_", 29 | "aten::batch_norm", 30 | "aten::constant_pad_nd", 31 | "aten::div", 32 | "aten::div_", 33 | "aten::exp", 34 | "aten::log2", 35 | "aten::max_pool2d", 36 | "aten::meshgrid", 37 | "aten::mul", 38 | "aten::mul_", 39 | "aten::nonzero_numpy", 40 | "aten::rsub", 41 | "aten::sigmoid", 42 | "aten::sigmoid_", 43 | "aten::softmax", 44 | "aten::sort", 45 | "aten::sqrt", 46 | "aten::sub", 47 | "aten::upsample_nearest2d", 48 | "prim::PythonOp", 49 | "torchvision::nms", # TODO estimate flop for nms 50 | } 51 | 52 | 53 | def flop_count_operators( 54 | model: nn.Module, inputs: list, **kwargs 55 | ) -> typing.DefaultDict[str, float]: 56 | """ 57 | Implement operator-level flops counting using jit. 58 | This is a wrapper of fvcore.nn.flop_count, that supports standard detection models 59 | in detectron2. 60 | 61 | Note: 62 | The function runs the input through the model to compute flops. 63 | The flops of a detection model is often input-dependent, for example, 64 | the flops of box & mask head depends on the number of proposals & 65 | the number of detected objects. 66 | Therefore, the flops counting using a single input may not accurately 67 | reflect the computation cost of a model. 68 | 69 | Args: 70 | model: a detectron2 model that takes `list[dict]` as input. 71 | inputs (list[dict]): inputs to model, in detectron2's standard format. 72 | """ 73 | return _wrapper_count_operators(model=model, inputs=inputs, mode=FLOPS_MODE, **kwargs) 74 | 75 | 76 | def activation_count_operators( 77 | model: nn.Module, inputs: list, **kwargs 78 | ) -> typing.DefaultDict[str, float]: 79 | """ 80 | Implement operator-level activations counting using jit. 81 | This is a wrapper of fvcore.nn.activation_count, that supports standard detection models 82 | in detectron2. 
83 | 84 | Note: 85 | The function runs the input through the model to compute activations. 86 | The activations of a detection model is often input-dependent, for example, 87 | the activations of box & mask head depends on the number of proposals & 88 | the number of detected objects. 89 | 90 | Args: 91 | model: a detectron2 model that takes `list[dict]` as input. 92 | inputs (list[dict]): inputs to model, in detectron2's standard format. 93 | """ 94 | return _wrapper_count_operators(model=model, inputs=inputs, mode=ACTIVATIONS_MODE, **kwargs) 95 | 96 | 97 | def _flatten_to_tuple(outputs): 98 | result = [] 99 | if isinstance(outputs, torch.Tensor): 100 | result.append(outputs) 101 | elif isinstance(outputs, (list, tuple)): 102 | for v in outputs: 103 | result.extend(_flatten_to_tuple(v)) 104 | elif isinstance(outputs, dict): 105 | for _, v in outputs.items(): 106 | result.extend(_flatten_to_tuple(v)) 107 | elif isinstance(outputs, Instances): 108 | result.extend(_flatten_to_tuple(outputs.get_fields())) 109 | elif isinstance(outputs, (Boxes, BitMasks, ImageList)): 110 | result.append(outputs.tensor) 111 | else: 112 | log_first_n( 113 | logging.WARN, 114 | f"Output of type {type(outputs)} not included in flops/activations count.", 115 | n=10, 116 | ) 117 | return tuple(result) 118 | 119 | 120 | def _wrapper_count_operators( 121 | model: nn.Module, inputs: list, mode: str, **kwargs 122 | ) -> typing.DefaultDict[str, float]: 123 | 124 | # ignore some ops 125 | supported_ops = {k: lambda *args, **kwargs: {} for k in _IGNORED_OPS} 126 | supported_ops.update(kwargs.pop("supported_ops", {})) 127 | kwargs["supported_ops"] = supported_ops 128 | 129 | assert len(inputs) == 1, "Please use batch size=1" 130 | tensor_input = inputs[0]["image"] 131 | 132 | class WrapModel(nn.Module): 133 | def __init__(self, model): 134 | super().__init__() 135 | if isinstance( 136 | model, (nn.parallel.distributed.DistributedDataParallel, nn.DataParallel) 137 | ): 138 | self.model = model.module 139 | else: 140 | self.model = model 141 | 142 | def forward(self, image): 143 | # jit requires the input/output to be Tensors 144 | inputs = [{"image": image}] 145 | outputs = self.model.forward(inputs) 146 | # Only the subgraph that computes the returned tuple of tensor will be 147 | # counted. So we flatten everything we found to tuple of tensors. 148 | return _flatten_to_tuple(outputs) 149 | 150 | old_train = model.training 151 | with torch.no_grad(): 152 | if mode == FLOPS_MODE: 153 | ret = flop_count(WrapModel(model).train(False), (tensor_input,), **kwargs) 154 | elif mode == ACTIVATIONS_MODE: 155 | ret = activation_count(WrapModel(model).train(False), (tensor_input,), **kwargs) 156 | else: 157 | raise NotImplementedError("Count for mode {} is not supported yet.".format(mode)) 158 | # compatible with change in fvcore 159 | if isinstance(ret, tuple): 160 | ret = ret[0] 161 | model.train(old_train) 162 | return ret 163 | -------------------------------------------------------------------------------- /detectron2/utils/colormap.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | 3 | """ 4 | An awesome colormap for really neat visualizations. 5 | Copied from Detectron, and removed gray colors. 
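Before the colormap module continues below, here is a hedged sketch of how the flop counter from `analysis.py` above might be invoked. It is illustrative only; `model` is assumed to be an already-built detectron2 model that takes `list[dict]` inputs:

import torch
from detectron2.utils.analysis import flop_count_operators

# Flop counting runs a single input through the model, so the result is input-dependent.
inputs = [{"image": torch.randn(3, 800, 800)}]  # batch size must be 1
counts = flop_count_operators(model, inputs)    # dict mapping operator name -> count
print(sorted(counts.items(), key=lambda kv: -kv[1]))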
6 | """ 7 | 8 | import numpy as np 9 | 10 | __all__ = ["colormap", "random_color"] 11 | 12 | # fmt: off 13 | # RGB: 14 | _COLORS = np.array( 15 | [ 16 | 0.000, 0.447, 0.741, 17 | 0.850, 0.325, 0.098, 18 | 0.929, 0.694, 0.125, 19 | 0.494, 0.184, 0.556, 20 | 0.466, 0.674, 0.188, 21 | 0.301, 0.745, 0.933, 22 | 0.635, 0.078, 0.184, 23 | 0.300, 0.300, 0.300, 24 | 0.600, 0.600, 0.600, 25 | 1.000, 0.000, 0.000, 26 | 1.000, 0.500, 0.000, 27 | 0.749, 0.749, 0.000, 28 | 0.000, 1.000, 0.000, 29 | 0.000, 0.000, 1.000, 30 | 0.667, 0.000, 1.000, 31 | 0.333, 0.333, 0.000, 32 | 0.333, 0.667, 0.000, 33 | 0.333, 1.000, 0.000, 34 | 0.667, 0.333, 0.000, 35 | 0.667, 0.667, 0.000, 36 | 0.667, 1.000, 0.000, 37 | 1.000, 0.333, 0.000, 38 | 1.000, 0.667, 0.000, 39 | 1.000, 1.000, 0.000, 40 | 0.000, 0.333, 0.500, 41 | 0.000, 0.667, 0.500, 42 | 0.000, 1.000, 0.500, 43 | 0.333, 0.000, 0.500, 44 | 0.333, 0.333, 0.500, 45 | 0.333, 0.667, 0.500, 46 | 0.333, 1.000, 0.500, 47 | 0.667, 0.000, 0.500, 48 | 0.667, 0.333, 0.500, 49 | 0.667, 0.667, 0.500, 50 | 0.667, 1.000, 0.500, 51 | 1.000, 0.000, 0.500, 52 | 1.000, 0.333, 0.500, 53 | 1.000, 0.667, 0.500, 54 | 1.000, 1.000, 0.500, 55 | 0.000, 0.333, 1.000, 56 | 0.000, 0.667, 1.000, 57 | 0.000, 1.000, 1.000, 58 | 0.333, 0.000, 1.000, 59 | 0.333, 0.333, 1.000, 60 | 0.333, 0.667, 1.000, 61 | 0.333, 1.000, 1.000, 62 | 0.667, 0.000, 1.000, 63 | 0.667, 0.333, 1.000, 64 | 0.667, 0.667, 1.000, 65 | 0.667, 1.000, 1.000, 66 | 1.000, 0.000, 1.000, 67 | 1.000, 0.333, 1.000, 68 | 1.000, 0.667, 1.000, 69 | 0.333, 0.000, 0.000, 70 | 0.500, 0.000, 0.000, 71 | 0.667, 0.000, 0.000, 72 | 0.833, 0.000, 0.000, 73 | 1.000, 0.000, 0.000, 74 | 0.000, 0.167, 0.000, 75 | 0.000, 0.333, 0.000, 76 | 0.000, 0.500, 0.000, 77 | 0.000, 0.667, 0.000, 78 | 0.000, 0.833, 0.000, 79 | 0.000, 1.000, 0.000, 80 | 0.000, 0.000, 0.167, 81 | 0.000, 0.000, 0.333, 82 | 0.000, 0.000, 0.500, 83 | 0.000, 0.000, 0.667, 84 | 0.000, 0.000, 0.833, 85 | 0.000, 0.000, 1.000, 86 | 0.000, 0.000, 0.000, 87 | 0.143, 0.143, 0.143, 88 | 0.857, 0.857, 0.857, 89 | 1.000, 1.000, 1.000 90 | ] 91 | ).astype(np.float32).reshape(-1, 3) 92 | # fmt: on 93 | 94 | 95 | def colormap(rgb=False, maximum=255): 96 | """ 97 | Args: 98 | rgb (bool): whether to return RGB colors or BGR colors. 99 | maximum (int): either 255 or 1 100 | 101 | Returns: 102 | ndarray: a float32 array of Nx3 colors, in range [0, 255] or [0, 1] 103 | """ 104 | assert maximum in [255, 1], maximum 105 | c = _COLORS * maximum 106 | if not rgb: 107 | c = c[:, ::-1] 108 | return c 109 | 110 | 111 | def random_color(rgb=False, maximum=255): 112 | """ 113 | Args: 114 | rgb (bool): whether to return RGB colors or BGR colors. 
115 | maximum (int): either 255 or 1 116 | 117 | Returns: 118 | ndarray: a vector of 3 numbers 119 | """ 120 | idx = np.random.randint(0, len(_COLORS)) 121 | ret = _COLORS[idx] * maximum 122 | if not rgb: 123 | ret = ret[::-1] 124 | return ret 125 | 126 | 127 | if __name__ == "__main__": 128 | import cv2 129 | 130 | size = 100 131 | H, W = 10, 10 132 | canvas = np.random.rand(H * size, W * size, 3).astype("float32") 133 | for h in range(H): 134 | for w in range(W): 135 | idx = h * W + w 136 | if idx >= len(_COLORS): 137 | break 138 | canvas[h * size : (h + 1) * size, w * size : (w + 1) * size] = _COLORS[idx] 139 | cv2.imshow("a", canvas) 140 | cv2.waitKey(0) 141 | -------------------------------------------------------------------------------- /detectron2/utils/env.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | import importlib 3 | import importlib.util 4 | import logging 5 | import numpy as np 6 | import os 7 | import random 8 | import sys 9 | from datetime import datetime 10 | import torch 11 | 12 | __all__ = ["seed_all_rng"] 13 | 14 | 15 | TORCH_VERSION = tuple(int(x) for x in torch.__version__.split(".")[:2]) 16 | """ 17 | PyTorch version as a tuple of 2 ints. Useful for comparison. 18 | """ 19 | 20 | 21 | def seed_all_rng(seed=None): 22 | """ 23 | Set the random seed for the RNG in torch, numpy and python. 24 | 25 | Args: 26 | seed (int): if None, will use a strong random seed. 27 | """ 28 | if seed is None: 29 | seed = ( 30 | os.getpid() 31 | + int(datetime.now().strftime("%S%f")) 32 | + int.from_bytes(os.urandom(2), "big") 33 | ) 34 | logger = logging.getLogger(__name__) 35 | logger.info("Using a generated random seed {}".format(seed)) 36 | np.random.seed(seed) 37 | torch.set_rng_state(torch.manual_seed(seed).get_state()) 38 | random.seed(seed) 39 | 40 | 41 | # from https://stackoverflow.com/questions/67631/how-to-import-a-module-given-the-full-path 42 | def _import_file(module_name, file_path, make_importable=False): 43 | spec = importlib.util.spec_from_file_location(module_name, file_path) 44 | module = importlib.util.module_from_spec(spec) 45 | spec.loader.exec_module(module) 46 | if make_importable: 47 | sys.modules[module_name] = module 48 | return module 49 | 50 | 51 | def _configure_libraries(): 52 | """ 53 | Configurations for some libraries. 54 | """ 55 | # An environment option to disable `import cv2` globally, 56 | # in case it leads to negative performance impact 57 | disable_cv2 = int(os.environ.get("DETECTRON2_DISABLE_CV2", False)) 58 | if disable_cv2: 59 | sys.modules["cv2"] = None 60 | else: 61 | # Disable opencl in opencv since its interaction with cuda often has negative effects 62 | # This envvar is supported after OpenCV 3.4.0 63 | os.environ["OPENCV_OPENCL_RUNTIME"] = "disabled" 64 | try: 65 | import cv2 66 | 67 | if int(cv2.__version__.split(".")[0]) >= 3: 68 | cv2.ocl.setUseOpenCL(False) 69 | except ModuleNotFoundError: 70 | # Other types of ImportError, if happened, should not be ignored. 
71 | # Because a failed opencv import could mess up address space 72 | # https://github.com/skvark/opencv-python/issues/381 73 | pass 74 | 75 | def get_version(module, digit=2): 76 | return tuple(map(int, module.__version__.split(".")[:digit])) 77 | 78 | # fmt: off 79 | assert get_version(torch) >= (1, 4), "Requires torch>=1.4" 80 | import fvcore 81 | assert get_version(fvcore, 3) >= (0, 1, 2), "Requires fvcore>=0.1.2" 82 | import yaml 83 | assert get_version(yaml) >= (5, 1), "Requires pyyaml>=5.1" 84 | # fmt: on 85 | 86 | 87 | _ENV_SETUP_DONE = False 88 | 89 | 90 | def setup_environment(): 91 | """Perform environment setup work. The default setup is a no-op, but this 92 | function allows the user to specify a Python source file or a module in 93 | the $DETECTRON2_ENV_MODULE environment variable, that performs 94 | custom setup work that may be necessary to their computing environment. 95 | """ 96 | global _ENV_SETUP_DONE 97 | if _ENV_SETUP_DONE: 98 | return 99 | _ENV_SETUP_DONE = True 100 | 101 | _configure_libraries() 102 | 103 | custom_module_path = os.environ.get("DETECTRON2_ENV_MODULE") 104 | 105 | if custom_module_path: 106 | setup_custom_environment(custom_module_path) 107 | else: 108 | # The default setup is a no-op 109 | pass 110 | 111 | 112 | def setup_custom_environment(custom_module): 113 | """ 114 | Load custom environment setup by importing a Python source file or a 115 | module, and run the setup function. 116 | """ 117 | if custom_module.endswith(".py"): 118 | module = _import_file("detectron2.utils.env.custom_module", custom_module) 119 | else: 120 | module = importlib.import_module(custom_module) 121 | assert hasattr(module, "setup_environment") and callable(module.setup_environment), ( 122 | "Custom environment module defined in {} does not have the " 123 | "required callable attribute 'setup_environment'." 124 | ).format(custom_module) 125 | module.setup_environment() 126 | -------------------------------------------------------------------------------- /detectron2/utils/file_io.py: -------------------------------------------------------------------------------- 1 | from fvcore.common.file_io import HTTPURLHandler, OneDrivePathHandler, PathHandler, PathManagerBase 2 | 3 | __all__ = ["PathManager", "PathHandler"] 4 | 5 | 6 | PathManager = PathManagerBase() 7 | """ 8 | This is a detectron2 project-specific PathManager. 9 | We try to stay away from global PathManager in fvcore as it 10 | introduces potential conflicts among other libraries. 11 | """ 12 | 13 | 14 | class Detectron2Handler(PathHandler): 15 | """ 16 | Resolve anything that's hosted under detectron2's namespace. 17 | """ 18 | 19 | PREFIX = "detectron2://" 20 | S3_DETECTRON2_PREFIX = "https://dl.fbaipublicfiles.com/detectron2/" 21 | 22 | def _get_supported_prefixes(self): 23 | return [self.PREFIX] 24 | 25 | def _get_local_path(self, path): 26 | name = path[len(self.PREFIX) :] 27 | return PathManager.get_local_path(self.S3_DETECTRON2_PREFIX + name) 28 | 29 | def _open(self, path, mode="r", **kwargs): 30 | return PathManager.open(self._get_local_path(path), mode, **kwargs) 31 | 32 | 33 | PathManager.register_handler(HTTPURLHandler()) 34 | PathManager.register_handler(OneDrivePathHandler()) 35 | PathManager.register_handler(Detectron2Handler()) 36 | -------------------------------------------------------------------------------- /detectron2/utils/memory.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
2 | 3 | import logging 4 | from contextlib import contextmanager 5 | from functools import wraps 6 | import torch 7 | 8 | __all__ = ["retry_if_cuda_oom"] 9 | 10 | 11 | @contextmanager 12 | def _ignore_torch_cuda_oom(): 13 | """ 14 | A context which ignores CUDA OOM exception from pytorch. 15 | """ 16 | try: 17 | yield 18 | except RuntimeError as e: 19 | # NOTE: the string may change? 20 | if "CUDA out of memory. " in str(e): 21 | pass 22 | else: 23 | raise 24 | 25 | 26 | def retry_if_cuda_oom(func): 27 | """ 28 | Makes a function retry itself after encountering 29 | pytorch's CUDA OOM error. 30 | It will first retry after calling `torch.cuda.empty_cache()`. 31 | 32 | If that still fails, it will then retry by trying to convert inputs to CPUs. 33 | In this case, it expects the function to dispatch to CPU implementation. 34 | The return values may become CPU tensors as well and it's user's 35 | responsibility to convert it back to CUDA tensor if needed. 36 | 37 | Args: 38 | func: a stateless callable that takes tensor-like objects as arguments 39 | 40 | Returns: 41 | a callable which retries `func` if OOM is encountered. 42 | 43 | Examples: 44 | :: 45 | output = retry_if_cuda_oom(some_torch_function)(input1, input2) 46 | # output may be on CPU even if inputs are on GPU 47 | 48 | Note: 49 | 1. When converting inputs to CPU, it will only look at each argument and check 50 | if it has `.device` and `.to` for conversion. Nested structures of tensors 51 | are not supported. 52 | 53 | 2. Since the function might be called more than once, it has to be 54 | stateless. 55 | """ 56 | 57 | def maybe_to_cpu(x): 58 | try: 59 | like_gpu_tensor = x.device.type == "cuda" and hasattr(x, "to") 60 | except AttributeError: 61 | like_gpu_tensor = False 62 | if like_gpu_tensor: 63 | return x.to(device="cpu") 64 | else: 65 | return x 66 | 67 | @wraps(func) 68 | def wrapped(*args, **kwargs): 69 | with _ignore_torch_cuda_oom(): 70 | return func(*args, **kwargs) 71 | 72 | # Clear cache and retry 73 | torch.cuda.empty_cache() 74 | with _ignore_torch_cuda_oom(): 75 | return func(*args, **kwargs) 76 | 77 | # Try on CPU. This slows down the code significantly, therefore print a notice. 78 | logger = logging.getLogger(__name__) 79 | logger.info("Attempting to copy inputs of {} to CPU due to CUDA OOM".format(str(func))) 80 | new_args = (maybe_to_cpu(x) for x in args) 81 | new_kwargs = {k: maybe_to_cpu(v) for k, v in kwargs.items()} 82 | return func(*new_args, **new_kwargs) 83 | 84 | return wrapped 85 | -------------------------------------------------------------------------------- /detectron2/utils/registry.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | 3 | # Keep this module for backward compatibility. 4 | from fvcore.common.registry import Registry # noqa 5 | 6 | __all__ = ["Registry"] 7 | -------------------------------------------------------------------------------- /detectron2/utils/serialize.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | import cloudpickle 3 | 4 | 5 | class PicklableWrapper(object): 6 | """ 7 | Wrap an object to make it more picklable, note that it uses 8 | heavy weight serialization libraries that are slower than pickle. 9 | It's best to use it only on closures (which are usually not picklable). 
10 | 11 | This is a simplified version of 12 | https://github.com/joblib/joblib/blob/master/joblib/externals/loky/cloudpickle_wrapper.py 13 | """ 14 | 15 | def __init__(self, obj): 16 | self._obj = obj 17 | 18 | def __reduce__(self): 19 | s = cloudpickle.dumps(self._obj) 20 | return cloudpickle.loads, (s,) 21 | 22 | def __call__(self, *args, **kwargs): 23 | return self._obj(*args, **kwargs) 24 | 25 | def __getattr__(self, attr): 26 | # Ensure that the wrapped object can be used seamlessly as the previous object. 27 | if attr not in ["_obj"]: 28 | return getattr(self._obj, attr) 29 | return getattr(self, attr) 30 | -------------------------------------------------------------------------------- /detectron2/utils/testing.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | from detectron2 import model_zoo 5 | from detectron2.data import DatasetCatalog 6 | from detectron2.data.detection_utils import read_image 7 | from detectron2.modeling import build_model 8 | from detectron2.structures import Boxes 9 | from detectron2.utils.file_io import PathManager 10 | 11 | 12 | """ 13 | Internal utilities for tests. Don't use except for writing tests. 14 | """ 15 | 16 | 17 | def get_model_no_weights(config_path): 18 | """ 19 | Like model_zoo.get, but do not load any weights (even pretrained) 20 | """ 21 | cfg = model_zoo.get_config(config_path) 22 | if not torch.cuda.is_available(): 23 | cfg.MODEL.DEVICE = "cpu" 24 | return build_model(cfg) 25 | 26 | 27 | def random_boxes(num_boxes, max_coord=100, device="cpu"): 28 | """ 29 | Create a random Nx4 boxes tensor, with coordinates < max_coord. 30 | """ 31 | boxes = torch.rand(num_boxes, 4, device=device) * (max_coord * 0.5) 32 | boxes.clamp_(min=1.0) # tiny boxes cause numerical instability in box regression 33 | # Note: the implementation of this function in torchvision is: 34 | # boxes[:, 2:] += torch.rand(N, 2) * 100 35 | # but it does not guarantee non-negative widths/heights constraints: 36 | # boxes[:, 2] >= boxes[:, 0] and boxes[:, 3] >= boxes[:, 1]: 37 | boxes[:, 2:] += boxes[:, :2] 38 | return boxes 39 | 40 | 41 | def get_sample_coco_image(tensor=True): 42 | """ 43 | Args: 44 | tensor (bool): if True, returns 3xHxW tensor. 45 | else, returns a HxWx3 numpy array. 46 | 47 | Returns: 48 | an image, in BGR color. 49 | """ 50 | try: 51 | file_name = DatasetCatalog.get("coco_2017_train")[0]["file_name"] 52 | if not PathManager.exists(file_name): 53 | raise FileNotFoundError() 54 | except IOError: 55 | # for public CI to run 56 | file_name = "http://images.cocodataset.org/train2017/000000000009.jpg" 57 | ret = read_image(file_name, format="BGR") 58 | if tensor: 59 | ret = torch.from_numpy(np.ascontiguousarray(ret.transpose(2, 0, 1))) 60 | return ret 61 | 62 | 63 | def assert_instances_allclose(input, other, rtol=1e-5, msg=""): 64 | """ 65 | Args: 66 | input, other (Instances): 67 | """ 68 | if not msg: 69 | msg = "Two Instances are different! " 70 | else: 71 | msg = msg.rstrip() + " " 72 | assert input.image_size == other.image_size, ( 73 | msg + f"image_size is {input.image_size} vs. {other.image_size}!" 74 | ) 75 | fields = sorted(input.get_fields().keys()) 76 | fields_other = sorted(other.get_fields().keys()) 77 | assert fields == fields_other, msg + f"Fields are {fields} vs {fields_other}!" 
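    # Compare field-by-field: Boxes get an absolute tolerance scaled by ~100 (typical
    # coordinate magnitude), floating-point tensors scale the tolerance by their own
    # max magnitude, and integer tensors must match exactly.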
78 | 79 | for f in fields: 80 | val1, val2 = input.get(f), other.get(f) 81 | if isinstance(val1, Boxes): 82 | # boxes in the range of O(100) and can have a larger tolerance 83 | assert torch.allclose(val1.tensor, val2.tensor, atol=100 * rtol), ( 84 | msg + f"Field {f} differs too much!" 85 | ) 86 | elif isinstance(val1, torch.Tensor): 87 | if val1.dtype.is_floating_point: 88 | mag = torch.abs(val1).max().cpu().item() 89 | assert torch.allclose(val1, val2, atol=mag * rtol), ( 90 | msg + f"Field {f} differs too much!" 91 | ) 92 | else: 93 | assert torch.equal(val1, val2), msg + f"Field {f} is different!" 94 | else: 95 | raise ValueError(f"Don't know how to compare type {type(val1)}") 96 | -------------------------------------------------------------------------------- /projects/YOSO/configs/ade20k/panoptic-segmentation/Base-ADE20K-PanopticSegmentation.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | PIXEL_MEAN: [123.675, 116.280, 103.530] 3 | PIXEL_STD: [58.395, 57.120, 57.375] 4 | META_ARCHITECTURE: "YOSO" 5 | DATASETS: 6 | TRAIN: ("ade20k_panoptic_train",) 7 | TEST: ("ade20k_panoptic_val",) 8 | SOLVER: 9 | IMS_PER_BATCH: 16 10 | BASE_LR: 0.0001 11 | MAX_ITER: 160000 12 | WARMUP_FACTOR: 1.0 13 | WARMUP_ITERS: 0 14 | WEIGHT_DECAY: 0.05 15 | OPTIMIZER: "ADAMW" 16 | LR_SCHEDULER_NAME: "WarmupPolyLR" 17 | BACKBONE_MULTIPLIER: 0.1 18 | CLIP_GRADIENTS: 19 | ENABLED: True 20 | CLIP_TYPE: "full_model" 21 | CLIP_VALUE: 0.01 22 | NORM_TYPE: 2.0 23 | AMP: 24 | ENABLED: False 25 | INPUT: 26 | MIN_SIZE_TRAIN: !!python/object/apply:eval ["[int(x * 0.1 * 640) for x in range(5, 21)]"] 27 | MIN_SIZE_TRAIN_SAMPLING: "choice" 28 | MIN_SIZE_TEST: 640 29 | MAX_SIZE_TRAIN: 2560 30 | MAX_SIZE_TEST: 2560 31 | CROP: 32 | ENABLED: True 33 | TYPE: "absolute" 34 | SIZE: (640, 640) 35 | SINGLE_CATEGORY_MAX_AREA: 1.0 36 | COLOR_AUG_SSD: True 37 | SIZE_DIVISIBILITY: 640 # used in dataset mapper 38 | FORMAT: "RGB" 39 | DATASET_MAPPER_NAME: "yoso_panoptic" 40 | TEST: 41 | EVAL_PERIOD: 5000 42 | AUG: 43 | ENABLED: False 44 | MIN_SIZES: [320, 480, 640, 800, 960, 1120] 45 | MAX_SIZE: 4480 46 | FLIP: True 47 | DATALOADER: 48 | FILTER_EMPTY_ANNOTATIONS: True 49 | NUM_WORKERS: 4 50 | VERSION: 2 51 | # CUDNN_BENCHMARK: True 52 | -------------------------------------------------------------------------------- /projects/YOSO/configs/ade20k/panoptic-segmentation/YOSO-R50.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: Base-ADE20K-PanopticSegmentation.yaml 2 | MODEL: 3 | BACKBONE: 4 | FREEZE_AT: 0 5 | NAME: "build_resnet_backbone" 6 | RESNETS: 7 | DEPTH: 50 8 | STRIDE_IN_1X1: False 9 | OUT_FEATURES: ["res2", "res3", "res4", "res5"] 10 | WEIGHTS: "detectron2://ImageNetPretrained/torchvision/R-50.pkl" 11 | YOSO: 12 | SIZE_DIVISIBILITY: 32 13 | # Structure 14 | NUM_CLASSES: 150 15 | NUM_STAGES: 2 16 | IN_FEATURES: ["res2", "res3", "res4", "res5"] 17 | HIDDEN_DIM: 256 18 | NUM_PROPOSALS: 100 19 | CONV_KERNEL_SIZE_2D: 1 20 | CONV_KERNEL_SIZE_1D: 3 21 | NUM_CLS_FCS: 1 22 | NUM_MASK_FCS: 1 23 | # Loss 24 | NO_OBJECT_WEIGHT: 0.1 25 | CLASS_WEIGHT: 2.0 26 | MASK_WEIGHT: 5.0 27 | DICE_WEIGHT: 5.0 28 | TRAIN_NUM_POINTS: 12544 29 | OVERSAMPLE_RATIO: 3.0 30 | IMPORTANCE_SAMPLE_RATIO: 0.75 31 | TEMPERATIRE: 0.5 #1.0 32 | TEST: 33 | SEMANTIC_ON: False #True 34 | INSTANCE_ON: False #True 35 | PANOPTIC_ON: True 36 | OVERLAP_THRESHOLD: 0.8 37 | OBJECT_MASK_THRESHOLD: 0.2 38 | OUTPUT_DIR: "output/yoso_resnet50_panoptic_seg_ade20k" 39 | 
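The YOSO-R50 config above inherits its solver, input, and dataset settings from Base-ADE20K-PanopticSegmentation.yaml via the _BASE_ key. As a rough sketch of how such a config is typically consumed (illustrative only, not a file in this repository; the relative path and the yoso import assume the working directory is projects/YOSO, the layout train_net.py uses):

from detectron2.config import get_cfg
from detectron2.modeling import build_model

# Importing the yoso package pulls in the YOSO segmentator and dataset mappers
# (see projects/YOSO/yoso/__init__.py below).
from yoso import add_yoso_config

cfg = get_cfg()                     # detectron2 defaults
add_yoso_config(cfg)                # add MODEL.YOSO / SOLVER / INPUT keys (yoso/config.py)
cfg.merge_from_file("configs/ade20k/panoptic-segmentation/YOSO-R50.yaml")  # resolves _BASE_
cfg.freeze()

model = build_model(cfg)            # instantiates the META_ARCHITECTURE named in the config
print(cfg.MODEL.YOSO.NUM_CLASSES)   # 150 for ADE20K panoptic segmentation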
-------------------------------------------------------------------------------- /projects/YOSO/configs/cityscapes/panoptic-segmentation/Base-Cityscapes-PanopticSegmentation.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | PIXEL_MEAN: [123.675, 116.280, 103.530] 3 | PIXEL_STD: [58.395, 57.120, 57.375] 4 | META_ARCHITECTURE: "YOSO" 5 | DATASETS: 6 | TRAIN: ("cityscapes_fine_panoptic_train",) 7 | TEST: ("cityscapes_fine_panoptic_val",) 8 | SOLVER: 9 | IMS_PER_BATCH: 16 10 | BASE_LR: 0.0001 11 | MAX_ITER: 180000 #90000 # 12 | WARMUP_FACTOR: 1.0 13 | WARMUP_ITERS: 0 14 | WEIGHT_DECAY: 0.05 15 | OPTIMIZER: "ADAMW" 16 | LR_SCHEDULER_NAME: "WarmupPolyLR" 17 | BACKBONE_MULTIPLIER: 0.1 18 | CLIP_GRADIENTS: 19 | ENABLED: True 20 | CLIP_TYPE: "full_model" 21 | CLIP_VALUE: 0.01 22 | NORM_TYPE: 2.0 23 | AMP: 24 | ENABLED: False 25 | INPUT: 26 | MIN_SIZE_TRAIN: !!python/object/apply:eval ["[int(x * 0.1 * 1024) for x in range(5, 21)]"] 27 | MIN_SIZE_TRAIN_SAMPLING: "choice" 28 | MAX_SIZE_TRAIN: 4096 29 | MIN_SIZE_TEST: 512 #1024 30 | MAX_SIZE_TEST: 1024 #2048 31 | CROP: 32 | ENABLED: True 33 | TYPE: "absolute" 34 | SIZE: (512, 1024) 35 | SINGLE_CATEGORY_MAX_AREA: 1.0 36 | COLOR_AUG_SSD: True 37 | SIZE_DIVISIBILITY: -1 38 | FORMAT: "RGB" 39 | DATASET_MAPPER_NAME: "yoso_panoptic" 40 | TEST: 41 | EVAL_PERIOD: 5000 42 | AUG: 43 | ENABLED: False 44 | MIN_SIZES: [512, 768, 1024, 1280, 1536, 1792] 45 | MAX_SIZE: 4096 46 | FLIP: True 47 | DATALOADER: 48 | FILTER_EMPTY_ANNOTATIONS: True 49 | NUM_WORKERS: 4 50 | VERSION: 2 51 | CUDNN_BENCHMARK: True -------------------------------------------------------------------------------- /projects/YOSO/configs/cityscapes/panoptic-segmentation/YOSO-R50.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: Base-Cityscapes-PanopticSegmentation.yaml 2 | MODEL: 3 | BACKBONE: 4 | FREEZE_AT: 0 5 | NAME: "build_resnet_backbone" 6 | RESNETS: 7 | DEPTH: 50 8 | STRIDE_IN_1X1: False 9 | NORM: "SyncBN" # use syncbn for cityscapes dataset 10 | OUT_FEATURES: ["res2", "res3", "res4", "res5"] 11 | WEIGHTS: "detectron2://ImageNetPretrained/torchvision/R-50.pkl" 12 | YOSO: 13 | SIZE_DIVISIBILITY: 32 14 | # Structure 15 | NUM_CLASSES: 19 16 | NUM_STAGES: 2 17 | IN_FEATURES: ["res2", "res3", "res4", "res5"] 18 | HIDDEN_DIM: 256 19 | NUM_PROPOSALS: 100 20 | CONV_KERNEL_SIZE_2D: 1 21 | CONV_KERNEL_SIZE_1D: 3 22 | NUM_CLS_FCS: 3 #1 23 | NUM_MASK_FCS: 3 #1 24 | # Loss 25 | NO_OBJECT_WEIGHT: 0.1 26 | CLASS_WEIGHT: 2.0 27 | MASK_WEIGHT: 5.0 28 | DICE_WEIGHT: 5.0 29 | TRAIN_NUM_POINTS: 12544 30 | OVERSAMPLE_RATIO: 3.0 31 | IMPORTANCE_SAMPLE_RATIO: 0.75 32 | TEMPERATIRE: 0.05 33 | TEST: 34 | SEMANTIC_ON: False #True 35 | INSTANCE_ON: False #True 36 | PANOPTIC_ON: True 37 | OVERLAP_THRESHOLD: 0.8 38 | OBJECT_MASK_THRESHOLD: 0.8 #0.5 39 | OUTPUT_DIR: "output/yoso_resnet50_panoptic_seg_cityscapes" -------------------------------------------------------------------------------- /projects/YOSO/configs/coco/panoptic-segmentation/Base-COCO-PanopticSegmentation.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | PIXEL_MEAN: [123.675, 116.280, 103.530] 3 | PIXEL_STD: [58.395, 57.120, 57.375] 4 | META_ARCHITECTURE: "YOSO" 5 | DATASETS: 6 | TRAIN: ("coco_2017_train_panoptic",) 7 | TEST: ("coco_2017_val_panoptic_with_sem_seg",) # to evaluate instance and semantic performance as well 8 | SOLVER: 9 | IMS_PER_BATCH: 16 10 | BASE_LR: 0.0001 11 | STEPS: (327778, 355092) 12 
| MAX_ITER: 368750 13 | WARMUP_FACTOR: 1.0 14 | WARMUP_ITERS: 10 15 | WEIGHT_DECAY: 0.05 16 | OPTIMIZER: "ADAMW" 17 | BACKBONE_MULTIPLIER: 0.1 18 | CLIP_GRADIENTS: 19 | ENABLED: True 20 | CLIP_TYPE: "full_model" 21 | CLIP_VALUE: 0.01 22 | NORM_TYPE: 2.0 23 | AMP: 24 | ENABLED: False 25 | INPUT: 26 | IMAGE_SIZE: 1024 27 | MIN_SCALE: 0.1 28 | MAX_SCALE: 2.0 29 | FORMAT: "RGB" 30 | DATASET_MAPPER_NAME: "yoso_panoptic_lsj" 31 | MIN_SIZE_TEST: 800 # 550 #512 32 | MAX_SIZE_TEST: 1333 # 800 #800 33 | TEST: 34 | EVAL_PERIOD: 5000 35 | DATALOADER: 36 | FILTER_EMPTY_ANNOTATIONS: True 37 | NUM_WORKERS: 4 38 | VERSION: 2 39 | # CUDNN_BENCHMARK: True -------------------------------------------------------------------------------- /projects/YOSO/configs/coco/panoptic-segmentation/YOSO-R50.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: Base-COCO-PanopticSegmentation.yaml 2 | MODEL: 3 | BACKBONE: 4 | FREEZE_AT: 0 5 | NAME: "build_resnet_backbone" 6 | RESNETS: 7 | DEPTH: 50 8 | STRIDE_IN_1X1: False 9 | OUT_FEATURES: ["res2", "res3", "res4", "res5"] 10 | WEIGHTS: "detectron2://ImageNetPretrained/torchvision/R-50.pkl" 11 | YOSO: 12 | SIZE_DIVISIBILITY: 32 13 | # Structure 14 | NUM_CLASSES: 133 15 | NUM_STAGES: 2 16 | IN_FEATURES: ["res2", "res3", "res4", "res5"] 17 | HIDDEN_DIM: 256 18 | NUM_PROPOSALS: 100 19 | CONV_KERNEL_SIZE_2D: 1 20 | CONV_KERNEL_SIZE_1D: 3 21 | NUM_CLS_FCS: 1 22 | NUM_MASK_FCS: 1 23 | # Loss 24 | NO_OBJECT_WEIGHT: 0.1 25 | CLASS_WEIGHT: 2.0 26 | MASK_WEIGHT: 5.0 27 | DICE_WEIGHT: 5.0 28 | TRAIN_NUM_POINTS: 12544 29 | OVERSAMPLE_RATIO: 3.0 30 | IMPORTANCE_SAMPLE_RATIO: 0.75 31 | TEMPERATIRE: 0.05 32 | TEST: 33 | SEMANTIC_ON: False 34 | INSTANCE_ON: False 35 | PANOPTIC_ON: True 36 | OVERLAP_THRESHOLD: 0.8 37 | OBJECT_MASK_THRESHOLD: 0.7 38 | OUTPUT_DIR: "output/yoso_resnet50_panoptic_seg_coco" -------------------------------------------------------------------------------- /projects/YOSO/configs/mapillary-vistas/panoptic-segmentation/Base-MapillaryVistas-PanopticSegmentation.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | PIXEL_MEAN: [123.675, 116.280, 103.530] 3 | PIXEL_STD: [58.395, 57.120, 57.375] 4 | META_ARCHITECTURE: "YOSO" 5 | DATASETS: 6 | TRAIN: ("mapillary_vistas_panoptic_train",) 7 | TEST: ("mapillary_vistas_panoptic_val",) 8 | SOLVER: 9 | IMS_PER_BATCH: 16 10 | BASE_LR: 0.0001 11 | MAX_ITER: 300000 12 | WARMUP_FACTOR: 1.0 13 | WARMUP_ITERS: 0 14 | WEIGHT_DECAY: 0.05 15 | OPTIMIZER: "ADAMW" 16 | LR_SCHEDULER_NAME: "WarmupPolyLR" 17 | BACKBONE_MULTIPLIER: 0.1 18 | CLIP_GRADIENTS: 19 | ENABLED: True 20 | CLIP_TYPE: "full_model" 21 | CLIP_VALUE: 0.01 22 | NORM_TYPE: 2.0 23 | AMP: 24 | ENABLED: False 25 | INPUT: 26 | MIN_SIZE_TRAIN: !!python/object/apply:eval ["[int(x * 0.1 * 2048) for x in range(5, 21)]"] 27 | MIN_SIZE_TRAIN_SAMPLING: "choice" 28 | MIN_SIZE_TEST: 2048 29 | MAX_SIZE_TRAIN: 8192 30 | MAX_SIZE_TEST: 2048 31 | CROP: 32 | ENABLED: True 33 | TYPE: "absolute" 34 | SIZE: (1024, 1024) 35 | SINGLE_CATEGORY_MAX_AREA: 1.0 36 | COLOR_AUG_SSD: True 37 | SIZE_DIVISIBILITY: -1 #1024 # used in dataset mapper 38 | FORMAT: "RGB" 39 | DATASET_MAPPER_NAME: "yoso_panoptic" 40 | TEST: 41 | EVAL_PERIOD: 20000 #5000 42 | DATALOADER: 43 | FILTER_EMPTY_ANNOTATIONS: True 44 | NUM_WORKERS: 4 45 | VERSION: 2 -------------------------------------------------------------------------------- /projects/YOSO/configs/mapillary-vistas/panoptic-segmentation/YOSO-R50.yaml: 
-------------------------------------------------------------------------------- 1 | _BASE_: Base-MapillaryVistas-PanopticSegmentation.yaml 2 | MODEL: 3 | BACKBONE: 4 | FREEZE_AT: 0 5 | NAME: "build_resnet_backbone" 6 | RESNETS: 7 | DEPTH: 50 8 | STRIDE_IN_1X1: False 9 | OUT_FEATURES: ["res2", "res3", "res4", "res5"] 10 | WEIGHTS: "detectron2://ImageNetPretrained/torchvision/R-50.pkl" 11 | YOSO: 12 | SIZE_DIVISIBILITY: 32 13 | # Structure 14 | NUM_CLASSES: 65 15 | NUM_STAGES: 2 16 | IN_FEATURES: ["res2", "res3", "res4", "res5"] 17 | HIDDEN_DIM: 256 18 | NUM_PROPOSALS: 150 #100 19 | CONV_KERNEL_SIZE_2D: 1 20 | CONV_KERNEL_SIZE_1D: 3 21 | NUM_CLS_FCS: 3 #1 22 | NUM_MASK_FCS: 3 #1 23 | # Loss 24 | NO_OBJECT_WEIGHT: 0.1 25 | CLASS_WEIGHT: 2.0 26 | MASK_WEIGHT: 5.0 27 | DICE_WEIGHT: 5.0 28 | TRAIN_NUM_POINTS: 12544 29 | OVERSAMPLE_RATIO: 3.0 30 | IMPORTANCE_SAMPLE_RATIO: 0.75 31 | TEMPERATIRE: 0.1 #0.5 32 | TEST: 33 | SEMANTIC_ON: False 34 | INSTANCE_ON: False 35 | PANOPTIC_ON: True 36 | OVERLAP_THRESHOLD: 0.8 37 | OBJECT_MASK_THRESHOLD: 0.0 38 | OUTPUT_DIR: "output/yoso_resnet50_panoptic_seg_mapillary" 39 | -------------------------------------------------------------------------------- /projects/YOSO/yoso/__init__.py: -------------------------------------------------------------------------------- 1 | from .config import add_yoso_config 2 | from .segmentator import YOSO 3 | from . import data 4 | from .data.dataset_mappers.yoso_instance_lsj_dataset_mapper import YOSOInstanceLSJDatasetMapper 5 | from .data.dataset_mappers.yoso_panoptic_lsj_dataset_mapper import YOSOPanopticLSJDatasetMapper 6 | from .data.dataset_mappers.yoso_instance_dataset_mapper import YOSOInstanceDatasetMapper 7 | from .data.dataset_mappers.yoso_panoptic_dataset_mapper import YOSOPanopticDatasetMapper 8 | from .data.dataset_mappers.yoso_semantic_dataset_mapper import YOSOSemanticDatasetMapper 9 | from .utils import build_lr_scheduler, SemanticSegmentorWithTTA -------------------------------------------------------------------------------- /projects/YOSO/yoso/config.py: -------------------------------------------------------------------------------- 1 | from detectron2.config import CfgNode as CN 2 | 3 | def add_yoso_config(cfg): 4 | cfg.MODEL.YOSO = CN() 5 | cfg.MODEL.YOSO.SIZE_DIVISIBILITY = 32 6 | cfg.MODEL.YOSO.NUM_CLASSES = 133 7 | cfg.MODEL.YOSO.NUM_STAGES = 2 8 | 9 | cfg.MODEL.YOSO.IN_FEATURES = ["res2", "res3", "res4", "res5"] 10 | cfg.MODEL.YOSO.HIDDEN_DIM = 256 11 | cfg.MODEL.YOSO.AGG_DIM = 128 12 | cfg.MODEL.YOSO.NUM_PROPOSALS = 100 13 | cfg.MODEL.YOSO.CONV_KERNEL_SIZE_2D = 1 14 | cfg.MODEL.YOSO.CONV_KERNEL_SIZE_1D = 3 15 | cfg.MODEL.YOSO.NUM_CLS_FCS = 1 16 | cfg.MODEL.YOSO.NUM_MASK_FCS = 1 17 | 18 | cfg.MODEL.YOSO.NO_OBJECT_WEIGHT = 0.1 19 | cfg.MODEL.YOSO.CLASS_WEIGHT = 2.0 20 | cfg.MODEL.YOSO.MASK_WEIGHT = 5.0 21 | cfg.MODEL.YOSO.DICE_WEIGHT = 5.0 22 | cfg.MODEL.YOSO.TRAIN_NUM_POINTS = 112 * 112 23 | cfg.MODEL.YOSO.OVERSAMPLE_RATIO = 3.0 24 | cfg.MODEL.YOSO.IMPORTANCE_SAMPLE_RATIO = 0.75 25 | cfg.MODEL.YOSO.TEMPERATIRE = 0.1 26 | 27 | cfg.MODEL.YOSO.TEST = CN() 28 | cfg.MODEL.YOSO.TEST.SEMANTIC_ON = False 29 | cfg.MODEL.YOSO.TEST.INSTANCE_ON = False 30 | cfg.MODEL.YOSO.TEST.PANOPTIC_ON = False 31 | cfg.MODEL.YOSO.TEST.OBJECT_MASK_THRESHOLD = 0.0 32 | cfg.MODEL.YOSO.TEST.OVERLAP_THRESHOLD = 0.0 33 | cfg.MODEL.YOSO.TEST.SEM_SEG_POSTPROCESSING_BEFORE_INFERENCE = False 34 | 35 | cfg.SOLVER.OPTIMIZER = "ADAMW" 36 | cfg.SOLVER.BACKBONE_MULTIPLIER = 0.1 37 | cfg.SOLVER.WEIGHT_DECAY_EMBED = 0.0 38 | 
cfg.SOLVER.WEIGHT_DECAY_BIAS = None 39 | 40 | cfg.SOLVER.POLY_LR_POWER = 0.9 41 | cfg.SOLVER.POLY_LR_CONSTANT_ENDING = 0.0 42 | 43 | cfg.INPUT.DATASET_MAPPER_NAME = "yoso_panoptic_lsj" 44 | cfg.INPUT.SIZE_DIVISIBILITY = -1 45 | cfg.INPUT.COLOR_AUG_SSD = False 46 | cfg.INPUT.CROP.SINGLE_CATEGORY_MAX_AREA = 1.0 47 | 48 | cfg.INPUT.IMAGE_SIZE = 1024 49 | cfg.INPUT.MIN_SCALE = 0.1 50 | cfg.INPUT.MAX_SCALE = 2.0 51 | -------------------------------------------------------------------------------- /projects/YOSO/yoso/data/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | from . import datasets 3 | -------------------------------------------------------------------------------- /projects/YOSO/yoso/data/dataset_mappers/yoso_panoptic_dataset_mapper.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | import copy 3 | import logging 4 | 5 | import numpy as np 6 | import torch 7 | from torch.nn import functional as F 8 | 9 | from detectron2.config import configurable 10 | from detectron2.data import detection_utils as utils 11 | from detectron2.data import transforms as T 12 | from detectron2.structures import BitMasks, Instances 13 | 14 | from .yoso_semantic_dataset_mapper import YOSOSemanticDatasetMapper 15 | 16 | __all__ = ["YOSOPanopticDatasetMapper"] 17 | 18 | 19 | class YOSOPanopticDatasetMapper(YOSOSemanticDatasetMapper): 20 | """ 21 | A callable which takes a dataset dict in Detectron2 Dataset format, 22 | and map it into a format used by MaskFormer for panoptic segmentation. 23 | 24 | The callable currently does the following: 25 | 26 | 1. Read the image from "file_name" 27 | 2. Applies geometric transforms to the image and annotation 28 | 3. Find and applies suitable cropping to the image and annotation 29 | 4. Prepare image and annotation to Tensors 30 | """ 31 | 32 | @configurable 33 | def __init__( 34 | self, 35 | is_train=True, 36 | *, 37 | augmentations, 38 | image_format, 39 | ignore_label, 40 | size_divisibility, 41 | ): 42 | """ 43 | NOTE: this interface is experimental. 44 | Args: 45 | is_train: for training or inference 46 | augmentations: a list of augmentations or deterministic transforms to apply 47 | image_format: an image format supported by :func:`detection_utils.read_image`. 48 | ignore_label: the label that is ignored to evaluation 49 | size_divisibility: pad image size to be divisible by this value 50 | """ 51 | super().__init__( 52 | is_train, 53 | augmentations=augmentations, 54 | image_format=image_format, 55 | ignore_label=ignore_label, 56 | size_divisibility=size_divisibility, 57 | ) 58 | 59 | def __call__(self, dataset_dict): 60 | """ 61 | Args: 62 | dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format. 63 | 64 | Returns: 65 | dict: a format that builtin models in detectron2 accept 66 | """ 67 | assert self.is_train, "YOSOPanopticDatasetMapper should only be used for training!" 
68 | 69 | dataset_dict = copy.deepcopy(dataset_dict) # it will be modified by code below 70 | image = utils.read_image(dataset_dict["file_name"], format=self.img_format) 71 | utils.check_image_size(dataset_dict, image) 72 | 73 | # semantic segmentation 74 | if "sem_seg_file_name" in dataset_dict: 75 | # PyTorch transformation not implemented for uint16, so converting it to double first 76 | sem_seg_gt = utils.read_image(dataset_dict.pop("sem_seg_file_name")).astype("double") 77 | else: 78 | sem_seg_gt = None 79 | 80 | # panoptic segmentation 81 | if "pan_seg_file_name" in dataset_dict: 82 | pan_seg_gt = utils.read_image(dataset_dict.pop("pan_seg_file_name"), "RGB") 83 | segments_info = dataset_dict["segments_info"] 84 | else: 85 | pan_seg_gt = None 86 | segments_info = None 87 | 88 | if pan_seg_gt is None: 89 | raise ValueError( 90 | "Cannot find 'pan_seg_file_name' for panoptic segmentation dataset {}.".format( 91 | dataset_dict["file_name"] 92 | ) 93 | ) 94 | 95 | aug_input = T.AugInput(image, sem_seg=sem_seg_gt) 96 | aug_input, transforms = T.apply_transform_gens(self.tfm_gens, aug_input) 97 | image = aug_input.image 98 | if sem_seg_gt is not None: 99 | sem_seg_gt = aug_input.sem_seg 100 | 101 | # apply the same transformation to panoptic segmentation 102 | pan_seg_gt = transforms.apply_segmentation(pan_seg_gt) 103 | 104 | from panopticapi.utils import rgb2id 105 | 106 | pan_seg_gt = rgb2id(pan_seg_gt) 107 | 108 | # Pad image and segmentation label here! 109 | image = torch.as_tensor(np.ascontiguousarray(image.transpose(2, 0, 1))) 110 | if sem_seg_gt is not None: 111 | sem_seg_gt = torch.as_tensor(sem_seg_gt.astype("long")) 112 | pan_seg_gt = torch.as_tensor(pan_seg_gt.astype("long")) 113 | 114 | if self.size_divisibility > 0: 115 | image_size = (image.shape[-2], image.shape[-1]) 116 | padding_size = [ 117 | 0, 118 | self.size_divisibility - image_size[1], 119 | 0, 120 | self.size_divisibility - image_size[0], 121 | ] 122 | image = F.pad(image, padding_size, value=128).contiguous() 123 | if sem_seg_gt is not None: 124 | sem_seg_gt = F.pad(sem_seg_gt, padding_size, value=self.ignore_label).contiguous() 125 | pan_seg_gt = F.pad( 126 | pan_seg_gt, padding_size, value=0 127 | ).contiguous() # 0 is the VOID panoptic label 128 | 129 | image_shape = (image.shape[-2], image.shape[-1]) # h, w 130 | 131 | # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory, 132 | # but not efficient on large generic data structures due to the use of pickle & mp.Queue. 133 | # Therefore it's important to use torch.Tensor. 
134 | dataset_dict["image"] = image 135 | if sem_seg_gt is not None: 136 | dataset_dict["sem_seg"] = sem_seg_gt.long() 137 | 138 | if "annotations" in dataset_dict: 139 | raise ValueError("Pemantic segmentation dataset should not have 'annotations'.") 140 | 141 | # Prepare per-category binary masks 142 | pan_seg_gt = pan_seg_gt.numpy() 143 | instances = Instances(image_shape) 144 | classes = [] 145 | masks = [] 146 | for segment_info in segments_info: 147 | class_id = segment_info["category_id"] 148 | if not segment_info["iscrowd"]: 149 | classes.append(class_id) 150 | masks.append(pan_seg_gt == segment_info["id"]) 151 | 152 | classes = np.array(classes) 153 | instances.gt_classes = torch.tensor(classes, dtype=torch.int64) 154 | if len(masks) == 0: 155 | # Some image does not have annotation (all ignored) 156 | instances.gt_masks = torch.zeros((0, pan_seg_gt.shape[-2], pan_seg_gt.shape[-1])) 157 | else: 158 | masks = BitMasks( 159 | torch.stack([torch.from_numpy(np.ascontiguousarray(x.copy())) for x in masks]) 160 | ) 161 | instances.gt_masks = masks.tensor 162 | 163 | dataset_dict["instances"] = instances 164 | 165 | return dataset_dict 166 | -------------------------------------------------------------------------------- /projects/YOSO/yoso/data/dataset_mappers/yoso_panoptic_lsj_dataset_mapper.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # Modified by Bowen Cheng from https://github.com/facebookresearch/detr/blob/master/d2/detr/dataset_mapper.py 3 | import copy 4 | import logging 5 | 6 | import numpy as np 7 | import torch 8 | 9 | from detectron2.config import configurable 10 | from detectron2.data import detection_utils as utils 11 | from detectron2.data import transforms as T 12 | from detectron2.data.transforms import TransformGen 13 | from detectron2.structures import BitMasks, Boxes, Instances 14 | 15 | __all__ = ["YOSOPanopticLSJDatasetMapper"] 16 | 17 | 18 | def build_transform_gen(cfg, is_train): 19 | """ 20 | Create a list of default :class:`Augmentation` from config. 21 | Now it includes resizing and flipping. 22 | Returns: 23 | list[Augmentation] 24 | """ 25 | assert is_train, "Only support training augmentation" 26 | image_size = cfg.INPUT.IMAGE_SIZE 27 | min_scale = cfg.INPUT.MIN_SCALE 28 | max_scale = cfg.INPUT.MAX_SCALE 29 | 30 | augmentation = [] 31 | 32 | if cfg.INPUT.RANDOM_FLIP != "none": 33 | augmentation.append( 34 | T.RandomFlip( 35 | horizontal=cfg.INPUT.RANDOM_FLIP == "horizontal", 36 | vertical=cfg.INPUT.RANDOM_FLIP == "vertical", 37 | ) 38 | ) 39 | 40 | augmentation.extend([ 41 | T.ResizeScale( 42 | min_scale=min_scale, max_scale=max_scale, target_height=image_size, target_width=image_size 43 | ), 44 | T.FixedSizeCrop(crop_size=(image_size, image_size)), 45 | ]) 46 | 47 | return augmentation 48 | 49 | 50 | # This is specifically designed for the COCO dataset. 51 | class YOSOPanopticLSJDatasetMapper: 52 | """ 53 | A callable which takes a dataset dict in Detectron2 Dataset format, 54 | and map it into a format used by MaskFormer. 55 | 56 | This dataset mapper applies the same transformation as DETR for COCO panoptic segmentation. 57 | 58 | The callable currently does the following: 59 | 60 | 1. Read the image from "file_name" 61 | 2. Applies geometric transforms to the image and annotation 62 | 3. Find and applies suitable cropping to the image and annotation 63 | 4. 
Prepare image and annotation to Tensors 64 | """ 65 | 66 | @configurable 67 | def __init__( 68 | self, 69 | is_train=True, 70 | *, 71 | tfm_gens, 72 | image_format, 73 | ): 74 | """ 75 | NOTE: this interface is experimental. 76 | Args: 77 | is_train: for training or inference 78 | augmentations: a list of augmentations or deterministic transforms to apply 79 | crop_gen: crop augmentation 80 | tfm_gens: data augmentation 81 | image_format: an image format supported by :func:`detection_utils.read_image`. 82 | """ 83 | self.tfm_gens = tfm_gens 84 | logging.getLogger(__name__).info( 85 | "[YOSOPanopticLSJDatasetMapper] Full TransformGens used in training: {}".format( 86 | str(self.tfm_gens) 87 | ) 88 | ) 89 | 90 | self.img_format = image_format 91 | self.is_train = is_train 92 | 93 | @classmethod 94 | def from_config(cls, cfg, is_train=True): 95 | # Build augmentation 96 | tfm_gens = build_transform_gen(cfg, is_train) 97 | 98 | ret = { 99 | "is_train": is_train, 100 | "tfm_gens": tfm_gens, 101 | "image_format": cfg.INPUT.FORMAT, 102 | } 103 | return ret 104 | 105 | def __call__(self, dataset_dict): 106 | """ 107 | Args: 108 | dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format. 109 | 110 | Returns: 111 | dict: a format that builtin models in detectron2 accept 112 | """ 113 | dataset_dict = copy.deepcopy(dataset_dict) # it will be modified by code below 114 | image = utils.read_image(dataset_dict["file_name"], format=self.img_format) 115 | utils.check_image_size(dataset_dict, image) 116 | 117 | image, transforms = T.apply_transform_gens(self.tfm_gens, image) 118 | image_shape = image.shape[:2] # h, w 119 | 120 | # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory, 121 | # but not efficient on large generic data structures due to the use of pickle & mp.Queue. 122 | # Therefore it's important to use torch.Tensor. 123 | dataset_dict["image"] = torch.as_tensor(np.ascontiguousarray(image.transpose(2, 0, 1))) 124 | 125 | if not self.is_train: 126 | # USER: Modify this if you want to keep them for some reason. 
127 | dataset_dict.pop("annotations", None) 128 | return dataset_dict 129 | 130 | if "pan_seg_file_name" in dataset_dict: 131 | pan_seg_gt = utils.read_image(dataset_dict.pop("pan_seg_file_name"), "RGB") 132 | segments_info = dataset_dict["segments_info"] 133 | 134 | # apply the same transformation to panoptic segmentation 135 | pan_seg_gt = transforms.apply_segmentation(pan_seg_gt) 136 | 137 | from panopticapi.utils import rgb2id 138 | 139 | pan_seg_gt = rgb2id(pan_seg_gt) 140 | 141 | instances = Instances(image_shape) 142 | classes = [] 143 | masks = [] 144 | for segment_info in segments_info: 145 | class_id = segment_info["category_id"] 146 | if not segment_info["iscrowd"]: 147 | classes.append(class_id) 148 | masks.append(pan_seg_gt == segment_info["id"]) 149 | 150 | classes = np.array(classes) 151 | instances.gt_classes = torch.tensor(classes, dtype=torch.int64) 152 | if len(masks) == 0: 153 | # Some image does not have annotation (all ignored) 154 | instances.gt_masks = torch.zeros((0, pan_seg_gt.shape[-2], pan_seg_gt.shape[-1])) 155 | instances.gt_boxes = Boxes(torch.zeros((0, 4))) 156 | else: 157 | masks = BitMasks( 158 | torch.stack([torch.from_numpy(np.ascontiguousarray(x.copy())) for x in masks]) 159 | ) 160 | instances.gt_masks = masks.tensor 161 | instances.gt_boxes = masks.get_bounding_boxes() 162 | 163 | dataset_dict["instances"] = instances 164 | 165 | return dataset_dict 166 | -------------------------------------------------------------------------------- /projects/YOSO/yoso/data/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | from . import ( 3 | register_ade20k_full, 4 | register_ade20k_panoptic, 5 | register_coco_stuff_10k, 6 | register_mapillary_vistas, 7 | register_coco_panoptic_annos_semseg, 8 | register_ade20k_instance, 9 | register_mapillary_vistas_panoptic, 10 | ) 11 | -------------------------------------------------------------------------------- /projects/YOSO/yoso/data/datasets/register_ade20k_instance.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
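# Registers the COCO-format ADE20K instance splits ("ade20k_instance_train" and
# "ade20k_instance_val", 100 thing categories) with detectron2's DatasetCatalog and
# MetadataCatalog; the dataset root is read from $DETECTRON2_DATASETS (default: "datasets").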
2 | import json 3 | import logging 4 | import numpy as np 5 | import os 6 | from PIL import Image 7 | 8 | from detectron2.data import DatasetCatalog, MetadataCatalog 9 | from detectron2.data.datasets.coco import load_coco_json, register_coco_instances 10 | from detectron2.utils.file_io import PathManager 11 | 12 | ADE_CATEGORIES = [{'id': 7, 'name': 'bed'}, {'id': 8, 'name': 'windowpane'}, {'id': 10, 'name': 'cabinet'}, {'id': 12, 'name': 'person'}, {'id': 14, 'name': 'door'}, {'id': 15, 'name': 'table'}, {'id': 18, 'name': 'curtain'}, {'id': 19, 'name': 'chair'}, {'id': 20, 'name': 'car'}, {'id': 22, 'name': 'painting'}, {'id': 23, 'name': 'sofa'}, {'id': 24, 'name': 'shelf'}, {'id': 27, 'name': 'mirror'}, {'id': 30, 'name': 'armchair'}, {'id': 31, 'name': 'seat'}, {'id': 32, 'name': 'fence'}, {'id': 33, 'name': 'desk'}, {'id': 35, 'name': 'wardrobe'}, {'id': 36, 'name': 'lamp'}, {'id': 37, 'name': 'bathtub'}, {'id': 38, 'name': 'railing'}, {'id': 39, 'name': 'cushion'}, {'id': 41, 'name': 'box'}, {'id': 42, 'name': 'column'}, {'id': 43, 'name': 'signboard'}, {'id': 44, 'name': 'chest of drawers'}, {'id': 45, 'name': 'counter'}, {'id': 47, 'name': 'sink'}, {'id': 49, 'name': 'fireplace'}, {'id': 50, 'name': 'refrigerator'}, {'id': 53, 'name': 'stairs'}, {'id': 55, 'name': 'case'}, {'id': 56, 'name': 'pool table'}, {'id': 57, 'name': 'pillow'}, {'id': 58, 'name': 'screen door'}, {'id': 62, 'name': 'bookcase'}, {'id': 64, 'name': 'coffee table'}, {'id': 65, 'name': 'toilet'}, {'id': 66, 'name': 'flower'}, {'id': 67, 'name': 'book'}, {'id': 69, 'name': 'bench'}, {'id': 70, 'name': 'countertop'}, {'id': 71, 'name': 'stove'}, {'id': 72, 'name': 'palm'}, {'id': 73, 'name': 'kitchen island'}, {'id': 74, 'name': 'computer'}, {'id': 75, 'name': 'swivel chair'}, {'id': 76, 'name': 'boat'}, {'id': 78, 'name': 'arcade machine'}, {'id': 80, 'name': 'bus'}, {'id': 81, 'name': 'towel'}, {'id': 82, 'name': 'light'}, {'id': 83, 'name': 'truck'}, {'id': 85, 'name': 'chandelier'}, {'id': 86, 'name': 'awning'}, {'id': 87, 'name': 'streetlight'}, {'id': 88, 'name': 'booth'}, {'id': 89, 'name': 'television receiver'}, {'id': 90, 'name': 'airplane'}, {'id': 92, 'name': 'apparel'}, {'id': 93, 'name': 'pole'}, {'id': 95, 'name': 'bannister'}, {'id': 97, 'name': 'ottoman'}, {'id': 98, 'name': 'bottle'}, {'id': 102, 'name': 'van'}, {'id': 103, 'name': 'ship'}, {'id': 104, 'name': 'fountain'}, {'id': 107, 'name': 'washer'}, {'id': 108, 'name': 'plaything'}, {'id': 110, 'name': 'stool'}, {'id': 111, 'name': 'barrel'}, {'id': 112, 'name': 'basket'}, {'id': 115, 'name': 'bag'}, {'id': 116, 'name': 'minibike'}, {'id': 118, 'name': 'oven'}, {'id': 119, 'name': 'ball'}, {'id': 120, 'name': 'food'}, {'id': 121, 'name': 'step'}, {'id': 123, 'name': 'trade name'}, {'id': 124, 'name': 'microwave'}, {'id': 125, 'name': 'pot'}, {'id': 126, 'name': 'animal'}, {'id': 127, 'name': 'bicycle'}, {'id': 129, 'name': 'dishwasher'}, {'id': 130, 'name': 'screen'}, {'id': 132, 'name': 'sculpture'}, {'id': 133, 'name': 'hood'}, {'id': 134, 'name': 'sconce'}, {'id': 135, 'name': 'vase'}, {'id': 136, 'name': 'traffic light'}, {'id': 137, 'name': 'tray'}, {'id': 138, 'name': 'ashcan'}, {'id': 139, 'name': 'fan'}, {'id': 142, 'name': 'plate'}, {'id': 143, 'name': 'monitor'}, {'id': 144, 'name': 'bulletin board'}, {'id': 146, 'name': 'radiator'}, {'id': 147, 'name': 'glass'}, {'id': 148, 'name': 'clock'}, {'id': 149, 'name': 'flag'}] 13 | 14 | 15 | _PREDEFINED_SPLITS = { 16 | # point annotations without masks 17 | "ade20k_instance_train": ( 18 
| "ADEChallengeData2016/images/training", 19 | "ADEChallengeData2016/ade20k_instance_train.json", 20 | ), 21 | "ade20k_instance_val": ( 22 | "ADEChallengeData2016/images/validation", 23 | "ADEChallengeData2016/ade20k_instance_val.json", 24 | ), 25 | } 26 | 27 | 28 | def _get_ade_instances_meta(): 29 | thing_ids = [k["id"] for k in ADE_CATEGORIES] 30 | assert len(thing_ids) == 100, len(thing_ids) 31 | # Mapping from the incontiguous ADE category id to an id in [0, 99] 32 | thing_dataset_id_to_contiguous_id = {k: i for i, k in enumerate(thing_ids)} 33 | thing_classes = [k["name"] for k in ADE_CATEGORIES] 34 | ret = { 35 | "thing_dataset_id_to_contiguous_id": thing_dataset_id_to_contiguous_id, 36 | "thing_classes": thing_classes, 37 | } 38 | return ret 39 | 40 | 41 | def register_all_ade20k_instance(root): 42 | for key, (image_root, json_file) in _PREDEFINED_SPLITS.items(): 43 | # Assume pre-defined datasets live in `./datasets`. 44 | register_coco_instances( 45 | key, 46 | _get_ade_instances_meta(), 47 | os.path.join(root, json_file) if "://" not in json_file else json_file, 48 | os.path.join(root, image_root), 49 | ) 50 | 51 | 52 | _root = os.getenv("DETECTRON2_DATASETS", "datasets") 53 | register_all_ade20k_instance(_root) 54 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [isort] 2 | line_length=100 3 | multi_line_output=3 4 | include_trailing_comma=True 5 | known_standard_library=numpy,setuptools,mock 6 | skip=./datasets,docs 7 | skip_glob=*/__init__.py 8 | known_myself=detectron2 9 | known_third_party=fvcore,matplotlib,cv2,torch,torchvision,PIL,pycocotools,yacs,termcolor,cityscapesscripts,tabulate,tqdm,scipy,lvis,psutil,pkg_resources,caffe2,onnx 10 | no_lines_before=STDLIB,THIRDPARTY 11 | sections=FUTURE,STDLIB,THIRDPARTY,myself,FIRSTPARTY,LOCALFOLDER 12 | default_section=FIRSTPARTY 13 | 14 | [mypy] 15 | python_version=3.6 16 | ignore_missing_imports = True 17 | warn_unused_configs = True 18 | disallow_untyped_defs = True 19 | check_untyped_defs = True 20 | warn_unused_ignores = True 21 | warn_redundant_casts = True 22 | show_column_numbers = True 23 | follow_imports = silent 24 | allow_redefinition = True 25 | ; Require all functions to be annotated 26 | disallow_incomplete_defs = True 27 | --------------------------------------------------------------------------------