├── .gitignore
├── LICENSE
├── README.md
├── demo
│   ├── config.py
│   ├── demo.py
│   ├── evaluate_pq.ipynb
│   ├── fig.png
│   ├── head_Latency_and_FLOPs.ipynb
│   ├── neck_Latency_and_FLOPs.ipynb
│   └── predictor.py
├── detectron2
│   ├── __init__.py
│   ├── checkpoint
│   │   ├── __init__.py
│   │   ├── c2_model_loading.py
│   │   ├── catalog.py
│   │   └── detection_checkpoint.py
│   ├── config
│   │   ├── __init__.py
│   │   ├── compat.py
│   │   ├── config.py
│   │   └── defaults.py
│   ├── data
│   │   ├── __init__.py
│   │   ├── build.py
│   │   ├── catalog.py
│   │   ├── common.py
│   │   ├── dataset_mapper.py
│   │   ├── datasets
│   │   │   ├── README.md
│   │   │   ├── __init__.py
│   │   │   ├── builtin.py
│   │   │   ├── builtin_meta.py
│   │   │   ├── cityscapes.py
│   │   │   ├── cityscapes_panoptic.py
│   │   │   ├── coco.py
│   │   │   ├── coco_panoptic.py
│   │   │   ├── crowdhuman.py
│   │   │   ├── lvis.py
│   │   │   ├── lvis_v0_5_categories.py
│   │   │   ├── lvis_v1_categories.py
│   │   │   ├── mot.py
│   │   │   ├── pascal_voc.py
│   │   │   └── register_coco.py
│   │   ├── detection_utils.py
│   │   ├── samplers
│   │   │   ├── __init__.py
│   │   │   ├── distributed_sampler.py
│   │   │   └── grouped_batch_sampler.py
│   │   └── transforms
│   │       ├── __init__.py
│   │       ├── augmentation.py
│   │       ├── augmentation_impl.py
│   │       └── transform.py
│   ├── engine
│   │   ├── __init__.py
│   │   ├── defaults.py
│   │   ├── hooks.py
│   │   ├── launch.py
│   │   └── train_loop.py
│   ├── evaluation
│   │   ├── __init__.py
│   │   ├── cityscapes_evaluation.py
│   │   ├── coco_evaluation.py
│   │   ├── evaluator.py
│   │   ├── fast_eval_api.py
│   │   ├── lvis_evaluation.py
│   │   ├── panoptic_evaluation.py
│   │   ├── pascal_voc_evaluation.py
│   │   ├── rotated_coco_evaluation.py
│   │   ├── sem_seg_evaluation.py
│   │   └── testing.py
│   ├── export
│   │   ├── README.md
│   │   ├── __init__.py
│   │   ├── api.py
│   │   ├── c10.py
│   │   ├── caffe2_export.py
│   │   ├── caffe2_inference.py
│   │   ├── caffe2_modeling.py
│   │   ├── caffe2_patch.py
│   │   ├── shared.py
│   │   ├── torchscript.py
│   │   └── torchscript_patch.py
│   ├── layers
│   │   ├── __init__.py
│   │   ├── aspp.py
│   │   ├── batch_norm.py
│   │   ├── blocks.py
│   │   ├── csrc
│   │   │   ├── README.md
│   │   │   ├── ROIAlign
│   │   │   │   ├── ROIAlign.h
│   │   │   │   ├── ROIAlign_cpu.cpp
│   │   │   │   └── ROIAlign_cuda.cu
│   │   │   ├── ROIAlignRotated
│   │   │   │   ├── ROIAlignRotated.h
│   │   │   │   ├── ROIAlignRotated_cpu.cpp
│   │   │   │   └── ROIAlignRotated_cuda.cu
│   │   │   ├── box_iou_rotated
│   │   │   │   ├── box_iou_rotated.h
│   │   │   │   ├── box_iou_rotated_cpu.cpp
│   │   │   │   ├── box_iou_rotated_cuda.cu
│   │   │   │   └── box_iou_rotated_utils.h
│   │   │   ├── cocoeval
│   │   │   │   ├── cocoeval.cpp
│   │   │   │   └── cocoeval.h
│   │   │   ├── cuda_version.cu
│   │   │   ├── deformable
│   │   │   │   ├── deform_conv.h
│   │   │   │   ├── deform_conv_cuda.cu
│   │   │   │   └── deform_conv_cuda_kernel.cu
│   │   │   ├── nms_rotated
│   │   │   │   ├── nms_rotated.h
│   │   │   │   ├── nms_rotated_cpu.cpp
│   │   │   │   └── nms_rotated_cuda.cu
│   │   │   └── vision.cpp
│   │   ├── deform_conv.py
│   │   ├── mask_ops.py
│   │   ├── nms.py
│   │   ├── roi_align.py
│   │   ├── roi_align_rotated.py
│   │   ├── rotated_boxes.py
│   │   ├── shape_spec.py
│   │   └── wrappers.py
│   ├── model_zoo
│   │   ├── __init__.py
│   │   └── model_zoo.py
│   ├── modeling
│   │   ├── __init__.py
│   │   ├── anchor_generator.py
│   │   ├── backbone
│   │   │   ├── __init__.py
│   │   │   ├── backbone.py
│   │   │   ├── build.py
│   │   │   ├── fpn.py
│   │   │   └── resnet.py
│   │   ├── box_regression.py
│   │   ├── matcher.py
│   │   ├── meta_arch
│   │   │   ├── __init__.py
│   │   │   ├── build.py
│   │   │   ├── panoptic_fpn.py
│   │   │   ├── rcnn.py
│   │   │   ├── retinanet.py
│   │   │   └── semantic_seg.py
│   │   ├── poolers.py
│   │   ├── postprocessing.py
│   │   ├── proposal_generator
│   │   │   ├── __init__.py
│   │   │   ├── build.py
│   │   │   ├── proposal_utils.py
│   │   │   ├── rpn.py
│   │   │   └── rrpn.py
│   │   ├── roi_heads
│   │   │   ├── __init__.py
│   │   │   ├── box_head.py
│   │   │   ├── cascade_rcnn.py
│   │   │   ├── fast_rcnn.py
│   │   │   ├── keypoint_head.py
│   │   │   ├── mask_head.py
│   │   │   ├── roi_heads.py
│   │   │   └── rotated_fast_rcnn.py
│   │   ├── sampling.py
│   │   └── test_time_augmentation.py
│   ├── projects
│   │   ├── README.md
│   │   └── __init__.py
│   ├── solver
│   │   ├── __init__.py
│   │   ├── build.py
│   │   └── lr_scheduler.py
│   ├── structures
│   │   ├── __init__.py
│   │   ├── boxes.py
│   │   ├── image_list.py
│   │   ├── instances.py
│   │   ├── keypoints.py
│   │   ├── masks.py
│   │   └── rotated_boxes.py
│   └── utils
│       ├── README.md
│       ├── __init__.py
│       ├── analysis.py
│       ├── collect_env.py
│       ├── colormap.py
│       ├── comm.py
│       ├── env.py
│       ├── events.py
│       ├── file_io.py
│       ├── logger.py
│       ├── memory.py
│       ├── registry.py
│       ├── serialize.py
│       ├── testing.py
│       ├── video_visualizer.py
│       └── visualizer.py
├── projects
│   └── YOSO
│       ├── configs
│       │   ├── ade20k
│       │   │   └── panoptic-segmentation
│       │   │       ├── Base-ADE20K-PanopticSegmentation.yaml
│       │   │       └── YOSO-R50.yaml
│       │   ├── cityscapes
│       │   │   └── panoptic-segmentation
│       │   │       ├── Base-Cityscapes-PanopticSegmentation.yaml
│       │   │       └── YOSO-R50.yaml
│       │   ├── coco
│       │   │   └── panoptic-segmentation
│       │   │       ├── Base-COCO-PanopticSegmentation.yaml
│       │   │       └── YOSO-R50.yaml
│       │   └── mapillary-vistas
│       │       └── panoptic-segmentation
│       │           ├── Base-MapillaryVistas-PanopticSegmentation.yaml
│       │           └── YOSO-R50.yaml
│       ├── train_net.py
│       └── yoso
│           ├── __init__.py
│           ├── config.py
│           ├── data
│           │   ├── __init__.py
│           │   ├── dataset_mappers
│           │   │   ├── yoso_instance_dataset_mapper.py
│           │   │   ├── yoso_instance_lsj_dataset_mapper.py
│           │   │   ├── yoso_panoptic_dataset_mapper.py
│           │   │   ├── yoso_panoptic_lsj_dataset_mapper.py
│           │   │   └── yoso_semantic_dataset_mapper.py
│           │   └── datasets
│           │       ├── __init__.py
│           │       ├── register_ade20k_full.py
│           │       ├── register_ade20k_instance.py
│           │       ├── register_ade20k_panoptic.py
│           │       ├── register_coco_panoptic_annos_semseg.py
│           │       ├── register_coco_stuff_10k.py
│           │       ├── register_mapillary_vistas.py
│           │       └── register_mapillary_vistas_panoptic.py
│           ├── head.py
│           ├── loss.py
│           ├── neck.py
│           ├── segmentator.py
│           └── utils.py
├── setup.cfg
└── setup.py
/.gitignore:
--------------------------------------------------------------------------------
1 | # output dir
2 | output*
3 |
4 |
5 | *.png
6 | *.json
7 | *.diff
8 | *.jpg
9 | !/projects/DensePose/doc/images/*.jpg
10 |
11 | # compilation and distribution
12 | __pycache__
13 | _ext
14 | *.pyc
15 | *.pyd
16 | *.so
17 | *.dll
18 | *.egg-info/
19 | build/
20 | dist/
21 | wheels/
22 |
23 | # pytorch/python/numpy formats
24 | *.pth
25 | *.pkl
26 | *.npy
27 | *.ts
28 | model_ts*.txt
29 |
30 | # ipython/jupyter notebooks
31 | # *.ipynb
32 | **/.ipynb_checkpoints/
33 |
34 | # Editor temporaries
35 | *.swn
36 | *.swo
37 | *.swp
38 | *~
39 |
40 | # editor settings
41 | .idea
42 | .vscode
43 | _darcs
44 |
45 | # project dirs
46 | # /detectron2/model_zoo/configs
47 | /datasets/*
48 | !/datasets/*.*
49 | /projects/*/datasets
50 | /models
51 | /snippet
52 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2023 Jie Hu
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | This is the project page for the paper:
2 | >[**You Only Segment Once: Towards Real-Time Panoptic Segmentation**](https://arxiv.org/abs/2303.14651), In CVPR 2023.
3 |
4 |
5 |
6 | ## Model Zoo
7 |
8 | On COCO validation set:
9 | | Backbone | Scale | PQ | FPS | GPU | Model |
10 | |:---:|:---:|:---:|:---:|:---:|:---:|
11 | |R50|800,1333|48.4|23.6|V100| [model](https://github.com/hujiecpp/YOSO/releases/download/v0.1/yoso_res50_coco.pth) |
12 | |R50|512,800|46.4|45.6|V100|[model](https://github.com/hujiecpp/YOSO/releases/download/v0.1/yoso_res50_coco.pth)|
13 |
14 | On Cityscapes validation set:
15 | | Backbone | Scale | PQ | FPS | GPU | Model |
16 | |:---:|:---:|:---:|:---:|:---:|:---:|
17 | |R50|1024,2048|59.7|11.1|V100|[model](https://github.com/hujiecpp/YOSO/releases/download/v0.1/yoso_res50_cityscapes.pth)|
18 | |R50|512,1024|52.5|22.6|V100|[model](https://github.com/hujiecpp/YOSO/releases/download/v0.1/yoso_res50_cityscapes.pth)|
19 |
20 | On ADE20k validation set:
21 | | Backbone | Scale | PQ | FPS | GPU | Model |
22 | |:---:|:---:|:---:|:---:|:---:|:---:|
23 | |R50|640,2560|38.0|35.4|V100|[model](https://github.com/hujiecpp/YOSO/releases/download/v0.1/yoso_res50_ade20k.pth)|
24 |
25 | On Mapillary Vistas validation set:
26 | | Backbone | Scale | PQ | FPS | GPU | Model |
27 | |:---:|:---:|:---:|:---:|:---:|:---:|
28 | |R50|2048,2048|34.1|7.1|A100|[model](https://github.com/hujiecpp/YOSO/releases/download/v0.1/yoso_res50_mapillary.pth)|
29 |
30 | ## Getting Started
31 | ### Installation
32 | We recommend using [Anaconda](https://www.anaconda.com/) for installation.
33 | ```bash
34 | conda create -n YOSO python=3.8 -y
35 | conda activate YOSO
36 | conda install pytorch==1.10.1 torchvision==0.11.2 cudatoolkit=11.3 -c pytorch
37 | pip install pycocotools -i https://pypi.douban.com/simple
38 | pip install git+https://github.com/cocodataset/panopticapi.git
39 | git clone https://github.com/hujiecpp/YOSO.git
40 | cd YOSO
41 | python setup.py develop
42 | ```
43 |
44 | ### Datasets Preparation
45 | See [Preparing Datasets for Mask2Former](https://github.com/facebookresearch/Mask2Former/tree/main/datasets).
46 |
47 | ### Training & Evaluation
48 |
49 | - Train YOSO (e.g., on COCO dataset with R50 backbone).
50 | ```bash
51 | python projects/YOSO/train_net.py --num-gpus 4 --config-file projects/YOSO/configs/coco/panoptic-segmentation/YOSO-R50.yaml
52 | ```
53 |
54 | - Evaluate YOSO (e.g., on COCO dataset with R50 backbone).
55 | ```bash
56 | python projects/YOSO/train_net.py --num-gpus 4 --config-file projects/YOSO/configs/coco/panoptic-segmentation/YOSO-R50.yaml --eval-only MODEL.WEIGHTS ./model_zoo/yoso_res50_coco.pth
57 | ```
58 |
59 | ### Inference on Custom Image or Video
60 |
61 | - Run YOSO demo (e.g., on video with R50 backbone).
62 | ```bash
63 | python demo/demo.py --config-file projects/YOSO/configs/coco/panoptic-segmentation/YOSO-R50.yaml --video-input input_video.mp4 --output output_video.mp4 --opts MODEL.WEIGHTS ./model_zoo/yoso_res50_coco.pth
64 | ```
65 |
66 | ## Acknowledgements
67 |
68 | - [Mask2Former](https://github.com/facebookresearch/Mask2Former)
69 | - [K-Net](https://github.com/ZwwWayne/K-Net)
70 |
71 | ## Citing YOSO
72 |
73 | If YOSO helps your research, please cite it in your publications:
74 |
75 | ```BibTeX
76 | @inproceedings{hu2023you,
77 | title={You Only Segment Once: Towards Real-Time Panoptic Segmentation},
78 | author={Hu, Jie and Huang, Linyan and Ren, Tianhe and Zhang, Shengchuan and Ji, Rongrong and Cao, Liujuan},
79 | booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
80 | pages={17819--17829},
81 | year={2023}
82 | }
83 | ```
84 |
--------------------------------------------------------------------------------
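
For reference, the training, evaluation, and demo commands in README.md above all go through detectron2's standard config-and-predictor workflow. The sketch below illustrates the same inference path from Python; it is not a file in this repository, and the import locations of `add_yoso_config` (demo/config.py) and the `yoso` package, as well as the image and weight paths, are assumptions for illustration (it assumes `projects/YOSO` and `demo/` are on `PYTHONPATH` and that importing `yoso` registers the YOSO meta-architecture).

```python
# Illustrative sketch only: single-image panoptic inference with the YOSO config.
import cv2

from detectron2.config import get_cfg
from detectron2.engine.defaults import DefaultPredictor

from config import add_yoso_config  # demo/config.py (assumed importable)
import yoso  # noqa: F401 -- assumed to register the YOSO model on import

cfg = get_cfg()
add_yoso_config(cfg)
cfg.merge_from_file("projects/YOSO/configs/coco/panoptic-segmentation/YOSO-R50.yaml")
cfg.MODEL.WEIGHTS = "./model_zoo/yoso_res50_coco.pth"
cfg.freeze()

predictor = DefaultPredictor(cfg)
image = cv2.imread("input.jpg")  # BGR image, as expected by the default input format
panoptic_seg, segments_info = predictor(image)["panoptic_seg"]
```

demo/demo.py and demo/predictor.py remain the supported entry points for images and video.
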
/demo/config.py:
--------------------------------------------------------------------------------
1 | from detectron2.config import CfgNode as CN
2 |
3 | def add_yoso_config(cfg):
4 | cfg.MODEL.YOSO = CN()
5 | cfg.MODEL.YOSO.SIZE_DIVISIBILITY = 32
6 | cfg.MODEL.YOSO.NUM_CLASSES = 133
7 | cfg.MODEL.YOSO.NUM_STAGES = 2
8 |
9 | cfg.MODEL.YOSO.IN_FEATURES = ["res2", "res3", "res4", "res5"]
10 | cfg.MODEL.YOSO.HIDDEN_DIM = 256
11 | cfg.MODEL.YOSO.AGG_DIM = 128
12 | cfg.MODEL.YOSO.NUM_PROPOSALS = 100
13 | cfg.MODEL.YOSO.CONV_KERNEL_SIZE_2D = 1
14 | cfg.MODEL.YOSO.CONV_KERNEL_SIZE_1D = 3
15 | cfg.MODEL.YOSO.NUM_CLS_FCS = 1
16 | cfg.MODEL.YOSO.NUM_MASK_FCS = 1
17 |
18 | cfg.MODEL.YOSO.NO_OBJECT_WEIGHT = 0.1
19 | cfg.MODEL.YOSO.CLASS_WEIGHT = 2.0
20 | cfg.MODEL.YOSO.MASK_WEIGHT = 5.0
21 | cfg.MODEL.YOSO.DICE_WEIGHT = 5.0
22 | cfg.MODEL.YOSO.TRAIN_NUM_POINTS = 112 * 112
23 | cfg.MODEL.YOSO.OVERSAMPLE_RATIO = 3.0
24 | cfg.MODEL.YOSO.IMPORTANCE_SAMPLE_RATIO = 0.75
25 | cfg.MODEL.YOSO.TEMPERATIRE = 0.1
26 |
27 | cfg.MODEL.YOSO.TEST = CN()
28 | cfg.MODEL.YOSO.TEST.SEMANTIC_ON = False
29 | cfg.MODEL.YOSO.TEST.INSTANCE_ON = False
30 | cfg.MODEL.YOSO.TEST.PANOPTIC_ON = False
31 | cfg.MODEL.YOSO.TEST.OBJECT_MASK_THRESHOLD = 0.0
32 | cfg.MODEL.YOSO.TEST.OVERLAP_THRESHOLD = 0.0
33 | cfg.MODEL.YOSO.TEST.SEM_SEG_POSTPROCESSING_BEFORE_INFERENCE = False
34 |
35 | cfg.SOLVER.OPTIMIZER = "ADAMW"
36 | cfg.SOLVER.BACKBONE_MULTIPLIER = 0.1
37 | cfg.SOLVER.WEIGHT_DECAY_EMBED = 0.0
38 | cfg.SOLVER.WEIGHT_DECAY_BIAS = None
39 |
40 | cfg.SOLVER.POLY_LR_POWER = 0.9
41 | cfg.SOLVER.POLY_LR_CONSTANT_ENDING = 0.0
42 |
43 | cfg.INPUT.DATASET_MAPPER_NAME = "yoso_panoptic_lsj"
44 | cfg.INPUT.SIZE_DIVISIBILITY = -1
45 | cfg.INPUT.COLOR_AUG_SSD = False
46 | cfg.INPUT.CROP.SINGLE_CATEGORY_MAX_AREA = 1.0
47 |
48 | cfg.INPUT.IMAGE_SIZE = 1024
49 | cfg.INPUT.MIN_SCALE = 0.1
50 | cfg.INPUT.MAX_SCALE = 2.0
51 |
52 |
--------------------------------------------------------------------------------
/demo/fig.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hujiecpp/YOSO/04b898d395ffd8318aa3761b0b2b6d20b3514f26/demo/fig.png
--------------------------------------------------------------------------------
/detectron2/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 |
3 | from .utils.env import setup_environment
4 |
5 | setup_environment()
6 |
7 |
8 | # This line will be programmatically read/written by setup.py.
9 | # Leave it at the bottom of this file and don't touch it.
10 | __version__ = "0.3"
11 |
--------------------------------------------------------------------------------
/detectron2/checkpoint/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # Copyright (c) Facebook, Inc. and its affiliates.
3 | # File:
4 |
5 |
6 | from . import catalog as _UNUSED # register the handler
7 | from .detection_checkpoint import DetectionCheckpointer
8 | from fvcore.common.checkpoint import Checkpointer, PeriodicCheckpointer
9 |
10 | __all__ = ["Checkpointer", "PeriodicCheckpointer", "DetectionCheckpointer"]
11 |
--------------------------------------------------------------------------------
/detectron2/checkpoint/catalog.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | import logging
3 |
4 | from detectron2.utils.file_io import PathHandler, PathManager
5 |
6 |
7 | class ModelCatalog(object):
8 | """
9 | Store mappings from names to third-party models.
10 | """
11 |
12 | S3_C2_DETECTRON_PREFIX = "https://dl.fbaipublicfiles.com/detectron"
13 |
14 | # MSRA models have STRIDE_IN_1X1=True. False otherwise.
15 | # NOTE: all BN models here have fused BN into an affine layer.
16 | # As a result, you should only load them to a model with "FrozenBN".
17 | # Loading them to a model with regular BN or SyncBN is wrong.
18 | # Even when loaded to FrozenBN, it is still different from affine by an epsilon,
19 | # which should be negligible for training.
20 | # NOTE: all models here use PIXEL_STD=[1,1,1]
21 | # NOTE: Most of the BN models here are no longer used. We use the
22 | # re-converted pre-trained models under detectron2 model zoo instead.
23 | C2_IMAGENET_MODELS = {
24 | "MSRA/R-50": "ImageNetPretrained/MSRA/R-50.pkl",
25 | "MSRA/R-101": "ImageNetPretrained/MSRA/R-101.pkl",
26 | "FAIR/R-50-GN": "ImageNetPretrained/47261647/R-50-GN.pkl",
27 | "FAIR/R-101-GN": "ImageNetPretrained/47592356/R-101-GN.pkl",
28 | "FAIR/X-101-32x8d": "ImageNetPretrained/20171220/X-101-32x8d.pkl",
29 | "FAIR/X-101-64x4d": "ImageNetPretrained/FBResNeXt/X-101-64x4d.pkl",
30 | "FAIR/X-152-32x8d-IN5k": "ImageNetPretrained/25093814/X-152-32x8d-IN5k.pkl",
31 | }
32 |
33 | C2_DETECTRON_PATH_FORMAT = (
34 | "{prefix}/{url}/output/train/{dataset}/{type}/model_final.pkl" # noqa B950
35 | )
36 |
37 | C2_DATASET_COCO = "coco_2014_train%3Acoco_2014_valminusminival"
38 | C2_DATASET_COCO_KEYPOINTS = "keypoints_coco_2014_train%3Akeypoints_coco_2014_valminusminival"
39 |
40 | # format: {model_name} -> part of the url
41 | C2_DETECTRON_MODELS = {
42 | "35857197/e2e_faster_rcnn_R-50-C4_1x": "35857197/12_2017_baselines/e2e_faster_rcnn_R-50-C4_1x.yaml.01_33_49.iAX0mXvW", # noqa B950
43 | "35857345/e2e_faster_rcnn_R-50-FPN_1x": "35857345/12_2017_baselines/e2e_faster_rcnn_R-50-FPN_1x.yaml.01_36_30.cUF7QR7I", # noqa B950
44 | "35857890/e2e_faster_rcnn_R-101-FPN_1x": "35857890/12_2017_baselines/e2e_faster_rcnn_R-101-FPN_1x.yaml.01_38_50.sNxI7sX7", # noqa B950
45 | "36761737/e2e_faster_rcnn_X-101-32x8d-FPN_1x": "36761737/12_2017_baselines/e2e_faster_rcnn_X-101-32x8d-FPN_1x.yaml.06_31_39.5MIHi1fZ", # noqa B950
46 | "35858791/e2e_mask_rcnn_R-50-C4_1x": "35858791/12_2017_baselines/e2e_mask_rcnn_R-50-C4_1x.yaml.01_45_57.ZgkA7hPB", # noqa B950
47 | "35858933/e2e_mask_rcnn_R-50-FPN_1x": "35858933/12_2017_baselines/e2e_mask_rcnn_R-50-FPN_1x.yaml.01_48_14.DzEQe4wC", # noqa B950
48 | "35861795/e2e_mask_rcnn_R-101-FPN_1x": "35861795/12_2017_baselines/e2e_mask_rcnn_R-101-FPN_1x.yaml.02_31_37.KqyEK4tT", # noqa B950
49 | "36761843/e2e_mask_rcnn_X-101-32x8d-FPN_1x": "36761843/12_2017_baselines/e2e_mask_rcnn_X-101-32x8d-FPN_1x.yaml.06_35_59.RZotkLKI", # noqa B950
50 | "48616381/e2e_mask_rcnn_R-50-FPN_2x_gn": "GN/48616381/04_2018_gn_baselines/e2e_mask_rcnn_R-50-FPN_2x_gn_0416.13_23_38.bTlTI97Q", # noqa B950
51 | "37697547/e2e_keypoint_rcnn_R-50-FPN_1x": "37697547/12_2017_baselines/e2e_keypoint_rcnn_R-50-FPN_1x.yaml.08_42_54.kdzV35ao", # noqa B950
52 | "35998355/rpn_R-50-C4_1x": "35998355/12_2017_baselines/rpn_R-50-C4_1x.yaml.08_00_43.njH5oD9L", # noqa B950
53 | "35998814/rpn_R-50-FPN_1x": "35998814/12_2017_baselines/rpn_R-50-FPN_1x.yaml.08_06_03.Axg0r179", # noqa B950
54 | "36225147/fast_R-50-FPN_1x": "36225147/12_2017_baselines/fast_rcnn_R-50-FPN_1x.yaml.08_39_09.L3obSdQ2", # noqa B950
55 | }
56 |
57 | @staticmethod
58 | def get(name):
59 | if name.startswith("Caffe2Detectron/COCO"):
60 | return ModelCatalog._get_c2_detectron_baseline(name)
61 | if name.startswith("ImageNetPretrained/"):
62 | return ModelCatalog._get_c2_imagenet_pretrained(name)
63 | raise RuntimeError("model not present in the catalog: {}".format(name))
64 |
65 | @staticmethod
66 | def _get_c2_imagenet_pretrained(name):
67 | prefix = ModelCatalog.S3_C2_DETECTRON_PREFIX
68 | name = name[len("ImageNetPretrained/") :]
69 | name = ModelCatalog.C2_IMAGENET_MODELS[name]
70 | url = "/".join([prefix, name])
71 | return url
72 |
73 | @staticmethod
74 | def _get_c2_detectron_baseline(name):
75 | name = name[len("Caffe2Detectron/COCO/") :]
76 | url = ModelCatalog.C2_DETECTRON_MODELS[name]
77 | if "keypoint_rcnn" in name:
78 | dataset = ModelCatalog.C2_DATASET_COCO_KEYPOINTS
79 | else:
80 | dataset = ModelCatalog.C2_DATASET_COCO
81 |
82 | if "35998355/rpn_R-50-C4_1x" in name:
83 | # this one model is somehow different from others ..
84 | type = "rpn"
85 | else:
86 | type = "generalized_rcnn"
87 |
88 | # Detectron C2 models are stored in the structure defined in `C2_DETECTRON_PATH_FORMAT`.
89 | url = ModelCatalog.C2_DETECTRON_PATH_FORMAT.format(
90 | prefix=ModelCatalog.S3_C2_DETECTRON_PREFIX, url=url, type=type, dataset=dataset
91 | )
92 | return url
93 |
94 |
95 | class ModelCatalogHandler(PathHandler):
96 | """
97 | Resolve URL like catalog://.
98 | """
99 |
100 | PREFIX = "catalog://"
101 |
102 | def _get_supported_prefixes(self):
103 | return [self.PREFIX]
104 |
105 | def _get_local_path(self, path):
106 | logger = logging.getLogger(__name__)
107 | catalog_path = ModelCatalog.get(path[len(self.PREFIX) :])
108 | logger.info("Catalog entry {} points to {}".format(path, catalog_path))
109 | return PathManager.get_local_path(catalog_path)
110 |
111 | def _open(self, path, mode="r", **kwargs):
112 | return PathManager.open(self._get_local_path(path), mode, **kwargs)
113 |
114 |
115 | PathManager.register_handler(ModelCatalogHandler())
116 |
--------------------------------------------------------------------------------
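
A short sketch of how the `catalog://` scheme above resolves in practice (the final step needs network access to actually download the file):

```python
from detectron2.checkpoint.catalog import ModelCatalog
from detectron2.utils.file_io import PathManager

# ModelCatalog.get() maps a catalog name to its public URL on dl.fbaipublicfiles.com.
url = ModelCatalog.get("ImageNetPretrained/MSRA/R-50")
print(url)  # https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-50.pkl

# ModelCatalogHandler (registered above) lets the same name be used as a path:
# get_local_path() resolves the catalog entry, then downloads and caches the file.
local_weights = PathManager.get_local_path("catalog://ImageNetPretrained/MSRA/R-50")
```
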
/detectron2/checkpoint/detection_checkpoint.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | import pickle
3 | from fvcore.common.checkpoint import Checkpointer
4 |
5 | import detectron2.utils.comm as comm
6 | from detectron2.utils.file_io import PathManager
7 |
8 | from .c2_model_loading import align_and_update_state_dicts
9 |
10 |
11 | class DetectionCheckpointer(Checkpointer):
12 | """
13 | Same as :class:`Checkpointer`, but is able to handle models in detectron & detectron2
14 | model zoo, and apply conversions for legacy models.
15 | """
16 |
17 | def __init__(self, model, save_dir="", *, save_to_disk=None, **checkpointables):
18 | is_main_process = comm.is_main_process()
19 | super().__init__(
20 | model,
21 | save_dir,
22 | save_to_disk=is_main_process if save_to_disk is None else save_to_disk,
23 | **checkpointables,
24 | )
25 | if hasattr(self, "path_manager"):
26 | self.path_manager = PathManager
27 | else:
28 | # This could only happen for open source
29 | # TODO remove after upgrading fvcore
30 | from fvcore.common.file_io import PathManager as g_PathManager
31 |
32 | for handler in PathManager._path_handlers.values():
33 | try:
34 | g_PathManager.register_handler(handler)
35 | except KeyError:
36 | pass
37 |
38 | def _load_file(self, filename):
39 | if filename.endswith(".pkl"):
40 | with PathManager.open(filename, "rb") as f:
41 | data = pickle.load(f, encoding="latin1")
42 | if "model" in data and "__author__" in data:
43 | # file is in Detectron2 model zoo format
44 | self.logger.info("Reading a file from '{}'".format(data["__author__"]))
45 | return data
46 | else:
47 | # assume file is from Caffe2 / Detectron1 model zoo
48 | if "blobs" in data:
49 | # Detection models have "blobs", but ImageNet models don't
50 | data = data["blobs"]
51 | data = {k: v for k, v in data.items() if not k.endswith("_momentum")}
52 | return {"model": data, "__author__": "Caffe2", "matching_heuristics": True}
53 |
54 | loaded = super()._load_file(filename) # load native pth checkpoint
55 | if "model" not in loaded:
56 | loaded = {"model": loaded}
57 | return loaded
58 |
59 | def _load_model(self, checkpoint):
60 | if checkpoint.get("matching_heuristics", False):
61 | self._convert_ndarray_to_tensor(checkpoint["model"])
62 | # convert weights by name-matching heuristics
63 | model_state_dict = self.model.state_dict()
64 | align_and_update_state_dicts(
65 | model_state_dict,
66 | checkpoint["model"],
67 | c2_conversion=checkpoint.get("__author__", None) == "Caffe2",
68 | )
69 | checkpoint["model"] = model_state_dict
70 | # for non-caffe2 models, use standard ways to load it
71 | incompatible = super()._load_model(checkpoint)
72 | if incompatible is None: # support older versions of fvcore
73 | return None
74 |
75 | model_buffers = dict(self.model.named_buffers(recurse=False))
76 | for k in ["pixel_mean", "pixel_std"]:
77 | # Ignore missing key message about pixel_mean/std.
78 | # Though they may be missing in old checkpoints, they will be correctly
79 | # initialized from config anyway.
80 | if k in model_buffers:
81 | try:
82 | incompatible.missing_keys.remove(k)
83 | except ValueError:
84 | pass
85 | return incompatible
86 |
--------------------------------------------------------------------------------
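
Typical usage, sketched under the assumption of a default config (the `detectron2://` weight path below is the one used by the stock R-CNN configs):

```python
from detectron2.checkpoint import DetectionCheckpointer
from detectron2.config import get_cfg
from detectron2.modeling import build_model

cfg = get_cfg()          # default GeneralizedRCNN config, for illustration only
model = build_model(cfg)

# A ".pkl" path takes the Detectron/Caffe2 branch of _load_file() above and is
# converted with name-matching heuristics; a ".pth" path falls through to fvcore's
# generic Checkpointer loading.
DetectionCheckpointer(model).load("detectron2://ImageNetPretrained/MSRA/R-50.pkl")
```
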
/detectron2/config/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | from .compat import downgrade_config, upgrade_config
3 | from .config import CfgNode, get_cfg, global_cfg, set_global_cfg, configurable
4 |
5 | __all__ = [
6 | "CfgNode",
7 | "get_cfg",
8 | "global_cfg",
9 | "set_global_cfg",
10 | "downgrade_config",
11 | "upgrade_config",
12 | "configurable",
13 | ]
14 |
--------------------------------------------------------------------------------
/detectron2/data/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | from . import transforms # isort:skip
3 |
4 | from .build import (
5 | build_batch_data_loader,
6 | build_detection_test_loader,
7 | build_detection_train_loader,
8 | get_detection_dataset_dicts,
9 | load_proposals_into_dataset,
10 | print_instances_class_histogram,
11 | )
12 | from .catalog import DatasetCatalog, MetadataCatalog, Metadata
13 | from .common import DatasetFromList, MapDataset
14 | from .dataset_mapper import DatasetMapper
15 |
16 | # ensure the builtin datasets are registered
17 | from . import datasets, samplers # isort:skip
18 |
19 | __all__ = [k for k in globals().keys() if not k.startswith("_")]
20 |
--------------------------------------------------------------------------------
/detectron2/data/datasets/README.md:
--------------------------------------------------------------------------------
1 |
2 |
3 | ### Common Datasets
4 |
5 | The datasets implemented here do not need to load the data into the final format.
6 | They should provide the minimal data structure needed to use the dataset, so it can be very efficient.
7 |
8 | For example, for an image dataset, just provide the file names and labels, but don't read the images.
9 | Let the downstream code decide how to read them.
10 |
--------------------------------------------------------------------------------
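
A minimal sketch of that convention using the standard catalog API (the paths and labels below are placeholders):

```python
from detectron2.data import DatasetCatalog, MetadataCatalog
from detectron2.structures import BoxMode


def load_my_dataset():
    # Return lightweight dicts only: file paths and labels, never decoded pixels.
    return [
        {
            "file_name": "datasets/my_data/images/0001.jpg",
            "image_id": 1,
            "height": 480,
            "width": 640,
            "annotations": [
                {
                    "bbox": [10.0, 20.0, 110.0, 220.0],
                    "bbox_mode": BoxMode.XYXY_ABS,
                    "category_id": 0,
                }
            ],
        }
    ]


DatasetCatalog.register("my_dataset_train", load_my_dataset)
MetadataCatalog.get("my_dataset_train").set(thing_classes=["widget"])
```
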
/detectron2/data/datasets/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | from .coco import load_coco_json, load_sem_seg, register_coco_instances
3 | from .coco_panoptic import register_coco_panoptic, register_coco_panoptic_separated
4 | from .lvis import load_lvis_json, register_lvis_instances, get_lvis_instances_meta
5 | from .pascal_voc import load_voc_instances, register_pascal_voc
6 | from . import builtin as _builtin # ensure the builtin datasets are registered
7 |
8 |
9 | __all__ = [k for k in globals().keys() if not k.startswith("_")]
10 |
--------------------------------------------------------------------------------
/detectron2/data/datasets/pascal_voc.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # Copyright (c) Facebook, Inc. and its affiliates.
3 |
4 | import numpy as np
5 | import os
6 | import xml.etree.ElementTree as ET
7 | from typing import List, Tuple, Union
8 |
9 | from detectron2.data import DatasetCatalog, MetadataCatalog
10 | from detectron2.structures import BoxMode
11 | from detectron2.utils.file_io import PathManager
12 |
13 | __all__ = ["load_voc_instances", "register_pascal_voc"]
14 |
15 |
16 | # fmt: off
17 | CLASS_NAMES = (
18 | "aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat",
19 | "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person",
20 | "pottedplant", "sheep", "sofa", "train", "tvmonitor"
21 | )
22 | # fmt: on
23 |
24 |
25 | def load_voc_instances(dirname: str, split: str, class_names: Union[List[str], Tuple[str, ...]]):
26 | """
27 | Load Pascal VOC detection annotations to Detectron2 format.
28 |
29 | Args:
30 | dirname: Contain "Annotations", "ImageSets", "JPEGImages"
31 | split (str): one of "train", "test", "val", "trainval"
32 | class_names: list or tuple of class names
33 | """
34 | with PathManager.open(os.path.join(dirname, "ImageSets", "Main", split + ".txt")) as f:
35 | fileids = np.loadtxt(f, dtype=np.str)
36 |
37 | # Needs to read many small annotation files, so fetch a local copy of the directory
38 | annotation_dirname = PathManager.get_local_path(os.path.join(dirname, "Annotations/"))
39 | dicts = []
40 | for fileid in fileids:
41 | anno_file = os.path.join(annotation_dirname, fileid + ".xml")
42 | jpeg_file = os.path.join(dirname, "JPEGImages", fileid + ".jpg")
43 |
44 | with PathManager.open(anno_file) as f:
45 | tree = ET.parse(f)
46 |
47 | r = {
48 | "file_name": jpeg_file,
49 | "image_id": fileid,
50 | "height": int(tree.findall("./size/height")[0].text),
51 | "width": int(tree.findall("./size/width")[0].text),
52 | }
53 | instances = []
54 |
55 | for obj in tree.findall("object"):
56 | cls = obj.find("name").text
57 | # We include "difficult" samples in training.
58 | # Based on limited experiments, they don't hurt accuracy.
59 | # difficult = int(obj.find("difficult").text)
60 | # if difficult == 1:
61 | # continue
62 | bbox = obj.find("bndbox")
63 | bbox = [float(bbox.find(x).text) for x in ["xmin", "ymin", "xmax", "ymax"]]
64 | # Original annotations are integers in the range [1, W or H]
65 | # Assuming they mean 1-based pixel indices (inclusive),
66 | # a box with annotation (xmin=1, xmax=W) covers the whole image.
67 | # In coordinate space this is represented by (xmin=0, xmax=W)
68 | bbox[0] -= 1.0
69 | bbox[1] -= 1.0
70 | instances.append(
71 | {"category_id": class_names.index(cls), "bbox": bbox, "bbox_mode": BoxMode.XYXY_ABS}
72 | )
73 | r["annotations"] = instances
74 | dicts.append(r)
75 | return dicts
76 |
77 |
78 | def register_pascal_voc(name, dirname, split, year, class_names=CLASS_NAMES):
79 | DatasetCatalog.register(name, lambda: load_voc_instances(dirname, split, class_names))
80 | MetadataCatalog.get(name).set(
81 | thing_classes=list(class_names), dirname=dirname, year=year, split=split
82 | )
83 |
--------------------------------------------------------------------------------
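
For instance (the dataset root is a placeholder), registration stays lazy; the annotation files are only parsed when the dataset is first fetched from the catalog:

```python
from detectron2.data import DatasetCatalog
from detectron2.data.datasets import register_pascal_voc

register_pascal_voc("my_voc_2007_trainval", "datasets/VOC2007", "trainval", year=2007)
dicts = DatasetCatalog.get("my_voc_2007_trainval")  # triggers load_voc_instances()
```
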
/detectron2/data/datasets/register_coco.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | from .coco import register_coco_instances # noqa
3 | from .coco_panoptic import register_coco_panoptic_separated # noqa
4 |
--------------------------------------------------------------------------------
/detectron2/data/samplers/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | from .distributed_sampler import InferenceSampler, RepeatFactorTrainingSampler, TrainingSampler, RandomSubsetTrainingSampler
3 | from .grouped_batch_sampler import GroupedBatchSampler
4 |
5 | __all__ = [
6 | "GroupedBatchSampler",
7 | "TrainingSampler",
8 | "InferenceSampler",
9 | "RepeatFactorTrainingSampler",
10 | "RandomSubsetTrainingSampler"
11 | ]
12 |
--------------------------------------------------------------------------------
/detectron2/data/samplers/grouped_batch_sampler.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | import numpy as np
3 | from torch.utils.data.sampler import BatchSampler, Sampler
4 |
5 |
6 | class GroupedBatchSampler(BatchSampler):
7 | """
8 | Wraps another sampler to yield a mini-batch of indices.
9 | It enforces that the batch only contain elements from the same group.
10 | It also tries to provide mini-batches that follow an ordering which is
11 | as close as possible to the ordering from the original sampler.
12 | """
13 |
14 | def __init__(self, sampler, group_ids, batch_size):
15 | """
16 | Args:
17 | sampler (Sampler): Base sampler.
18 | group_ids (list[int]): If the sampler produces indices in range [0, N),
19 | `group_ids` must be a list of `N` ints which contains the group id of each sample.
20 | The group ids must be a set of integers in the range [0, num_groups).
21 | batch_size (int): Size of mini-batch.
22 | """
23 | if not isinstance(sampler, Sampler):
24 | raise ValueError(
25 | "sampler should be an instance of "
26 | "torch.utils.data.Sampler, but got sampler={}".format(sampler)
27 | )
28 | self.sampler = sampler
29 | self.group_ids = np.asarray(group_ids)
30 | assert self.group_ids.ndim == 1
31 | self.batch_size = batch_size
32 | groups = np.unique(self.group_ids).tolist()
33 |
34 | # buffer the indices of each group until batch size is reached
35 | self.buffer_per_group = {k: [] for k in groups}
36 |
37 | def __iter__(self):
38 | for idx in self.sampler:
39 | group_id = self.group_ids[idx]
40 | group_buffer = self.buffer_per_group[group_id]
41 | group_buffer.append(idx)
42 | if len(group_buffer) == self.batch_size:
43 | yield group_buffer[:] # yield a copy of the list
44 | del group_buffer[:]
45 |
46 | def __len__(self):
47 | raise NotImplementedError("len() of GroupedBatchSampler is not well-defined.")
48 |
--------------------------------------------------------------------------------
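
A small sketch of the intended wiring; the group ids here are illustrative (e.g. 0 = landscape, 1 = portrait aspect ratio):

```python
from detectron2.data.samplers import GroupedBatchSampler, TrainingSampler

group_ids = [0, 1, 0, 0, 1, 1, 0, 1]                 # one group id per dataset element
base_sampler = TrainingSampler(len(group_ids), shuffle=True, seed=0)
batch_sampler = GroupedBatchSampler(base_sampler, group_ids, batch_size=2)

for batch in batch_sampler:   # every batch holds indices from a single group
    print(batch)              # e.g. [3, 6]
    break                     # TrainingSampler is infinite, so stop after one batch
```
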
/detectron2/data/transforms/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | from fvcore.transforms.transform import Transform, TransformList # order them first
3 | from fvcore.transforms.transform import *
4 | from .transform import *
5 | from .augmentation import *
6 | from .augmentation_impl import *
7 |
8 | __all__ = [k for k in globals().keys() if not k.startswith("_")]
9 |
--------------------------------------------------------------------------------
/detectron2/engine/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 |
3 | from .launch import *
4 | from .train_loop import *
5 |
6 | __all__ = [k for k in globals().keys() if not k.startswith("_")]
7 |
8 |
9 | # prefer to let hooks and defaults live in separate namespaces (therefore not in __all__)
10 | # but still make them available here
11 | from .hooks import *
12 | from .defaults import *
13 |
--------------------------------------------------------------------------------
/detectron2/engine/launch.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | import logging
3 | import torch
4 | import torch.distributed as dist
5 | import torch.multiprocessing as mp
6 |
7 | from detectron2.utils import comm
8 |
9 | __all__ = ["launch"]
10 |
11 |
12 | def _find_free_port():
13 | import socket
14 |
15 | sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
16 | # Binding to port 0 will cause the OS to find an available port for us
17 | sock.bind(("", 0))
18 | port = sock.getsockname()[1]
19 | sock.close()
20 | # NOTE: there is still a chance the port could be taken by other processes.
21 | return port
22 |
23 |
24 | def launch(main_func, num_gpus_per_machine, num_machines=1, machine_rank=0, dist_url=None, args=()):
25 | """
26 | Launch multi-gpu or distributed training.
27 | This function must be called on all machines involved in the training.
28 | It will spawn child processes (defined by ``num_gpus_per_machine``) on each machine.
29 |
30 | Args:
31 | main_func: a function that will be called by `main_func(*args)`
32 | num_gpus_per_machine (int): number of GPUs per machine
33 | num_machines (int): the total number of machines
34 | machine_rank (int): the rank of this machine
35 | dist_url (str): url to connect to for distributed jobs, including protocol
36 | e.g. "tcp://127.0.0.1:8686".
37 | Can be set to "auto" to automatically select a free port on localhost
38 | args (tuple): arguments passed to main_func
39 | """
40 | world_size = num_machines * num_gpus_per_machine
41 | if world_size > 1:
42 | # https://github.com/pytorch/pytorch/pull/14391
43 | # TODO prctl in spawned processes
44 |
45 | if dist_url == "auto":
46 | assert num_machines == 1, "dist_url=auto not supported in multi-machine jobs."
47 | port = _find_free_port()
48 | dist_url = f"tcp://127.0.0.1:{port}"
49 | if num_machines > 1 and dist_url.startswith("file://"):
50 | logger = logging.getLogger(__name__)
51 | logger.warning(
52 | "file:// is not a reliable init_method in multi-machine jobs. Prefer tcp://"
53 | )
54 |
55 | mp.spawn(
56 | _distributed_worker,
57 | nprocs=num_gpus_per_machine,
58 | args=(main_func, world_size, num_gpus_per_machine, machine_rank, dist_url, args),
59 | daemon=False,
60 | )
61 | else:
62 | main_func(*args)
63 |
64 |
65 | def _distributed_worker(
66 | local_rank, main_func, world_size, num_gpus_per_machine, machine_rank, dist_url, args
67 | ):
68 | assert torch.cuda.is_available(), "cuda is not available. Please check your installation."
69 | global_rank = machine_rank * num_gpus_per_machine + local_rank
70 | try:
71 | dist.init_process_group(
72 | backend="NCCL", init_method=dist_url, world_size=world_size, rank=global_rank
73 | )
74 | except Exception as e:
75 | logger = logging.getLogger(__name__)
76 | logger.error("Process group URL: {}".format(dist_url))
77 | raise e
78 | # synchronize is needed here to prevent a possible timeout after calling init_process_group
79 | # See: https://github.com/facebookresearch/maskrcnn-benchmark/issues/172
80 | comm.synchronize()
81 |
82 | assert num_gpus_per_machine <= torch.cuda.device_count()
83 | torch.cuda.set_device(local_rank)
84 |
85 | # Setup the local process group (which contains ranks within the same machine)
86 | assert comm._LOCAL_PROCESS_GROUP is None
87 | num_machines = world_size // num_gpus_per_machine
88 | for i in range(num_machines):
89 | ranks_on_i = list(range(i * num_gpus_per_machine, (i + 1) * num_gpus_per_machine))
90 | pg = dist.new_group(ranks_on_i)
91 | if i == machine_rank:
92 | comm._LOCAL_PROCESS_GROUP = pg
93 |
94 | main_func(*args)
95 |
--------------------------------------------------------------------------------
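
Typical single-machine usage of the launcher above (a sketch: the `main` body is a placeholder, and multi-GPU runs require CUDA devices to be available):

```python
from detectron2.engine import launch


def main():
    # build the config, model and trainer here; this runs once in every GPU process
    print("worker started")


if __name__ == "__main__":
    launch(
        main,
        num_gpus_per_machine=2,
        num_machines=1,
        machine_rank=0,
        dist_url="auto",  # pick a free local port; only valid for single-machine jobs
        args=(),
    )
```
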
/detectron2/evaluation/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | from .cityscapes_evaluation import CityscapesInstanceEvaluator, CityscapesSemSegEvaluator
3 | from .coco_evaluation import COCOEvaluator
4 | from .rotated_coco_evaluation import RotatedCOCOEvaluator
5 | from .evaluator import DatasetEvaluator, DatasetEvaluators, inference_context, inference_on_dataset
6 | from .lvis_evaluation import LVISEvaluator
7 | from .panoptic_evaluation import COCOPanopticEvaluator
8 | from .pascal_voc_evaluation import PascalVOCDetectionEvaluator
9 | from .sem_seg_evaluation import SemSegEvaluator
10 | from .testing import print_csv_format, verify_results
11 |
12 | __all__ = [k for k in globals().keys() if not k.startswith("_")]
13 |
--------------------------------------------------------------------------------
/detectron2/evaluation/fast_eval_api.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | import copy
3 | import numpy as np
4 | import time
5 | from pycocotools.cocoeval import COCOeval
6 |
7 | from detectron2 import _C
8 |
9 |
10 | class COCOeval_opt(COCOeval):
11 | """
12 | This is a slightly modified version of the original COCO API, where the functions evaluateImg()
13 | and accumulate() are implemented in C++ to speed up evaluation.
14 | """
15 |
16 | def evaluate(self):
17 | """
18 | Run per image evaluation on given images and store results in self.evalImgs_cpp, a
19 | data structure that isn't readable from Python but is used by a C++ implementation of
20 | accumulate(). Unlike the original COCO PythonAPI, we don't populate the data structure
21 | self.evalImgs because it is a computational bottleneck.
22 | :return: None
23 | """
24 | tic = time.time()
25 |
26 | print("Running per image evaluation...")
27 | p = self.params
28 | # add backward compatibility if useSegm is specified in params
29 | if p.useSegm is not None:
30 | p.iouType = "segm" if p.useSegm == 1 else "bbox"
31 | print("useSegm (deprecated) is not None. Running {} evaluation".format(p.iouType))
32 | print("Evaluate annotation type *{}*".format(p.iouType))
33 | p.imgIds = list(np.unique(p.imgIds))
34 | if p.useCats:
35 | p.catIds = list(np.unique(p.catIds))
36 | p.maxDets = sorted(p.maxDets)
37 | self.params = p
38 |
39 | self._prepare()
40 |
41 | # loop through images, area range, max detection number
42 | catIds = p.catIds if p.useCats else [-1]
43 |
44 | if p.iouType == "segm" or p.iouType == "bbox":
45 | computeIoU = self.computeIoU
46 | elif p.iouType == "keypoints":
47 | computeIoU = self.computeOks
48 | self.ious = {
49 | (imgId, catId): computeIoU(imgId, catId) for imgId in p.imgIds for catId in catIds
50 | }
51 |
52 | maxDet = p.maxDets[-1]
53 |
54 | # <<<< Beginning of code differences with original COCO API
55 | def convert_instances_to_cpp(instances, is_det=False):
56 | # Convert annotations for a list of instances in an image to a format that's fast
57 | # to access in C++
58 | instances_cpp = []
59 | for instance in instances:
60 | instance_cpp = _C.InstanceAnnotation(
61 | int(instance["id"]),
62 | instance["score"] if is_det else instance.get("score", 0.0),
63 | instance["area"],
64 | bool(instance.get("iscrowd", 0)),
65 | bool(instance.get("ignore", 0)),
66 | )
67 | instances_cpp.append(instance_cpp)
68 | return instances_cpp
69 |
70 | # Convert GT annotations, detections, and IOUs to a format that's fast to access in C++
71 | ground_truth_instances = [
72 | [convert_instances_to_cpp(self._gts[imgId, catId]) for catId in p.catIds]
73 | for imgId in p.imgIds
74 | ]
75 | detected_instances = [
76 | [convert_instances_to_cpp(self._dts[imgId, catId], is_det=True) for catId in p.catIds]
77 | for imgId in p.imgIds
78 | ]
79 | ious = [[self.ious[imgId, catId] for catId in catIds] for imgId in p.imgIds]
80 |
81 | if not p.useCats:
82 | # For each image, flatten per-category lists into a single list
83 | ground_truth_instances = [[[o for c in i for o in c]] for i in ground_truth_instances]
84 | detected_instances = [[[o for c in i for o in c]] for i in detected_instances]
85 |
86 | # Call C++ implementation of self.evaluateImgs()
87 | self._evalImgs_cpp = _C.COCOevalEvaluateImages(
88 | p.areaRng, maxDet, p.iouThrs, ious, ground_truth_instances, detected_instances
89 | )
90 | self._evalImgs = None
91 |
92 | self._paramsEval = copy.deepcopy(self.params)
93 | toc = time.time()
94 | print("COCOeval_opt.evaluate() finished in {:0.2f} seconds.".format(toc - tic))
95 | # >>>> End of code differences with original COCO API
96 |
97 | def accumulate(self):
98 | """
99 | Accumulate per image evaluation results and store the result in self.eval. Does not
100 | support changing parameter settings from those used by self.evaluate()
101 | """
102 | print("Accumulating evaluation results...")
103 | tic = time.time()
104 | if not hasattr(self, "_evalImgs_cpp"):
105 | print("Please run evaluate() first")
106 |
107 | self.eval = _C.COCOevalAccumulate(self._paramsEval, self._evalImgs_cpp)
108 |
109 | # recall is num_iou_thresholds X num_categories X num_area_ranges X num_max_detections
110 | self.eval["recall"] = np.array(self.eval["recall"]).reshape(
111 | self.eval["counts"][:1] + self.eval["counts"][2:]
112 | )
113 |
114 | # precision and scores are num_iou_thresholds X num_recall_thresholds X num_categories X
115 | # num_area_ranges X num_max_detections
116 | self.eval["precision"] = np.array(self.eval["precision"]).reshape(self.eval["counts"])
117 | self.eval["scores"] = np.array(self.eval["scores"]).reshape(self.eval["counts"])
118 | toc = time.time()
119 | print("COCOeval_opt.accumulate() finished in {:0.2f} seconds.".format(toc - tic))
120 |
--------------------------------------------------------------------------------
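
COCOeval_opt is meant as a drop-in replacement for pycocotools' COCOeval: only evaluate() and accumulate() change. A usage sketch (the annotation and result file paths are placeholders):

```python
from pycocotools.coco import COCO

from detectron2.evaluation.fast_eval_api import COCOeval_opt

coco_gt = COCO("datasets/coco/annotations/instances_val2017.json")
coco_dt = coco_gt.loadRes("output/coco_instances_results.json")

coco_eval = COCOeval_opt(coco_gt, coco_dt, iouType="bbox")
coco_eval.evaluate()    # C++ per-image evaluation (fills self._evalImgs_cpp)
coco_eval.accumulate()  # C++ accumulation (fills self.eval)
coco_eval.summarize()   # inherited from COCOeval; prints the standard AP table
```
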
/detectron2/evaluation/testing.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | import logging
3 | import numpy as np
4 | import pprint
5 | import sys
6 | from collections import OrderedDict
7 | from collections.abc import Mapping
8 |
9 |
10 | def print_csv_format(results):
11 | """
12 | Print main metrics in a format similar to Detectron,
13 | so that they are easy to copypaste into a spreadsheet.
14 |
15 | Args:
16 | results (OrderedDict[dict]): task_name -> {metric -> score}
17 | """
18 | assert isinstance(results, OrderedDict), results # unordered results cannot be properly printed
19 | logger = logging.getLogger(__name__)
20 | for task, res in results.items():
21 | # Don't print "AP-category" metrics since they are usually not tracked.
22 | important_res = [(k, v) for k, v in res.items() if "-" not in k]
23 | logger.info("copypaste: Task: {}".format(task))
24 | logger.info("copypaste: " + ",".join([k[0] for k in important_res]))
25 | logger.info("copypaste: " + ",".join(["{0:.4f}".format(k[1]) for k in important_res]))
26 |
27 |
28 | def verify_results(cfg, results):
29 | """
30 | Args:
31 | results (OrderedDict[dict]): task_name -> {metric -> score}
32 |
33 | Returns:
34 | bool: whether the verification succeeds or not
35 | """
36 | expected_results = cfg.TEST.EXPECTED_RESULTS
37 | if not len(expected_results):
38 | return True
39 |
40 | ok = True
41 | for task, metric, expected, tolerance in expected_results:
42 | actual = results[task].get(metric, None)
43 | if actual is None:
44 | ok = False
45 | continue
46 | if not np.isfinite(actual):
47 | ok = False
48 | continue
49 | diff = abs(actual - expected)
50 | if diff > tolerance:
51 | ok = False
52 |
53 | logger = logging.getLogger(__name__)
54 | if not ok:
55 | logger.error("Result verification failed!")
56 | logger.error("Expected Results: " + str(expected_results))
57 | logger.error("Actual Results: " + pprint.pformat(results))
58 |
59 | sys.exit(1)
60 | else:
61 | logger.info("Results verification passed.")
62 | return ok
63 |
64 |
65 | def flatten_results_dict(results):
66 | """
67 | Expand a hierarchical dict of scalars into a flat dict of scalars.
68 | If results[k1][k2][k3] = v, the returned dict will have the entry
69 | {"k1/k2/k3": v}.
70 |
71 | Args:
72 | results (dict):
73 | """
74 | r = {}
75 | for k, v in results.items():
76 | if isinstance(v, Mapping):
77 | v = flatten_results_dict(v)
78 | for kk, vv in v.items():
79 | r[k + "/" + kk] = vv
80 | else:
81 | r[k] = v
82 | return r
83 |
--------------------------------------------------------------------------------
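
For reference, the expected input shape of these helpers (the numbers below are made up):

```python
from collections import OrderedDict

from detectron2.evaluation.testing import flatten_results_dict, print_csv_format

results = OrderedDict({"segm": {"AP": 37.2, "AP50": 59.1, "AP-person": 48.3}})
print_csv_format(results)  # logs "copypaste: ..." lines, skipping per-category "AP-person"

print(flatten_results_dict(results))
# {'segm/AP': 37.2, 'segm/AP50': 59.1, 'segm/AP-person': 48.3}

# verify_results() reads cfg.TEST.EXPECTED_RESULTS entries shaped like
# [task, metric, expected_value, tolerance], e.g. ["segm", "AP", 37.2, 0.02].
```
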
/detectron2/export/README.md:
--------------------------------------------------------------------------------
1 |
2 | This directory contains code to prepare a detectron2 model for deployment.
3 | Currently it supports exporting a detectron2 model to Caffe2 format through ONNX.
4 |
5 | Please see [documentation](https://detectron2.readthedocs.io/tutorials/deployment.html) for its usage.
6 |
7 |
8 | ### Acknowledgements
9 |
10 | Thanks to the Mobile Vision team at Facebook for developing the Caffe2 conversion tools.
11 |
12 | Thanks to the Computing Platform Department - PAI team at Alibaba Group (@bddpqq, @chenbohua3), who
13 | helped export Detectron2 models to TorchScript.
14 |
--------------------------------------------------------------------------------
/detectron2/export/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | from .api import *
4 |
5 | __all__ = [k for k in globals().keys() if not k.startswith("_")]
6 |
--------------------------------------------------------------------------------
/detectron2/export/caffe2_inference.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 |
3 | import logging
4 | import numpy as np
5 | from itertools import count
6 | import torch
7 | from caffe2.proto import caffe2_pb2
8 | from caffe2.python import core
9 |
10 | from .caffe2_modeling import META_ARCH_CAFFE2_EXPORT_TYPE_MAP, convert_batched_inputs_to_c2_format
11 | from .shared import ScopedWS, get_pb_arg_vali, get_pb_arg_vals, infer_device_type
12 |
13 | logger = logging.getLogger(__name__)
14 |
15 |
16 | # ===== ref: mobile-vision's 'Caffe2Wrapper' class ======
17 | class ProtobufModel(torch.nn.Module):
18 | """
19 | Wrapper of a caffe2 protobuf model.
20 | It works just like nn.Module, but runs caffe2 under the hood.
21 | Input/Output are Dict[str, tensor] whose keys are in external_input/output.
22 | """
23 |
24 | _ids = count(0)
25 |
26 | def __init__(self, predict_net, init_net):
27 | logger.info(f"Initializing ProtobufModel for: {predict_net.name} ...")
28 | super().__init__()
29 | assert isinstance(predict_net, caffe2_pb2.NetDef)
30 | assert isinstance(init_net, caffe2_pb2.NetDef)
31 | # create unique temporary workspace for each instance
32 | self.ws_name = "__tmp_ProtobufModel_{}__".format(next(self._ids))
33 | self.net = core.Net(predict_net)
34 |
35 | logger.info("Running init_net once to fill the parameters ...")
36 | with ScopedWS(self.ws_name, is_reset=True, is_cleanup=False) as ws:
37 | ws.RunNetOnce(init_net)
38 | uninitialized_external_input = []
39 | for blob in self.net.Proto().external_input:
40 | if blob not in ws.Blobs():
41 | uninitialized_external_input.append(blob)
42 | ws.CreateBlob(blob)
43 | ws.CreateNet(self.net)
44 |
45 | self._error_msgs = set()
46 | self._input_blobs = uninitialized_external_input
47 |
48 | def _infer_output_devices(self, inputs):
49 | """
50 | Returns:
51 | list[str]: list of device for each external output
52 | """
53 |
54 | def _get_device_type(torch_tensor):
55 | assert torch_tensor.device.type in ["cpu", "cuda"]
56 | assert torch_tensor.device.index == 0
57 | return torch_tensor.device.type
58 |
59 | predict_net = self.net.Proto()
60 | input_device_types = {
61 | (name, 0): _get_device_type(tensor) for name, tensor in zip(self._input_blobs, inputs)
62 | }
63 | device_type_map = infer_device_type(
64 | predict_net, known_status=input_device_types, device_name_style="pytorch"
65 | )
66 | ssa, versions = core.get_ssa(predict_net)
67 | versioned_outputs = [(name, versions[name]) for name in predict_net.external_output]
68 | output_devices = [device_type_map[outp] for outp in versioned_outputs]
69 | return output_devices
70 |
71 | def forward(self, inputs):
72 | """
73 | Args:
74 | inputs (tuple[torch.Tensor])
75 |
76 | Returns:
77 | dict[str, torch.Tensor]
78 | """
79 | assert len(inputs) == len(self._input_blobs), (
80 | f"Length of inputs ({len(inputs)}) "
81 | f"doesn't match the required input blobs: {self._input_blobs}"
82 | )
83 |
84 | with ScopedWS(self.ws_name, is_reset=False, is_cleanup=False) as ws:
85 | for b, tensor in zip(self._input_blobs, inputs):
86 | ws.FeedBlob(b, tensor)
87 |
88 | try:
89 | ws.RunNet(self.net.Proto().name)
90 | except RuntimeError as e:
91 | if not str(e) in self._error_msgs:
92 | self._error_msgs.add(str(e))
93 | logger.warning("Encountered new RuntimeError: \n{}".format(str(e)))
94 | logger.warning("Catch the error and use partial results.")
95 |
96 | c2_outputs = [ws.FetchBlob(b) for b in self.net.Proto().external_output]
97 | # Remove outputs of current run, this is necessary in order to
98 | # prevent fetching the result from previous run if the model fails
99 | # in the middle.
100 | for b in self.net.Proto().external_output:
101 | # Needs to create an uninitialized blob to make the net runnable.
102 | # This is "equivalent" to: ws.RemoveBlob(b) then ws.CreateBlob(b),
103 | # but there's no such API.
104 | ws.FeedBlob(b, f"{b}, a C++ native class of type nullptr (uninitialized).")
105 |
106 | # Cast output to torch.Tensor on the desired device
107 | output_devices = (
108 | self._infer_output_devices(inputs)
109 | if any(t.device.type != "cpu" for t in inputs)
110 | else ["cpu" for _ in self.net.Proto().external_output]
111 | )
112 |
113 | outputs = []
114 | for name, c2_output, device in zip(
115 | self.net.Proto().external_output, c2_outputs, output_devices
116 | ):
117 | if not isinstance(c2_output, np.ndarray):
118 | raise RuntimeError(
119 | "Invalid output for blob {}, received: {}".format(name, c2_output)
120 | )
121 | outputs.append(torch.Tensor(c2_output).to(device=device))
122 | # TODO change to tuple in the future
123 | return dict(zip(self.net.Proto().external_output, outputs))
124 |
125 |
126 | class ProtobufDetectionModel(torch.nn.Module):
127 | """
128 | A class that works just like a pytorch meta arch in terms of inference, but runs a
129 | caffe2 model under the hood.
130 | """
131 |
132 | def __init__(self, predict_net, init_net, *, convert_outputs=None):
133 | """
134 | Args:
135 | predict_net, init_net (core.Net): caffe2 nets
136 | convert_outputs (callable): a function that converts caffe2
137 | outputs to the same format of the original pytorch model.
138 | By default, use the one defined in the caffe2 meta_arch.
139 | """
140 | super().__init__()
141 | self.protobuf_model = ProtobufModel(predict_net, init_net)
142 | self.size_divisibility = get_pb_arg_vali(predict_net, "size_divisibility", 0)
143 | self.device = get_pb_arg_vals(predict_net, "device", b"cpu").decode("ascii")
144 |
145 | if convert_outputs is None:
146 | meta_arch = get_pb_arg_vals(predict_net, "meta_architecture", b"GeneralizedRCNN")
147 | meta_arch = META_ARCH_CAFFE2_EXPORT_TYPE_MAP[meta_arch.decode("ascii")]
148 | self._convert_outputs = meta_arch.get_outputs_converter(predict_net, init_net)
149 | else:
150 | self._convert_outputs = convert_outputs
151 |
152 | def _convert_inputs(self, batched_inputs):
153 | # currently all models convert inputs in the same way
154 | return convert_batched_inputs_to_c2_format(
155 | batched_inputs, self.size_divisibility, self.device
156 | )
157 |
158 | def forward(self, batched_inputs):
159 | c2_inputs = self._convert_inputs(batched_inputs)
160 | c2_results = self.protobuf_model(c2_inputs)
161 | return self._convert_outputs(batched_inputs, c2_inputs, c2_results)
162 |
--------------------------------------------------------------------------------
/detectron2/export/caffe2_patch.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 |
3 | import contextlib
4 | from unittest import mock
5 | import torch
6 |
7 | from detectron2.modeling import poolers
8 | from detectron2.modeling.proposal_generator import rpn
9 | from detectron2.modeling.roi_heads import keypoint_head, mask_head
10 | from detectron2.modeling.roi_heads.fast_rcnn import FastRCNNOutputLayers
11 |
12 | from .c10 import (
13 | Caffe2Compatible,
14 | Caffe2FastRCNNOutputsInference,
15 | Caffe2KeypointRCNNInference,
16 | Caffe2MaskRCNNInference,
17 | Caffe2ROIPooler,
18 | Caffe2RPN,
19 | )
20 |
21 |
22 | class GenericMixin(object):
23 | pass
24 |
25 |
26 | class Caffe2CompatibleConverter(object):
27 | """
28 | A GenericUpdater which implements the `create_from` interface by modifying the
29 | module object and assigning it another class, replaceCls.
30 | """
31 |
32 | def __init__(self, replaceCls):
33 | self.replaceCls = replaceCls
34 |
35 | def create_from(self, module):
36 | # update module's class to the new class
37 | assert isinstance(module, torch.nn.Module)
38 | if issubclass(self.replaceCls, GenericMixin):
39 | # replaceCls should act as mixin, create a new class on-the-fly
40 | new_class = type(
41 | "{}MixedWith{}".format(self.replaceCls.__name__, module.__class__.__name__),
42 | (self.replaceCls, module.__class__),
43 | {}, # {"new_method": lambda self: ...},
44 | )
45 | module.__class__ = new_class
46 | else:
47 | # replaceCls is a complete class; this allows an arbitrary class swap
48 | module.__class__ = self.replaceCls
49 |
50 | # initialize Caffe2Compatible
51 | if isinstance(module, Caffe2Compatible):
52 | module.tensor_mode = False
53 |
54 | return module
55 |
56 |
57 | def patch(model, target, updater, *args, **kwargs):
58 | """
59 | Recursively (post-order) update all modules with the target type and its
60 | subclasses, making an initialization/composition/inheritance/... via
61 | updater.create_from.
62 | """
63 | for name, module in model.named_children():
64 | model._modules[name] = patch(module, target, updater, *args, **kwargs)
65 | if isinstance(model, target):
66 | return updater.create_from(model, *args, **kwargs)
67 | return model
68 |
69 |
70 | def patch_generalized_rcnn(model):
71 | ccc = Caffe2CompatibleConverter
72 | model = patch(model, rpn.RPN, ccc(Caffe2RPN))
73 | model = patch(model, poolers.ROIPooler, ccc(Caffe2ROIPooler))
74 |
75 | return model
76 |
77 |
78 | @contextlib.contextmanager
79 | def mock_fastrcnn_outputs_inference(
80 | tensor_mode, check=True, box_predictor_type=FastRCNNOutputLayers
81 | ):
82 | with mock.patch.object(
83 | box_predictor_type,
84 | "inference",
85 | autospec=True,
86 | side_effect=Caffe2FastRCNNOutputsInference(tensor_mode),
87 | ) as mocked_func:
88 | yield
89 | if check:
90 | assert mocked_func.call_count > 0
91 |
92 |
93 | @contextlib.contextmanager
94 | def mock_mask_rcnn_inference(tensor_mode, patched_module, check=True):
95 | with mock.patch(
96 | "{}.mask_rcnn_inference".format(patched_module), side_effect=Caffe2MaskRCNNInference()
97 | ) as mocked_func:
98 | yield
99 | if check:
100 | assert mocked_func.call_count > 0
101 |
102 |
103 | @contextlib.contextmanager
104 | def mock_keypoint_rcnn_inference(tensor_mode, patched_module, use_heatmap_max_keypoint, check=True):
105 | with mock.patch(
106 | "{}.keypoint_rcnn_inference".format(patched_module),
107 | side_effect=Caffe2KeypointRCNNInference(use_heatmap_max_keypoint),
108 | ) as mocked_func:
109 | yield
110 | if check:
111 | assert mocked_func.call_count > 0
112 |
113 |
114 | class ROIHeadsPatcher:
115 | def __init__(self, heads, use_heatmap_max_keypoint):
116 | self.heads = heads
117 | self.use_heatmap_max_keypoint = use_heatmap_max_keypoint
118 |
119 | @contextlib.contextmanager
120 | def mock_roi_heads(self, tensor_mode=True):
121 | """
122 | Patching several inference functions inside ROIHeads and its subclasses
123 |
124 | Args:
125 | tensor_mode (bool): whether the inputs/outputs are caffe2's tensor
126 | format or not. Default to True.
127 | """
128 | # NOTE: this requires that `keypoint_rcnn_inference` and `mask_rcnn_inference`
129 | # are called inside the same file as BaseXxxHead, because mock.patch is used.
130 | kpt_heads_mod = keypoint_head.BaseKeypointRCNNHead.__module__
131 | mask_head_mod = mask_head.BaseMaskRCNNHead.__module__
132 |
133 | mock_ctx_managers = [
134 | mock_fastrcnn_outputs_inference(
135 | tensor_mode=tensor_mode,
136 | check=True,
137 | box_predictor_type=type(self.heads.box_predictor),
138 | )
139 | ]
140 | if getattr(self.heads, "keypoint_on", False):
141 | mock_ctx_managers += [
142 | mock_keypoint_rcnn_inference(
143 | tensor_mode, kpt_heads_mod, self.use_heatmap_max_keypoint
144 | )
145 | ]
146 | if getattr(self.heads, "mask_on", False):
147 | mock_ctx_managers += [mock_mask_rcnn_inference(tensor_mode, mask_head_mod)]
148 |
149 | with contextlib.ExitStack() as stack: # python 3.3+
150 | for mgr in mock_ctx_managers:
151 | stack.enter_context(mgr)
152 | yield
153 |
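Usage note (illustrative sketch, not part of the file above): `patch` walks a model post-order and lets `Caffe2CompatibleConverter.create_from` swap the class of every matching submodule, either by synthesizing a mixin class on the fly (when the replacement derives from `GenericMixin`) or by assigning a complete replacement class. The `Toy` module and `MyMixin` class below are hypothetical names used only for this example.

import torch

from detectron2.export.caffe2_patch import (
    Caffe2CompatibleConverter,
    GenericMixin,
    patch,
)


class MyMixin(GenericMixin):
    def extra(self):
        return "patched"


class Toy(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.linear = torch.nn.Linear(4, 4)


model = torch.nn.Sequential(Toy())
# Swap the Toy submodule's class for a dynamically created "MyMixinMixedWithToy"
model = patch(model, Toy, Caffe2CompatibleConverter(MyMixin))
print(type(model[0]).__name__)  # MyMixinMixedWithToy
print(model[0].extra())         # "patched"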
--------------------------------------------------------------------------------
/detectron2/export/torchscript.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 |
3 | import os
4 | import torch
5 |
6 | from detectron2.utils.file_io import PathManager
7 |
8 | from .torchscript_patch import patch_instances, patch_nonscriptable_classes
9 |
10 |
11 | def export_torchscript_with_instances(model, fields):
12 | """
13 | Run :func:`torch.jit.script` on a model that uses the :class:`Instances` class. Since
14 | attributes of :class:`Instances` are "dynamically" added in eager mode, it is difficult
15 | for torchscript to support it out of the box. This function is made to support scripting
16 | a model that uses :class:`Instances`. It does the following:
17 |
18 | 1. Create a scriptable ``new_Instances`` class which behaves similarly to ``Instances``,
19 | but with all attributes being "static".
20 | The attributes need to be statically declared in the ``fields`` argument.
21 | 2. Register ``new_Instances`` to torchscript, and force torchscript to
22 | use it when trying to compile ``Instances``.
23 |
24 | After this function returns, the patching is reverted. Users should be able to script another model
25 | using different fields.
26 |
27 | Example:
28 | Assume that ``Instances`` in the model consist of two attributes named
29 | ``proposal_boxes`` and ``objectness_logits`` with type :class:`Boxes` and
30 | :class:`Tensor` respectively during inference. You can call this function like:
31 |
32 | ::
33 | fields = {"proposal_boxes": Boxes, "objectness_logits": torch.Tensor}
34 | torchscript_model = export_torchscript_with_instances(model, fields)
35 |
36 | Note:
37 | Currently we only support models in evaluation mode.
38 |
39 | Args:
40 | model (nn.Module): The input model to be exported to torchscript.
41 | fields (Dict[str, type]): Attribute names and corresponding type that
42 | ``Instances`` will use in the model. Note that all attributes used in ``Instances``
43 | need to be added, regardless of whether they are inputs/outputs of the model.
44 | Data types not defined in detectron2 are not supported for now.
45 |
46 | Returns:
47 | torch.jit.ScriptModule: the input model in torchscript format
48 | """
49 | patch_nonscriptable_classes()
50 |
51 | assert (
52 | not model.training
53 | ), "Currently we only support exporting models in evaluation mode to torchscript"
54 |
55 | with patch_instances(fields):
56 | scripted_model = torch.jit.script(model)
57 | return scripted_model
58 |
59 |
60 | def dump_torchscript_IR(model, dir):
61 | """
62 | Dump IR of a TracedModule/ScriptModule at various levels.
63 | Useful for debugging.
64 |
65 | Args:
66 | model (TracedModule or ScriptModule): traced or scripted module
67 | dir (str): output directory to dump files.
68 | """
69 | PathManager.mkdirs(dir)
70 |
71 | def _get_script_mod(mod):
72 | if isinstance(mod, torch.jit.TracedModule):
73 | return mod._actual_script_module
74 | return mod
75 |
76 | # Dump pretty-printed code: https://pytorch.org/docs/stable/jit.html#inspecting-code
77 | with PathManager.open(os.path.join(dir, "model_ts_code.txt"), "w") as f:
78 |
79 | def get_code(mod):
80 | # Try a few ways to get code using private attributes.
81 | try:
82 | # This contains more information than just `mod.code`
83 | return _get_script_mod(mod)._c.code
84 | except AttributeError:
85 | pass
86 | try:
87 | return mod.code
88 | except AttributeError:
89 | return None
90 |
91 | def dump_code(prefix, mod):
92 | code = get_code(mod)
93 | name = prefix or "root model"
94 | if code is None:
95 | f.write(f"Could not find code for {name} (type={mod.original_name})\n")
96 | f.write("\n")
97 | else:
98 | f.write(f"\nCode for {name}, type={mod.original_name}:\n")
99 | f.write(code)
100 | f.write("\n")
101 | f.write("-" * 80)
102 |
103 | for name, m in mod.named_children():
104 | dump_code(prefix + "." + name, m)
105 |
106 | dump_code("", model)
107 |
108 | # Recursively dump IR of all modules
109 | with PathManager.open(os.path.join(dir, "model_ts_IR.txt"), "w") as f:
110 | try:
111 | f.write(_get_script_mod(model)._c.dump_to_str(True, False, False))
112 | except AttributeError:
113 | pass
114 |
115 | # Dump IR of the entire graph (all submodules inlined)
116 | with PathManager.open(os.path.join(dir, "model_ts_IR_inlined.txt"), "w") as f:
117 | f.write(str(model.inlined_graph))
118 |
119 | # Dump the model structure in pytorch style
120 | with PathManager.open(os.path.join(dir, "model.txt"), "w") as f:
121 | f.write(str(model))
122 |
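Usage note (illustrative sketch, not part of the file above): a minimal end-to-end use of the two helpers, assuming `model` is a detectron2 model built elsewhere (e.g. with `build_model(cfg)`), already in evaluation mode, and whose `Instances` only carry `proposal_boxes` and `objectness_logits` at inference time.

import torch

from detectron2.structures import Boxes
from detectron2.export.torchscript import (
    dump_torchscript_IR,
    export_torchscript_with_instances,
)

model.eval()  # scripting is only supported in evaluation mode
fields = {"proposal_boxes": Boxes, "objectness_logits": torch.Tensor}
ts_model = export_torchscript_with_instances(model, fields)
dump_torchscript_IR(ts_model, "./ts_dump")  # writes model_ts_code.txt, model_ts_IR.txt, ...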
--------------------------------------------------------------------------------
/detectron2/layers/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | from .batch_norm import FrozenBatchNorm2d, get_norm, NaiveSyncBatchNorm
3 | from .deform_conv import DeformConv, ModulatedDeformConv
4 | from .mask_ops import paste_masks_in_image
5 | from .nms import batched_nms, batched_nms_rotated, nms, nms_rotated
6 | from .roi_align import ROIAlign, roi_align
7 | from .roi_align_rotated import ROIAlignRotated, roi_align_rotated
8 | from .shape_spec import ShapeSpec
9 | from .wrappers import BatchNorm2d, Conv2d, ConvTranspose2d, cat, interpolate, Linear, nonzero_tuple
10 | from .blocks import CNNBlockBase, DepthwiseSeparableConv2d
11 | from .aspp import ASPP
12 |
13 | __all__ = [k for k in globals().keys() if not k.startswith("_")]
14 |
--------------------------------------------------------------------------------
/detectron2/layers/aspp.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 |
3 | from copy import deepcopy
4 | import fvcore.nn.weight_init as weight_init
5 | import torch
6 | from torch import nn
7 | from torch.nn import functional as F
8 |
9 | from .batch_norm import get_norm
10 | from .wrappers import Conv2d
11 |
12 |
13 | class ASPP(nn.Module):
14 | """
15 | Atrous Spatial Pyramid Pooling (ASPP).
16 | """
17 |
18 | def __init__(
19 | self,
20 | in_channels,
21 | out_channels,
22 | dilations,
23 | *,
24 | norm,
25 | activation,
26 | pool_kernel_size=None,
27 | dropout: float = 0.0,
28 | ):
29 | """
30 | Args:
31 | in_channels (int): number of input channels for ASPP.
32 | out_channels (int): number of output channels.
33 | dilations (list): a list of 3 dilations in ASPP.
34 | norm (str or callable): normalization for all conv layers.
35 | See :func:`layers.get_norm` for supported format. norm is
36 | applied to all conv layers except the conv following
37 | global average pooling.
38 | activation (callable): activation function.
39 | pool_kernel_size (tuple, list): the average pooling size (kh, kw)
40 | for image pooling layer in ASPP. If set to None, it always
41 | performs global average pooling. If not None, the input feature shape
42 | in forward() must be divisible by it. It is recommended
43 | to use a fixed input feature size in training, and set this
44 | option to match this size, so that it performs global average
45 | pooling in training, and the size of the pooling window stays
46 | consistent in inference.
47 | dropout (float): apply dropout on the output of ASPP. It is used in
48 | the official DeepLab implementation with a rate of 0.1:
49 | https://github.com/tensorflow/models/blob/21b73d22f3ed05b650e85ac50849408dd36de32e/research/deeplab/model.py#L532 # noqa
50 | """
51 | super(ASPP, self).__init__()
52 | assert len(dilations) == 3, "ASPP expects 3 dilations, got {}".format(len(dilations))
53 | self.pool_kernel_size = pool_kernel_size
54 | self.dropout = dropout
55 | use_bias = norm == ""
56 | self.convs = nn.ModuleList()
57 | # conv 1x1
58 | self.convs.append(
59 | Conv2d(
60 | in_channels,
61 | out_channels,
62 | kernel_size=1,
63 | bias=use_bias,
64 | norm=get_norm(norm, out_channels),
65 | activation=deepcopy(activation),
66 | )
67 | )
68 | weight_init.c2_xavier_fill(self.convs[-1])
69 | # atrous convs
70 | for dilation in dilations:
71 | self.convs.append(
72 | Conv2d(
73 | in_channels,
74 | out_channels,
75 | kernel_size=3,
76 | padding=dilation,
77 | dilation=dilation,
78 | bias=use_bias,
79 | norm=get_norm(norm, out_channels),
80 | activation=deepcopy(activation),
81 | )
82 | )
83 | weight_init.c2_xavier_fill(self.convs[-1])
84 | # image pooling
85 | # We do not add BatchNorm because the spatial resolution is 1x1,
86 | # although the original TF implementation has BatchNorm.
87 | if pool_kernel_size is None:
88 | image_pooling = nn.Sequential(
89 | nn.AdaptiveAvgPool2d(1),
90 | Conv2d(in_channels, out_channels, 1, bias=True, activation=deepcopy(activation)),
91 | )
92 | else:
93 | image_pooling = nn.Sequential(
94 | nn.AvgPool2d(kernel_size=pool_kernel_size, stride=1),
95 | Conv2d(in_channels, out_channels, 1, bias=True, activation=deepcopy(activation)),
96 | )
97 | weight_init.c2_xavier_fill(image_pooling[1])
98 | self.convs.append(image_pooling)
99 |
100 | self.project = Conv2d(
101 | 5 * out_channels,
102 | out_channels,
103 | kernel_size=1,
104 | bias=use_bias,
105 | norm=get_norm(norm, out_channels),
106 | activation=deepcopy(activation),
107 | )
108 | weight_init.c2_xavier_fill(self.project)
109 |
110 | def forward(self, x):
111 | size = x.shape[-2:]
112 | if self.pool_kernel_size is not None:
113 | if size[0] % self.pool_kernel_size[0] or size[1] % self.pool_kernel_size[1]:
114 | raise ValueError(
115 | "Input shape must be divisible by `pool_kernel_size`. "
116 | "Input size: {} `pool_kernel_size`: {}".format(size, self.pool_kernel_size)
117 | )
118 | res = []
119 | for conv in self.convs:
120 | res.append(conv(x))
121 | res[-1] = F.interpolate(res[-1], size=size, mode="bilinear", align_corners=False)
122 | res = torch.cat(res, dim=1)
123 | res = self.project(res)
124 | res = F.dropout(res, self.dropout, training=self.training) if self.dropout > 0 else res
125 | return res
126 |
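Usage note (illustrative sketch, not part of the file above): constructing an ASPP module and running it on a dummy feature map. The channel sizes, dilations, and the 32x32 input below are arbitrary example values, not values used anywhere in this project.

import torch
from torch import nn

from detectron2.layers import ASPP

aspp = ASPP(
    in_channels=256,
    out_channels=128,
    dilations=[6, 12, 18],
    norm="",                # "" means no normalization; conv layers then use bias
    activation=nn.ReLU(),
    pool_kernel_size=None,  # None -> global average pooling in the image-pooling branch
    dropout=0.1,
)
x = torch.randn(2, 256, 32, 32)
y = aspp(x)
print(y.shape)  # torch.Size([2, 128, 32, 32])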
--------------------------------------------------------------------------------
/detectron2/layers/blocks.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # Copyright (c) Facebook, Inc. and its affiliates.
3 |
4 | import fvcore.nn.weight_init as weight_init
5 | from torch import nn
6 |
7 | from .batch_norm import FrozenBatchNorm2d, get_norm
8 | from .wrappers import Conv2d
9 |
10 |
11 | """
12 | CNN building blocks.
13 | """
14 |
15 |
16 | class CNNBlockBase(nn.Module):
17 | """
18 | A CNN block is assumed to have input channels, output channels and a stride.
19 | The input and output of `forward()` method must be NCHW tensors.
20 | The method can perform arbitrary computation but must match the given
21 | channels and stride specification.
22 |
23 | Attributes:
24 | in_channels (int):
25 | out_channels (int):
26 | stride (int):
27 | """
28 |
29 | def __init__(self, in_channels, out_channels, stride):
30 | """
31 | The `__init__` method of any subclass should also contain these arguments.
32 |
33 | Args:
34 | in_channels (int):
35 | out_channels (int):
36 | stride (int):
37 | """
38 | super().__init__()
39 | self.in_channels = in_channels
40 | self.out_channels = out_channels
41 | self.stride = stride
42 |
43 | def freeze(self):
44 | """
45 | Make this block not trainable.
46 | This method sets all parameters to `requires_grad=False`,
47 | and converts all BatchNorm layers to FrozenBatchNorm.
48 |
49 | Returns:
50 | the block itself
51 | """
52 | for p in self.parameters():
53 | p.requires_grad = False
54 | FrozenBatchNorm2d.convert_frozen_batchnorm(self)
55 | return self
56 |
57 |
58 | class DepthwiseSeparableConv2d(nn.Module):
59 | """
60 | A kxk depthwise convolution + a 1x1 convolution.
61 |
62 | In :paper:`xception`, norm & activation are applied on the second conv.
63 | :paper:`mobilenet` uses norm & activation on both convs.
64 | """
65 |
66 | def __init__(
67 | self,
68 | in_channels,
69 | out_channels,
70 | kernel_size=3,
71 | padding=1,
72 | *,
73 | norm1=None,
74 | activation1=None,
75 | norm2=None,
76 | activation2=None,
77 | ):
78 | """
79 | Args:
80 | norm1, norm2 (str or callable): normalization for the two conv layers.
81 | activation1, activation2 (callable(Tensor) -> Tensor): activation
82 | function for the two conv layers.
83 | """
84 | super().__init__()
85 | self.depthwise = Conv2d(
86 | in_channels,
87 | in_channels,
88 | kernel_size=kernel_size,
89 | padding=padding,
90 | groups=in_channels,
91 | bias=not norm1,
92 | norm=get_norm(norm1, in_channels),
93 | activation=activation1,
94 | )
95 | self.pointwise = Conv2d(
96 | in_channels,
97 | out_channels,
98 | kernel_size=1,
99 | bias=not norm2,
100 | norm=get_norm(norm2, out_channels),
101 | activation=activation2,
102 | )
103 |
104 | # default initialization
105 | weight_init.c2_msra_fill(self.depthwise)
106 | weight_init.c2_msra_fill(self.pointwise)
107 |
108 | def forward(self, x):
109 | return self.pointwise(self.depthwise(x))
110 |
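Usage note (illustrative sketch, not part of the file above): a hypothetical `ToyBlock` subclass of `CNNBlockBase` that can be frozen, followed by a `DepthwiseSeparableConv2d`. All sizes below are arbitrary example values.

import torch
from torch.nn import functional as F

from detectron2.layers import CNNBlockBase, Conv2d, DepthwiseSeparableConv2d, get_norm


class ToyBlock(CNNBlockBase):
    def __init__(self, in_channels=3, out_channels=64):
        super().__init__(in_channels, out_channels, stride=1)
        self.conv = Conv2d(
            in_channels, out_channels, kernel_size=3, padding=1,
            bias=False, norm=get_norm("BN", out_channels),
        )

    def forward(self, x):
        return self.conv(x)


block = ToyBlock().freeze()  # parameters frozen, BN converted to FrozenBatchNorm2d
dw = DepthwiseSeparableConv2d(64, 128, norm1="BN", activation1=F.relu)
y = dw(block(torch.randn(1, 3, 32, 32)))
print(y.shape)  # torch.Size([1, 128, 32, 32])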
--------------------------------------------------------------------------------
/detectron2/layers/csrc/README.md:
--------------------------------------------------------------------------------
1 |
2 |
3 | To add a new Op:
4 |
5 | 1. Create a new directory
6 | 2. Implement new ops there
7 | 3. Declare its Python interface in `vision.cpp`.
8 |
--------------------------------------------------------------------------------
/detectron2/layers/csrc/ROIAlign/ROIAlign.h:
--------------------------------------------------------------------------------
1 | // Copyright (c) Facebook, Inc. and its affiliates.
2 | #pragma once
3 | #include <torch/types.h>
4 |
5 | namespace detectron2 {
6 |
7 | at::Tensor ROIAlign_forward_cpu(
8 | const at::Tensor& input,
9 | const at::Tensor& rois,
10 | const float spatial_scale,
11 | const int pooled_height,
12 | const int pooled_width,
13 | const int sampling_ratio,
14 | bool aligned);
15 |
16 | at::Tensor ROIAlign_backward_cpu(
17 | const at::Tensor& grad,
18 | const at::Tensor& rois,
19 | const float spatial_scale,
20 | const int pooled_height,
21 | const int pooled_width,
22 | const int batch_size,
23 | const int channels,
24 | const int height,
25 | const int width,
26 | const int sampling_ratio,
27 | bool aligned);
28 |
29 | #if defined(WITH_CUDA) || defined(WITH_HIP)
30 | at::Tensor ROIAlign_forward_cuda(
31 | const at::Tensor& input,
32 | const at::Tensor& rois,
33 | const float spatial_scale,
34 | const int pooled_height,
35 | const int pooled_width,
36 | const int sampling_ratio,
37 | bool aligned);
38 |
39 | at::Tensor ROIAlign_backward_cuda(
40 | const at::Tensor& grad,
41 | const at::Tensor& rois,
42 | const float spatial_scale,
43 | const int pooled_height,
44 | const int pooled_width,
45 | const int batch_size,
46 | const int channels,
47 | const int height,
48 | const int width,
49 | const int sampling_ratio,
50 | bool aligned);
51 | #endif
52 |
53 | // Interface for Python
54 | inline at::Tensor ROIAlign_forward(
55 | const at::Tensor& input,
56 | const at::Tensor& rois,
57 | const float spatial_scale,
58 | const int pooled_height,
59 | const int pooled_width,
60 | const int sampling_ratio,
61 | bool aligned) {
62 | if (input.is_cuda()) {
63 | #if defined(WITH_CUDA) || defined(WITH_HIP)
64 | return ROIAlign_forward_cuda(
65 | input,
66 | rois,
67 | spatial_scale,
68 | pooled_height,
69 | pooled_width,
70 | sampling_ratio,
71 | aligned);
72 | #else
73 | AT_ERROR("Not compiled with GPU support");
74 | #endif
75 | }
76 | return ROIAlign_forward_cpu(
77 | input,
78 | rois,
79 | spatial_scale,
80 | pooled_height,
81 | pooled_width,
82 | sampling_ratio,
83 | aligned);
84 | }
85 |
86 | inline at::Tensor ROIAlign_backward(
87 | const at::Tensor& grad,
88 | const at::Tensor& rois,
89 | const float spatial_scale,
90 | const int pooled_height,
91 | const int pooled_width,
92 | const int batch_size,
93 | const int channels,
94 | const int height,
95 | const int width,
96 | const int sampling_ratio,
97 | bool aligned) {
98 | if (grad.is_cuda()) {
99 | #if defined(WITH_CUDA) || defined(WITH_HIP)
100 | return ROIAlign_backward_cuda(
101 | grad,
102 | rois,
103 | spatial_scale,
104 | pooled_height,
105 | pooled_width,
106 | batch_size,
107 | channels,
108 | height,
109 | width,
110 | sampling_ratio,
111 | aligned);
112 | #else
113 | AT_ERROR("Not compiled with GPU support");
114 | #endif
115 | }
116 | return ROIAlign_backward_cpu(
117 | grad,
118 | rois,
119 | spatial_scale,
120 | pooled_height,
121 | pooled_width,
122 | batch_size,
123 | channels,
124 | height,
125 | width,
126 | sampling_ratio,
127 | aligned);
128 | }
129 |
130 | } // namespace detectron2
131 |
--------------------------------------------------------------------------------
/detectron2/layers/csrc/ROIAlignRotated/ROIAlignRotated.h:
--------------------------------------------------------------------------------
1 | // Copyright (c) Facebook, Inc. and its affiliates.
2 | #pragma once
3 | #include <torch/types.h>
4 |
5 | namespace detectron2 {
6 |
7 | at::Tensor ROIAlignRotated_forward_cpu(
8 | const at::Tensor& input,
9 | const at::Tensor& rois,
10 | const float spatial_scale,
11 | const int pooled_height,
12 | const int pooled_width,
13 | const int sampling_ratio);
14 |
15 | at::Tensor ROIAlignRotated_backward_cpu(
16 | const at::Tensor& grad,
17 | const at::Tensor& rois,
18 | const float spatial_scale,
19 | const int pooled_height,
20 | const int pooled_width,
21 | const int batch_size,
22 | const int channels,
23 | const int height,
24 | const int width,
25 | const int sampling_ratio);
26 |
27 | #if defined(WITH_CUDA) || defined(WITH_HIP)
28 | at::Tensor ROIAlignRotated_forward_cuda(
29 | const at::Tensor& input,
30 | const at::Tensor& rois,
31 | const float spatial_scale,
32 | const int pooled_height,
33 | const int pooled_width,
34 | const int sampling_ratio);
35 |
36 | at::Tensor ROIAlignRotated_backward_cuda(
37 | const at::Tensor& grad,
38 | const at::Tensor& rois,
39 | const float spatial_scale,
40 | const int pooled_height,
41 | const int pooled_width,
42 | const int batch_size,
43 | const int channels,
44 | const int height,
45 | const int width,
46 | const int sampling_ratio);
47 | #endif
48 |
49 | // Interface for Python
50 | inline at::Tensor ROIAlignRotated_forward(
51 | const at::Tensor& input,
52 | const at::Tensor& rois,
53 | const float spatial_scale,
54 | const int pooled_height,
55 | const int pooled_width,
56 | const int sampling_ratio) {
57 | if (input.is_cuda()) {
58 | #if defined(WITH_CUDA) || defined(WITH_HIP)
59 | return ROIAlignRotated_forward_cuda(
60 | input,
61 | rois,
62 | spatial_scale,
63 | pooled_height,
64 | pooled_width,
65 | sampling_ratio);
66 | #else
67 | AT_ERROR("Not compiled with GPU support");
68 | #endif
69 | }
70 | return ROIAlignRotated_forward_cpu(
71 | input, rois, spatial_scale, pooled_height, pooled_width, sampling_ratio);
72 | }
73 |
74 | inline at::Tensor ROIAlignRotated_backward(
75 | const at::Tensor& grad,
76 | const at::Tensor& rois,
77 | const float spatial_scale,
78 | const int pooled_height,
79 | const int pooled_width,
80 | const int batch_size,
81 | const int channels,
82 | const int height,
83 | const int width,
84 | const int sampling_ratio) {
85 | if (grad.is_cuda()) {
86 | #if defined(WITH_CUDA) || defined(WITH_HIP)
87 | return ROIAlignRotated_backward_cuda(
88 | grad,
89 | rois,
90 | spatial_scale,
91 | pooled_height,
92 | pooled_width,
93 | batch_size,
94 | channels,
95 | height,
96 | width,
97 | sampling_ratio);
98 | #else
99 | AT_ERROR("Not compiled with GPU support");
100 | #endif
101 | }
102 | return ROIAlignRotated_backward_cpu(
103 | grad,
104 | rois,
105 | spatial_scale,
106 | pooled_height,
107 | pooled_width,
108 | batch_size,
109 | channels,
110 | height,
111 | width,
112 | sampling_ratio);
113 | }
114 |
115 | } // namespace detectron2
116 |
--------------------------------------------------------------------------------
/detectron2/layers/csrc/box_iou_rotated/box_iou_rotated.h:
--------------------------------------------------------------------------------
1 | // Copyright (c) Facebook, Inc. and its affiliates.
2 | #pragma once
3 | #include <torch/types.h>
4 |
5 | namespace detectron2 {
6 |
7 | at::Tensor box_iou_rotated_cpu(
8 | const at::Tensor& boxes1,
9 | const at::Tensor& boxes2);
10 |
11 | #if defined(WITH_CUDA) || defined(WITH_HIP)
12 | at::Tensor box_iou_rotated_cuda(
13 | const at::Tensor& boxes1,
14 | const at::Tensor& boxes2);
15 | #endif
16 |
17 | // Interface for Python
18 | // inline is needed to prevent multiple function definitions when this header is
19 | // included by different cpps
20 | inline at::Tensor box_iou_rotated(
21 | const at::Tensor& boxes1,
22 | const at::Tensor& boxes2) {
23 | assert(boxes1.device().is_cuda() == boxes2.device().is_cuda());
24 | if (boxes1.device().is_cuda()) {
25 | #if defined(WITH_CUDA) || defined(WITH_HIP)
26 | return box_iou_rotated_cuda(boxes1.contiguous(), boxes2.contiguous());
27 | #else
28 | AT_ERROR("Not compiled with GPU support");
29 | #endif
30 | }
31 |
32 | return box_iou_rotated_cpu(boxes1.contiguous(), boxes2.contiguous());
33 | }
34 |
35 | } // namespace detectron2
36 |
--------------------------------------------------------------------------------
/detectron2/layers/csrc/box_iou_rotated/box_iou_rotated_cpu.cpp:
--------------------------------------------------------------------------------
1 | // Copyright (c) Facebook, Inc. and its affiliates.
2 | #include "box_iou_rotated.h"
3 | #include "box_iou_rotated_utils.h"
4 |
5 | namespace detectron2 {
6 |
7 | template <typename T>
8 | void box_iou_rotated_cpu_kernel(
9 | const at::Tensor& boxes1,
10 | const at::Tensor& boxes2,
11 | at::Tensor& ious) {
12 | auto num_boxes1 = boxes1.size(0);
13 | auto num_boxes2 = boxes2.size(0);
14 |
15 | for (int i = 0; i < num_boxes1; i++) {
16 | for (int j = 0; j < num_boxes2; j++) {
17 | ious[i * num_boxes2 + j] = single_box_iou_rotated<T>(
18 | boxes1[i].data_ptr<T>(), boxes2[j].data_ptr<T>());
19 | }
20 | }
21 | }
22 |
23 | at::Tensor box_iou_rotated_cpu(
24 | // input must be contiguous:
25 | const at::Tensor& boxes1,
26 | const at::Tensor& boxes2) {
27 | auto num_boxes1 = boxes1.size(0);
28 | auto num_boxes2 = boxes2.size(0);
29 | at::Tensor ious =
30 | at::empty({num_boxes1 * num_boxes2}, boxes1.options().dtype(at::kFloat));
31 |
32 | box_iou_rotated_cpu_kernel<float>(boxes1, boxes2, ious);
33 |
34 | // reshape from 1d array to 2d array
35 | auto shape = std::vector<int64_t>{num_boxes1, num_boxes2};
36 | return ious.reshape(shape);
37 | }
38 |
39 | } // namespace detectron2
40 |
--------------------------------------------------------------------------------
/detectron2/layers/csrc/box_iou_rotated/box_iou_rotated_cuda.cu:
--------------------------------------------------------------------------------
1 | // Copyright (c) Facebook, Inc. and its affiliates.
2 | #include <ATen/ATen.h>
3 | #include <ATen/cuda/CUDAContext.h>
4 | #include <c10/cuda/CUDAGuard.h>
5 | #include <ATen/cuda/CUDAApplyUtils.cuh>
6 | #include "box_iou_rotated_utils.h"
7 |
8 | namespace detectron2 {
9 |
10 | // 2D block with 32 * 16 = 512 threads per block
11 | const int BLOCK_DIM_X = 32;
12 | const int BLOCK_DIM_Y = 16;
13 |
14 | template <typename T>
15 | __global__ void box_iou_rotated_cuda_kernel(
16 | const int n_boxes1,
17 | const int n_boxes2,
18 | const T* dev_boxes1,
19 | const T* dev_boxes2,
20 | T* dev_ious) {
21 | const int row_start = blockIdx.x * blockDim.x;
22 | const int col_start = blockIdx.y * blockDim.y;
23 |
24 | const int row_size = min(n_boxes1 - row_start, blockDim.x);
25 | const int col_size = min(n_boxes2 - col_start, blockDim.y);
26 |
27 | __shared__ float block_boxes1[BLOCK_DIM_X * 5];
28 | __shared__ float block_boxes2[BLOCK_DIM_Y * 5];
29 |
30 | // It's safe to copy using threadIdx.x since BLOCK_DIM_X >= BLOCK_DIM_Y
31 | if (threadIdx.x < row_size && threadIdx.y == 0) {
32 | block_boxes1[threadIdx.x * 5 + 0] =
33 | dev_boxes1[(row_start + threadIdx.x) * 5 + 0];
34 | block_boxes1[threadIdx.x * 5 + 1] =
35 | dev_boxes1[(row_start + threadIdx.x) * 5 + 1];
36 | block_boxes1[threadIdx.x * 5 + 2] =
37 | dev_boxes1[(row_start + threadIdx.x) * 5 + 2];
38 | block_boxes1[threadIdx.x * 5 + 3] =
39 | dev_boxes1[(row_start + threadIdx.x) * 5 + 3];
40 | block_boxes1[threadIdx.x * 5 + 4] =
41 | dev_boxes1[(row_start + threadIdx.x) * 5 + 4];
42 | }
43 |
44 | if (threadIdx.x < col_size && threadIdx.y == 0) {
45 | block_boxes2[threadIdx.x * 5 + 0] =
46 | dev_boxes2[(col_start + threadIdx.x) * 5 + 0];
47 | block_boxes2[threadIdx.x * 5 + 1] =
48 | dev_boxes2[(col_start + threadIdx.x) * 5 + 1];
49 | block_boxes2[threadIdx.x * 5 + 2] =
50 | dev_boxes2[(col_start + threadIdx.x) * 5 + 2];
51 | block_boxes2[threadIdx.x * 5 + 3] =
52 | dev_boxes2[(col_start + threadIdx.x) * 5 + 3];
53 | block_boxes2[threadIdx.x * 5 + 4] =
54 | dev_boxes2[(col_start + threadIdx.x) * 5 + 4];
55 | }
56 | __syncthreads();
57 |
58 | if (threadIdx.x < row_size && threadIdx.y < col_size) {
59 | int offset = (row_start + threadIdx.x) * n_boxes2 + col_start + threadIdx.y;
60 | dev_ious[offset] = single_box_iou_rotated<T>(
61 | block_boxes1 + threadIdx.x * 5, block_boxes2 + threadIdx.y * 5);
62 | }
63 | }
64 |
65 | at::Tensor box_iou_rotated_cuda(
66 | // input must be contiguous
67 | const at::Tensor& boxes1,
68 | const at::Tensor& boxes2) {
69 | using scalar_t = float;
70 | AT_ASSERTM(
71 | boxes1.scalar_type() == at::kFloat, "boxes1 must be a float tensor");
72 | AT_ASSERTM(
73 | boxes2.scalar_type() == at::kFloat, "boxes2 must be a float tensor");
74 | AT_ASSERTM(boxes1.is_cuda(), "boxes1 must be a CUDA tensor");
75 | AT_ASSERTM(boxes2.is_cuda(), "boxes2 must be a CUDA tensor");
76 | at::cuda::CUDAGuard device_guard(boxes1.device());
77 |
78 | auto num_boxes1 = boxes1.size(0);
79 | auto num_boxes2 = boxes2.size(0);
80 |
81 | at::Tensor ious =
82 | at::empty({num_boxes1 * num_boxes2}, boxes1.options().dtype(at::kFloat));
83 |
84 | bool transpose = false;
85 | if (num_boxes1 > 0 && num_boxes2 > 0) {
86 | scalar_t *data1 = boxes1.data_ptr<scalar_t>(),
87 | *data2 = boxes2.data_ptr<scalar_t>();
88 |
89 | if (num_boxes2 > 65535 * BLOCK_DIM_Y) {
90 | AT_ASSERTM(
91 | num_boxes1 <= 65535 * BLOCK_DIM_Y,
92 | "Too many boxes for box_iou_rotated_cuda!");
93 | // x dim is allowed to be large, but y dim cannot,
94 | // so we transpose the two to avoid "invalid configuration argument"
95 | // error. We assume one of them is small. Otherwise the result is hard to
96 | // fit in memory anyway.
97 | std::swap(num_boxes1, num_boxes2);
98 | std::swap(data1, data2);
99 | transpose = true;
100 | }
101 |
102 | const int blocks_x =
103 | at::cuda::ATenCeilDiv(static_cast<int>(num_boxes1), BLOCK_DIM_X);
104 | const int blocks_y =
105 | at::cuda::ATenCeilDiv(static_cast<int>(num_boxes2), BLOCK_DIM_Y);
106 |
107 | dim3 blocks(blocks_x, blocks_y);
108 | dim3 threads(BLOCK_DIM_X, BLOCK_DIM_Y);
109 | cudaStream_t stream = at::cuda::getCurrentCUDAStream();
110 |
111 | box_iou_rotated_cuda_kernel<scalar_t><<<blocks, threads, 0, stream>>>(
112 | num_boxes1,
113 | num_boxes2,
114 | data1,
115 | data2,
116 | (scalar_t*)ious.data_ptr());
117 |
118 | AT_CUDA_CHECK(cudaGetLastError());
119 | }
120 |
121 | // reshape from 1d array to 2d array
122 | auto shape = std::vector<int64_t>{num_boxes1, num_boxes2};
123 | if (transpose) {
124 | return ious.view(shape).t();
125 | } else {
126 | return ious.view(shape);
127 | }
128 | }
129 |
130 | } // namespace detectron2
131 |
--------------------------------------------------------------------------------
/detectron2/layers/csrc/cocoeval/cocoeval.h:
--------------------------------------------------------------------------------
1 | // Copyright (c) Facebook, Inc. and its affiliates.
2 | #pragma once
3 |
4 | #include <pybind11/numpy.h>
5 | #include <pybind11/pybind11.h>
6 | #include <pybind11/stl.h>
7 | #include <pybind11/stl_bind.h>
8 | #include <vector>
9 |
10 | namespace py = pybind11;
11 |
12 | namespace detectron2 {
13 |
14 | namespace COCOeval {
15 |
16 | // Annotation data for a single object instance in an image
17 | struct InstanceAnnotation {
18 | InstanceAnnotation(
19 | uint64_t id,
20 | double score,
21 | double area,
22 | bool is_crowd,
23 | bool ignore)
24 | : id{id}, score{score}, area{area}, is_crowd{is_crowd}, ignore{ignore} {}
25 | uint64_t id;
26 | double score = 0.;
27 | double area = 0.;
28 | bool is_crowd = false;
29 | bool ignore = false;
30 | };
31 |
32 | // Stores intermediate results for evaluating detection results for a single
33 | // image that has D detected instances and G ground truth instances. This stores
34 | // matches between detected and ground truth instances
35 | struct ImageEvaluation {
36 | // For each of the D detected instances, the id of the matched ground truth
37 | // instance, or 0 if unmatched
38 | std::vector<uint64_t> detection_matches;
39 |
40 | // The detection score of each of the D detected instances
41 | std::vector<double> detection_scores;
42 |
43 | // Marks whether or not each of G instances was ignored from evaluation (e.g.,
44 | // because it's outside area_range)
45 | std::vector<bool> ground_truth_ignores;
46 |
47 | // Marks whether or not each of D instances was ignored from evaluation (e.g.,
48 | // because it's outside aRng)
49 | std::vector<bool> detection_ignores;
50 | };
51 |
52 | template <typename T>
53 | using ImageCategoryInstances = std::vector<std::vector<std::vector<T>>>;
54 |
55 | // C++ implementation of COCO API cocoeval.py::COCOeval.evaluateImg(). For each
56 | // combination of image, category, area range settings, and IOU thresholds to
57 | // evaluate, it matches detected instances to ground truth instances and stores
58 | // the results into a vector of ImageEvaluation results, which will be
59 | // interpreted by the COCOeval::Accumulate() function to produce precision-recall
60 | // curves. The parameters of nested vectors have the following semantics:
61 | // image_category_ious[i][c][d][g] is the intersection over union of the d'th
62 | // detected instance and g'th ground truth instance of
63 | // category category_ids[c] in image image_ids[i]
64 | // image_category_ground_truth_instances[i][c] is a vector of ground truth
65 | // instances in image image_ids[i] of category category_ids[c]
66 | // image_category_detection_instances[i][c] is a vector of detected
67 | // instances in image image_ids[i] of category category_ids[c]
68 | std::vector<ImageEvaluation> EvaluateImages(
69 | const std::vector<std::array<double, 2>>& area_ranges, // vector of 2-tuples
70 | int max_detections,
71 | const std::vector<double>& iou_thresholds,
72 | const ImageCategoryInstances<std::vector<double>>& image_category_ious,
73 | const ImageCategoryInstances<InstanceAnnotation>&
74 | image_category_ground_truth_instances,
75 | const ImageCategoryInstances<InstanceAnnotation>&
76 | image_category_detection_instances);
77 |
78 | // C++ implementation of COCOeval.accumulate(), which generates precision
79 | // recall curves for each set of category, IOU threshold, detection area range,
80 | // and max number of detections parameters. It is assumed that the parameter
81 | // evaluations is the return value of the function COCOeval::EvaluateImages(),
82 | // which was called with the same parameter settings params
83 | py::dict Accumulate(
84 | const py::object& params,
85 | const std::vector<ImageEvaluation>& evaluations);
86 |
87 | } // namespace COCOeval
88 | } // namespace detectron2
89 |
--------------------------------------------------------------------------------
/detectron2/layers/csrc/cuda_version.cu:
--------------------------------------------------------------------------------
1 | // Copyright (c) Facebook, Inc. and its affiliates.
2 |
3 | #include <cuda_runtime_api.h>
4 |
5 | namespace detectron2 {
6 | int get_cudart_version() {
7 | // Not a ROCM platform: Either HIP is not used, or
8 | // it is used, but platform is not ROCM (i.e. it is CUDA)
9 | #if !defined(__HIP_PLATFORM_HCC__)
10 | return CUDART_VERSION;
11 | #else
12 | int version = 0;
13 |
14 | #if HIP_VERSION_MAJOR != 0
15 | // Create a convention similar to that of CUDA, as assumed by other
16 | // parts of the code.
17 |
18 | version = HIP_VERSION_MINOR;
19 | version += (HIP_VERSION_MAJOR * 100);
20 | #else
21 | hipRuntimeGetVersion(&version);
22 | #endif
23 | return version;
24 | #endif
25 | }
26 | } // namespace detectron2
27 |
--------------------------------------------------------------------------------
/detectron2/layers/csrc/nms_rotated/nms_rotated.h:
--------------------------------------------------------------------------------
1 | // Copyright (c) Facebook, Inc. and its affiliates.
2 | #pragma once
3 | #include <torch/types.h>
4 |
5 | namespace detectron2 {
6 |
7 | at::Tensor nms_rotated_cpu(
8 | const at::Tensor& dets,
9 | const at::Tensor& scores,
10 | const float iou_threshold);
11 |
12 | #if defined(WITH_CUDA) || defined(WITH_HIP)
13 | at::Tensor nms_rotated_cuda(
14 | const at::Tensor& dets,
15 | const at::Tensor& scores,
16 | const float iou_threshold);
17 | #endif
18 |
19 | // Interface for Python
20 | // inline is needed to prevent multiple function definitions when this header is
21 | // included by different cpps
22 | inline at::Tensor nms_rotated(
23 | const at::Tensor& dets,
24 | const at::Tensor& scores,
25 | const float iou_threshold) {
26 | assert(dets.device().is_cuda() == scores.device().is_cuda());
27 | if (dets.device().is_cuda()) {
28 | #if defined(WITH_CUDA) || defined(WITH_HIP)
29 | return nms_rotated_cuda(
30 | dets.contiguous(), scores.contiguous(), iou_threshold);
31 | #else
32 | AT_ERROR("Not compiled with GPU support");
33 | #endif
34 | }
35 |
36 | return nms_rotated_cpu(dets.contiguous(), scores.contiguous(), iou_threshold);
37 | }
38 |
39 | } // namespace detectron2
40 |
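Usage note (illustrative sketch, not part of the file above): on the Python side this operator is exposed through `detectron2.layers.nms_rotated` (see `detectron2/layers/nms.py`), which requires the compiled `detectron2._C` extension. Boxes are (x_center, y_center, width, height, angle_degrees); the values below are arbitrary example numbers.

import torch

from detectron2.layers import nms_rotated

boxes = torch.tensor([
    [10.0, 10.0, 8.0, 6.0, 0.0],
    [10.0, 10.0, 8.0, 6.0, 5.0],   # heavily overlaps the first box
    [40.0, 40.0, 8.0, 6.0, 30.0],
])
scores = torch.tensor([0.9, 0.8, 0.7])
keep = nms_rotated(boxes, scores, 0.5)  # IoU threshold 0.5
print(keep)  # indices of the kept boxes, e.g. tensor([0, 2])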
--------------------------------------------------------------------------------
/detectron2/layers/csrc/nms_rotated/nms_rotated_cpu.cpp:
--------------------------------------------------------------------------------
1 | // Copyright (c) Facebook, Inc. and its affiliates.
2 | #include "../box_iou_rotated/box_iou_rotated_utils.h"
3 | #include "nms_rotated.h"
4 |
5 | namespace detectron2 {
6 |
7 | template <typename scalar_t>
8 | at::Tensor nms_rotated_cpu_kernel(
9 | const at::Tensor& dets,
10 | const at::Tensor& scores,
11 | const float iou_threshold) {
12 | // nms_rotated_cpu_kernel is modified from torchvision's nms_cpu_kernel,
13 | // however, the code in this function is much shorter because
14 | // we delegate the IoU computation for rotated boxes to
15 | // the single_box_iou_rotated function in box_iou_rotated_utils.h
16 | AT_ASSERTM(dets.device().is_cpu(), "dets must be a CPU tensor");
17 | AT_ASSERTM(scores.device().is_cpu(), "scores must be a CPU tensor");
18 | AT_ASSERTM(
19 | dets.scalar_type() == scores.scalar_type(),
20 | "dets should have the same type as scores");
21 |
22 | if (dets.numel() == 0) {
23 | return at::empty({0}, dets.options().dtype(at::kLong));
24 | }
25 |
26 | auto order_t = std::get<1>(scores.sort(0, /* descending=*/true));
27 |
28 | auto ndets = dets.size(0);
29 | at::Tensor suppressed_t = at::zeros({ndets}, dets.options().dtype(at::kByte));
30 | at::Tensor keep_t = at::zeros({ndets}, dets.options().dtype(at::kLong));
31 |
32 | auto suppressed = suppressed_t.data_ptr<uint8_t>();
33 | auto keep = keep_t.data_ptr<int64_t>();
34 | auto order = order_t.data_ptr<int64_t>();
35 |
36 | int64_t num_to_keep = 0;
37 |
38 | for (int64_t _i = 0; _i < ndets; _i++) {
39 | auto i = order[_i];
40 | if (suppressed[i] == 1) {
41 | continue;
42 | }
43 |
44 | keep[num_to_keep++] = i;
45 |
46 | for (int64_t _j = _i + 1; _j < ndets; _j++) {
47 | auto j = order[_j];
48 | if (suppressed[j] == 1) {
49 | continue;
50 | }
51 |
52 | auto ovr = single_box_iou_rotated<scalar_t>(
53 | dets[i].data_ptr<scalar_t>(), dets[j].data_ptr<scalar_t>());
54 | if (ovr >= iou_threshold) {
55 | suppressed[j] = 1;
56 | }
57 | }
58 | }
59 | return keep_t.narrow(/*dim=*/0, /*start=*/0, /*length=*/num_to_keep);
60 | }
61 |
62 | at::Tensor nms_rotated_cpu(
63 | // input must be contiguous
64 | const at::Tensor& dets,
65 | const at::Tensor& scores,
66 | const float iou_threshold) {
67 | auto result = at::empty({0}, dets.options());
68 |
69 | AT_DISPATCH_FLOATING_TYPES(dets.scalar_type(), "nms_rotated", [&] {
70 | result = nms_rotated_cpu_kernel<scalar_t>(dets, scores, iou_threshold);
71 | });
72 | return result;
73 | }
74 |
75 | } // namespace detectron2
76 |
--------------------------------------------------------------------------------
/detectron2/layers/csrc/nms_rotated/nms_rotated_cuda.cu:
--------------------------------------------------------------------------------
1 | // Copyright (c) Facebook, Inc. and its affiliates.
2 | #include <ATen/ATen.h>
3 | #include <ATen/cuda/CUDAContext.h>
4 | #include <c10/cuda/CUDAGuard.h>
5 | #include <ATen/cuda/CUDAApplyUtils.cuh>
6 | #ifdef WITH_CUDA
7 | #include "../box_iou_rotated/box_iou_rotated_utils.h"
8 | #endif
9 | // TODO avoid this when pytorch supports "same directory" hipification
10 | #ifdef WITH_HIP
11 | #include "box_iou_rotated/box_iou_rotated_utils.h"
12 | #endif
13 |
14 | using namespace detectron2;
15 |
16 | namespace {
17 | int const threadsPerBlock = sizeof(unsigned long long) * 8;
18 | }
19 |
20 | template <typename T>
21 | __global__ void nms_rotated_cuda_kernel(
22 | const int n_boxes,
23 | const float iou_threshold,
24 | const T* dev_boxes,
25 | unsigned long long* dev_mask) {
26 | // nms_rotated_cuda_kernel is modified from torchvision's nms_cuda_kernel
27 |
28 | const int row_start = blockIdx.y;
29 | const int col_start = blockIdx.x;
30 |
31 | // if (row_start > col_start) return;
32 |
33 | const int row_size =
34 | min(n_boxes - row_start * threadsPerBlock, threadsPerBlock);
35 | const int col_size =
36 | min(n_boxes - col_start * threadsPerBlock, threadsPerBlock);
37 |
38 | // Compared to nms_cuda_kernel, where each box is represented with 4 values
39 | // (x1, y1, x2, y2), each rotated box is represented with 5 values
40 | // (x_center, y_center, width, height, angle_degrees) here.
41 | __shared__ T block_boxes[threadsPerBlock * 5];
42 | if (threadIdx.x < col_size) {
43 | block_boxes[threadIdx.x * 5 + 0] =
44 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0];
45 | block_boxes[threadIdx.x * 5 + 1] =
46 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1];
47 | block_boxes[threadIdx.x * 5 + 2] =
48 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2];
49 | block_boxes[threadIdx.x * 5 + 3] =
50 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 3];
51 | block_boxes[threadIdx.x * 5 + 4] =
52 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4];
53 | }
54 | __syncthreads();
55 |
56 | if (threadIdx.x < row_size) {
57 | const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x;
58 | const T* cur_box = dev_boxes + cur_box_idx * 5;
59 | int i = 0;
60 | unsigned long long t = 0;
61 | int start = 0;
62 | if (row_start == col_start) {
63 | start = threadIdx.x + 1;
64 | }
65 | for (i = start; i < col_size; i++) {
66 | // Instead of devIoU used by original horizontal nms, here
67 | // we use the single_box_iou_rotated function from box_iou_rotated_utils.h
68 | if (single_box_iou_rotated<T>(cur_box, block_boxes + i * 5) >
69 | iou_threshold) {
70 | t |= 1ULL << i;
71 | }
72 | }
73 | const int col_blocks = at::cuda::ATenCeilDiv(n_boxes, threadsPerBlock);
74 | dev_mask[cur_box_idx * col_blocks + col_start] = t;
75 | }
76 | }
77 |
78 | namespace detectron2 {
79 |
80 | at::Tensor nms_rotated_cuda(
81 | // input must be contiguous
82 | const at::Tensor& dets,
83 | const at::Tensor& scores,
84 | float iou_threshold) {
85 | // using scalar_t = float;
86 | AT_ASSERTM(dets.is_cuda(), "dets must be a CUDA tensor");
87 | AT_ASSERTM(scores.is_cuda(), "scores must be a CUDA tensor");
88 | at::cuda::CUDAGuard device_guard(dets.device());
89 |
90 | auto order_t = std::get<1>(scores.sort(0, /* descending=*/true));
91 | auto dets_sorted = dets.index_select(0, order_t);
92 |
93 | auto dets_num = dets.size(0);
94 |
95 | const int col_blocks =
96 | at::cuda::ATenCeilDiv(static_cast<int>(dets_num), threadsPerBlock);
97 |
98 | at::Tensor mask =
99 | at::empty({dets_num * col_blocks}, dets.options().dtype(at::kLong));
100 |
101 | dim3 blocks(col_blocks, col_blocks);
102 | dim3 threads(threadsPerBlock);
103 | cudaStream_t stream = at::cuda::getCurrentCUDAStream();
104 |
105 | AT_DISPATCH_FLOATING_TYPES(
106 | dets_sorted.scalar_type(), "nms_rotated_kernel_cuda", [&] {
107 | nms_rotated_cuda_kernel<scalar_t><<<blocks, threads, 0, stream>>>(
108 | dets_num,
109 | iou_threshold,
110 | dets_sorted.data_ptr<scalar_t>(),
111 | (unsigned long long*)mask.data_ptr<int64_t>());
112 | });
113 |
114 | at::Tensor mask_cpu = mask.to(at::kCPU);
115 | unsigned long long* mask_host =
116 | (unsigned long long*)mask_cpu.data_ptr();
117 |
118 | std::vector remv(col_blocks);
119 | memset(&remv[0], 0, sizeof(unsigned long long) * col_blocks);
120 |
121 | at::Tensor keep =
122 | at::empty({dets_num}, dets.options().dtype(at::kLong).device(at::kCPU));
123 | int64_t* keep_out = keep.data_ptr<int64_t>();
124 |
125 | int num_to_keep = 0;
126 | for (int i = 0; i < dets_num; i++) {
127 | int nblock = i / threadsPerBlock;
128 | int inblock = i % threadsPerBlock;
129 |
130 | if (!(remv[nblock] & (1ULL << inblock))) {
131 | keep_out[num_to_keep++] = i;
132 | unsigned long long* p = mask_host + i * col_blocks;
133 | for (int j = nblock; j < col_blocks; j++) {
134 | remv[j] |= p[j];
135 | }
136 | }
137 | }
138 |
139 | AT_CUDA_CHECK(cudaGetLastError());
140 | return order_t.index(
141 | {keep.narrow(/*dim=*/0, /*start=*/0, /*length=*/num_to_keep)
142 | .to(order_t.device(), keep.scalar_type())});
143 | }
144 |
145 | } // namespace detectron2
146 |
--------------------------------------------------------------------------------
/detectron2/layers/csrc/vision.cpp:
--------------------------------------------------------------------------------
1 | // Copyright (c) Facebook, Inc. and its affiliates.
2 |
3 | #include <torch/extension.h>
4 | #include "ROIAlign/ROIAlign.h"
5 | #include "ROIAlignRotated/ROIAlignRotated.h"
6 | #include "box_iou_rotated/box_iou_rotated.h"
7 | #include "cocoeval/cocoeval.h"
8 | #include "deformable/deform_conv.h"
9 | #include "nms_rotated/nms_rotated.h"
10 |
11 | namespace detectron2 {
12 |
13 | #if defined(WITH_CUDA) || defined(WITH_HIP)
14 | extern int get_cudart_version();
15 | #endif
16 |
17 | std::string get_cuda_version() {
18 | #if defined(WITH_CUDA) || defined(WITH_HIP)
19 | std::ostringstream oss;
20 |
21 | #if defined(WITH_CUDA)
22 | oss << "CUDA ";
23 | #else
24 | oss << "HIP ";
25 | #endif
26 |
27 | // copied from
28 | // https://github.com/pytorch/pytorch/blob/master/aten/src/ATen/cuda/detail/CUDAHooks.cpp#L231
29 | auto printCudaStyleVersion = [&](int v) {
30 | oss << (v / 1000) << "." << (v / 10 % 100);
31 | if (v % 10 != 0) {
32 | oss << "." << (v % 10);
33 | }
34 | };
35 | printCudaStyleVersion(get_cudart_version());
36 | return oss.str();
37 | #else // neither CUDA nor HIP
38 | return std::string("not available");
39 | #endif
40 | }
41 |
42 | // similar to
43 | // https://github.com/pytorch/pytorch/blob/master/aten/src/ATen/Version.cpp
44 | std::string get_compiler_version() {
45 | std::ostringstream ss;
46 | #if defined(__GNUC__)
47 | #ifndef __clang__
48 |
49 | #if ((__GNUC__ <= 4) && (__GNUC_MINOR__ <= 8))
50 | #error "GCC >= 4.9 is required!"
51 | #endif
52 |
53 | { ss << "GCC " << __GNUC__ << "." << __GNUC_MINOR__; }
54 | #endif
55 | #endif
56 |
57 | #if defined(__clang_major__)
58 | {
59 | ss << "clang " << __clang_major__ << "." << __clang_minor__ << "."
60 | << __clang_patchlevel__;
61 | }
62 | #endif
63 |
64 | #if defined(_MSC_VER)
65 | { ss << "MSVC " << _MSC_FULL_VER; }
66 | #endif
67 | return ss.str();
68 | }
69 |
70 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
71 | m.def("get_compiler_version", &get_compiler_version, "get_compiler_version");
72 | m.def("get_cuda_version", &get_cuda_version, "get_cuda_version");
73 |
74 | m.def("box_iou_rotated", &box_iou_rotated, "IoU for rotated boxes");
75 |
76 | m.def("deform_conv_forward", &deform_conv_forward, "deform_conv_forward");
77 | m.def(
78 | "deform_conv_backward_input",
79 | &deform_conv_backward_input,
80 | "deform_conv_backward_input");
81 | m.def(
82 | "deform_conv_backward_filter",
83 | &deform_conv_backward_filter,
84 | "deform_conv_backward_filter");
85 | m.def(
86 | "modulated_deform_conv_forward",
87 | &modulated_deform_conv_forward,
88 | "modulated_deform_conv_forward");
89 | m.def(
90 | "modulated_deform_conv_backward",
91 | &modulated_deform_conv_backward,
92 | "modulated_deform_conv_backward");
93 |
94 | m.def("nms_rotated", &nms_rotated, "NMS for rotated boxes");
95 |
96 | m.def("roi_align_forward", &ROIAlign_forward, "ROIAlign_forward");
97 | m.def("roi_align_backward", &ROIAlign_backward, "ROIAlign_backward");
98 |
99 | m.def(
100 | "roi_align_rotated_forward",
101 | &ROIAlignRotated_forward,
102 | "Forward pass for Rotated ROI-Align Operator");
103 | m.def(
104 | "roi_align_rotated_backward",
105 | &ROIAlignRotated_backward,
106 | "Backward pass for Rotated ROI-Align Operator");
107 |
108 | m.def("COCOevalAccumulate", &COCOeval::Accumulate, "COCOeval::Accumulate");
109 | m.def(
110 | "COCOevalEvaluateImages",
111 | &COCOeval::EvaluateImages,
112 | "COCOeval::EvaluateImages");
113 | pybind11::class_<COCOeval::InstanceAnnotation>(m, "InstanceAnnotation")
114 | .def(pybind11::init<uint64_t, double, double, bool, bool>());
115 | pybind11::class_<COCOeval::ImageEvaluation>(m, "ImageEvaluation")
116 | .def(pybind11::init<>());
117 | }
118 | } // namespace detectron2
119 |
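Usage note (illustrative sketch, not part of the file above): once the extension is built, the functions registered in `PYBIND11_MODULE` above are available on `detectron2._C`, which is how the Python layers in this repository call them.

from detectron2 import _C

print(_C.get_compiler_version())  # e.g. "GCC 9.3"
print(_C.get_cuda_version())      # e.g. "CUDA 10.2", or "not available" on CPU-only builds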
--------------------------------------------------------------------------------
/detectron2/layers/roi_align.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | from torch import nn
3 | from torchvision.ops import roi_align as tv_roi_align
4 |
5 | try:
6 | from torchvision import __version__
7 |
8 | version = tuple(int(x) for x in __version__.split(".")[:2])
9 | USE_TORCHVISION = version >= (0, 7) # https://github.com/pytorch/vision/pull/2438
10 | except ImportError: # only open source torchvision has __version__
11 | USE_TORCHVISION = True
12 |
13 |
14 | if USE_TORCHVISION:
15 | roi_align = tv_roi_align
16 | else:
17 | from torch.nn.modules.utils import _pair
18 | from torch.autograd import Function
19 | from torch.autograd.function import once_differentiable
20 | from detectron2 import _C
21 |
22 | class _ROIAlign(Function):
23 | @staticmethod
24 | def forward(ctx, input, roi, output_size, spatial_scale, sampling_ratio, aligned):
25 | ctx.save_for_backward(roi)
26 | ctx.output_size = _pair(output_size)
27 | ctx.spatial_scale = spatial_scale
28 | ctx.sampling_ratio = sampling_ratio
29 | ctx.input_shape = input.size()
30 | ctx.aligned = aligned
31 | output = _C.roi_align_forward(
32 | input, roi, spatial_scale, output_size[0], output_size[1], sampling_ratio, aligned
33 | )
34 | return output
35 |
36 | @staticmethod
37 | @once_differentiable
38 | def backward(ctx, grad_output):
39 | (rois,) = ctx.saved_tensors
40 | output_size = ctx.output_size
41 | spatial_scale = ctx.spatial_scale
42 | sampling_ratio = ctx.sampling_ratio
43 | bs, ch, h, w = ctx.input_shape
44 | grad_input = _C.roi_align_backward(
45 | grad_output,
46 | rois,
47 | spatial_scale,
48 | output_size[0],
49 | output_size[1],
50 | bs,
51 | ch,
52 | h,
53 | w,
54 | sampling_ratio,
55 | ctx.aligned,
56 | )
57 | return grad_input, None, None, None, None, None
58 |
59 | roi_align = _ROIAlign.apply
60 |
61 |
62 | # NOTE: torchvision's RoIAlign has a different default aligned=False
63 | class ROIAlign(nn.Module):
64 | def __init__(self, output_size, spatial_scale, sampling_ratio, aligned=True):
65 | """
66 | Args:
67 | output_size (tuple): h, w
68 | spatial_scale (float): scale the input boxes by this number
69 | sampling_ratio (int): number of input samples to take for each output
70 | sample. 0 to take samples densely.
71 | aligned (bool): if False, use the legacy implementation in
72 | Detectron. If True, align the results more perfectly.
73 |
74 | Note:
75 | The meaning of aligned=True:
76 |
77 | Given a continuous coordinate c, its two neighboring pixel indices (in our
78 | pixel model) are computed by floor(c - 0.5) and ceil(c - 0.5). For example,
79 | c=1.3 has pixel neighbors with discrete indices [0] and [1] (which are sampled
80 | from the underlying signal at continuous coordinates 0.5 and 1.5). But the original
81 | roi_align (aligned=False) does not subtract the 0.5 when computing neighboring
82 | pixel indices and therefore it uses pixels with a slightly incorrect alignment
83 | (relative to our pixel model) when performing bilinear interpolation.
84 |
85 | With `aligned=True`,
86 | we first appropriately scale the ROI and then shift it by -0.5
87 | prior to calling roi_align. This produces the correct neighbors; see
88 | detectron2/tests/test_roi_align.py for verification.
89 |
90 | This difference does not affect the model's performance if
91 | ROIAlign is used together with conv layers.
92 | """
93 | super(ROIAlign, self).__init__()
94 | self.output_size = output_size
95 | self.spatial_scale = spatial_scale
96 | self.sampling_ratio = sampling_ratio
97 | self.aligned = aligned
98 |
99 | def forward(self, input, rois):
100 | """
101 | Args:
102 | input: NCHW images
103 | rois: Bx5 boxes. First column is the index into N. The other 4 columns are xyxy.
104 | """
105 | assert rois.dim() == 2 and rois.size(1) == 5
106 | return roi_align(
107 | input,
108 | rois.to(dtype=input.dtype),
109 | self.output_size,
110 | self.spatial_scale,
111 | self.sampling_ratio,
112 | self.aligned,
113 | )
114 |
115 | def __repr__(self):
116 | tmpstr = self.__class__.__name__ + "("
117 | tmpstr += "output_size=" + str(self.output_size)
118 | tmpstr += ", spatial_scale=" + str(self.spatial_scale)
119 | tmpstr += ", sampling_ratio=" + str(self.sampling_ratio)
120 | tmpstr += ", aligned=" + str(self.aligned)
121 | tmpstr += ")"
122 | return tmpstr
123 |
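Usage note (illustrative sketch, not part of the file above): pooling a 7x7 feature for a single box. The 1/16 spatial scale and box coordinates below are arbitrary example values.

import torch

from detectron2.layers import ROIAlign

features = torch.randn(1, 256, 50, 50)  # NCHW feature map
# Bx5 rois: first column is the batch index into N, then x1, y1, x2, y2
rois = torch.tensor([[0.0, 64.0, 64.0, 256.0, 256.0]])
pooler = ROIAlign(output_size=(7, 7), spatial_scale=1.0 / 16, sampling_ratio=0, aligned=True)
out = pooler(features, rois)
print(out.shape)  # torch.Size([1, 256, 7, 7])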
--------------------------------------------------------------------------------
/detectron2/layers/roi_align_rotated.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | import torch
3 | from torch import nn
4 | from torch.autograd import Function
5 | from torch.autograd.function import once_differentiable
6 | from torch.nn.modules.utils import _pair
7 |
8 | from detectron2 import _C
9 |
10 |
11 | class _ROIAlignRotated(Function):
12 | @staticmethod
13 | def forward(ctx, input, roi, output_size, spatial_scale, sampling_ratio):
14 | ctx.save_for_backward(roi)
15 | ctx.output_size = _pair(output_size)
16 | ctx.spatial_scale = spatial_scale
17 | ctx.sampling_ratio = sampling_ratio
18 | ctx.input_shape = input.size()
19 | output = _C.roi_align_rotated_forward(
20 | input, roi, spatial_scale, output_size[0], output_size[1], sampling_ratio
21 | )
22 | return output
23 |
24 | @staticmethod
25 | @once_differentiable
26 | def backward(ctx, grad_output):
27 | (rois,) = ctx.saved_tensors
28 | output_size = ctx.output_size
29 | spatial_scale = ctx.spatial_scale
30 | sampling_ratio = ctx.sampling_ratio
31 | bs, ch, h, w = ctx.input_shape
32 | grad_input = _C.roi_align_rotated_backward(
33 | grad_output,
34 | rois,
35 | spatial_scale,
36 | output_size[0],
37 | output_size[1],
38 | bs,
39 | ch,
40 | h,
41 | w,
42 | sampling_ratio,
43 | )
44 | return grad_input, None, None, None, None, None
45 |
46 |
47 | roi_align_rotated = _ROIAlignRotated.apply
48 |
49 |
50 | class ROIAlignRotated(nn.Module):
51 | def __init__(self, output_size, spatial_scale, sampling_ratio):
52 | """
53 | Args:
54 | output_size (tuple): h, w
55 | spatial_scale (float): scale the input boxes by this number
56 | sampling_ratio (int): number of input samples to take for each output
57 | sample. 0 to take samples densely.
58 |
59 | Note:
60 | ROIAlignRotated supports continuous coordinate by default:
61 | Given a continuous coordinate c, its two neighboring pixel indices (in our
62 | pixel model) are computed by floor(c - 0.5) and ceil(c - 0.5). For example,
63 | c=1.3 has pixel neighbors with discrete indices [0] and [1] (which are sampled
64 | from the underlying signal at continuous coordinates 0.5 and 1.5).
65 | """
66 | super(ROIAlignRotated, self).__init__()
67 | self.output_size = output_size
68 | self.spatial_scale = spatial_scale
69 | self.sampling_ratio = sampling_ratio
70 |
71 | def forward(self, input, rois):
72 | """
73 | Args:
74 | input: NCHW images
75 | rois: Bx6 boxes. First column is the index into N.
76 | The other 5 columns are (x_ctr, y_ctr, width, height, angle_degrees).
77 | """
78 | assert rois.dim() == 2 and rois.size(1) == 6
79 | orig_dtype = input.dtype
80 | if orig_dtype == torch.float16:
81 | input = input.float()
82 | rois = rois.float()
83 | return roi_align_rotated(
84 | input, rois, self.output_size, self.spatial_scale, self.sampling_ratio
85 | ).to(dtype=orig_dtype)
86 |
87 | def __repr__(self):
88 | tmpstr = self.__class__.__name__ + "("
89 | tmpstr += "output_size=" + str(self.output_size)
90 | tmpstr += ", spatial_scale=" + str(self.spatial_scale)
91 | tmpstr += ", sampling_ratio=" + str(self.sampling_ratio)
92 | tmpstr += ")"
93 | return tmpstr
94 |
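Usage note (illustrative sketch, not part of the file above): same idea as ROIAlign but with Bx6 rotated rois; this path always calls the custom `detectron2._C` op, so the compiled extension is required. The values below are arbitrary example numbers.

import torch

from detectron2.layers import ROIAlignRotated

features = torch.randn(1, 256, 50, 50)
# Columns: batch index, x_ctr, y_ctr, width, height, angle_degrees
rois = torch.tensor([[0.0, 160.0, 160.0, 128.0, 64.0, 30.0]])
pooler = ROIAlignRotated(output_size=(7, 7), spatial_scale=1.0 / 16, sampling_ratio=2)
out = pooler(features, rois)
print(out.shape)  # torch.Size([1, 256, 7, 7])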
--------------------------------------------------------------------------------
/detectron2/layers/rotated_boxes.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | from __future__ import absolute_import, division, print_function, unicode_literals
3 |
4 | from detectron2 import _C
5 |
6 |
7 | def pairwise_iou_rotated(boxes1, boxes2):
8 | """
9 | Return intersection-over-union (Jaccard index) of boxes.
10 |
11 | Both sets of boxes are expected to be in
12 | (x_center, y_center, width, height, angle) format.
13 |
14 | Arguments:
15 | boxes1 (Tensor[N, 5])
16 | boxes2 (Tensor[M, 5])
17 |
18 | Returns:
19 | iou (Tensor[N, M]): the NxM matrix containing the pairwise
20 | IoU values for every element in boxes1 and boxes2
21 | """
22 | return _C.box_iou_rotated(boxes1, boxes2)
23 |
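Usage note (illustrative sketch, not part of the file above): computing the NxM IoU matrix for two small sets of rotated boxes; requires the compiled `detectron2._C` extension. Values are arbitrary example numbers.

import torch

from detectron2.layers.rotated_boxes import pairwise_iou_rotated

boxes1 = torch.tensor([
    [10.0, 10.0, 8.0, 6.0, 0.0],
    [30.0, 30.0, 8.0, 6.0, 45.0],
])
boxes2 = torch.tensor([[10.0, 10.0, 8.0, 6.0, 0.0]])
iou = pairwise_iou_rotated(boxes1, boxes2)
print(iou.shape)  # torch.Size([2, 1])
print(iou[0, 0])  # ~1.0 for the identical box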
--------------------------------------------------------------------------------
/detectron2/layers/shape_spec.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # Copyright (c) Facebook, Inc. and its affiliates.
3 | from collections import namedtuple
4 |
5 |
6 | class ShapeSpec(namedtuple("_ShapeSpec", ["channels", "height", "width", "stride"])):
7 | """
8 | A simple structure that contains basic shape specification about a tensor.
9 | It is often used as the auxiliary inputs/outputs of models,
10 | to complement the lack of shape inference ability among pytorch modules.
11 |
12 | Attributes:
13 | channels:
14 | height:
15 | width:
16 | stride:
17 | """
18 |
19 | def __new__(cls, *, channels=None, height=None, width=None, stride=None):
20 | return super().__new__(cls, channels, height, width, stride)
21 |
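Usage note (illustrative sketch, not part of the file above): `ShapeSpec` is constructed with keyword-only arguments and any unset field stays `None`; this is the structure backbones use to report their output shapes.

from detectron2.layers import ShapeSpec

spec = ShapeSpec(channels=256, stride=16)
print(spec.channels, spec.stride, spec.height)  # 256 16 None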
--------------------------------------------------------------------------------
/detectron2/layers/wrappers.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | """
3 | Wrappers around some nn functions, mainly to support empty tensors.
4 |
5 | Ideally, support for empty tensors would be added directly in PyTorch for those functions.
6 |
7 | These can be removed once https://github.com/pytorch/pytorch/issues/12013
8 | is implemented
9 | """
10 |
11 | from typing import List
12 | import torch
13 | from torch.nn import functional as F
14 |
15 | from detectron2.utils.env import TORCH_VERSION
16 |
17 |
18 | def cat(tensors: List[torch.Tensor], dim: int = 0):
19 | """
20 | Efficient version of torch.cat that avoids a copy if there is only a single element in the list
21 | """
22 | assert isinstance(tensors, (list, tuple))
23 | if len(tensors) == 1:
24 | return tensors[0]
25 | return torch.cat(tensors, dim)
26 |
27 |
28 | class _NewEmptyTensorOp(torch.autograd.Function):
29 | @staticmethod
30 | def forward(ctx, x, new_shape):
31 | ctx.shape = x.shape
32 | return x.new_empty(new_shape)
33 |
34 | @staticmethod
35 | def backward(ctx, grad):
36 | shape = ctx.shape
37 | return _NewEmptyTensorOp.apply(grad, shape), None
38 |
39 |
40 | class Conv2d(torch.nn.Conv2d):
41 | """
42 | A wrapper around :class:`torch.nn.Conv2d` to support empty inputs and more features.
43 | """
44 |
45 | def __init__(self, *args, **kwargs):
46 | """
47 | Extra keyword arguments supported in addition to those in `torch.nn.Conv2d`:
48 |
49 | Args:
50 | norm (nn.Module, optional): a normalization layer
51 | activation (callable(Tensor) -> Tensor): a callable activation function
52 |
53 | It assumes that the norm layer is applied before the activation.
54 | """
55 | norm = kwargs.pop("norm", None)
56 | activation = kwargs.pop("activation", None)
57 | super().__init__(*args, **kwargs)
58 |
59 | self.norm = norm
60 | self.activation = activation
61 |
62 | def forward(self, x):
63 | # torchscript does not support SyncBatchNorm yet
64 | # https://github.com/pytorch/pytorch/issues/40507
65 | # and we skip these codes in torchscript since:
66 | # 1. currently we only support torchscript in evaluation mode
67 | # 2. features needed by exporting module to torchscript are added in PyTorch 1.6 or
68 | # later version, `Conv2d` in these PyTorch versions has already supported empty inputs.
69 | if not torch.jit.is_scripting():
70 | if x.numel() == 0 and self.training:
71 | # https://github.com/pytorch/pytorch/issues/12013
72 | assert not isinstance(
73 | self.norm, torch.nn.SyncBatchNorm
74 | ), "SyncBatchNorm does not support empty inputs!"
75 |
76 | x = F.conv2d(
77 | x, self.weight, self.bias, self.stride, self.padding, self.dilation, self.groups
78 | )
79 | if self.norm is not None:
80 | x = self.norm(x)
81 | if self.activation is not None:
82 | x = self.activation(x)
83 | return x
84 |
85 |
86 | ConvTranspose2d = torch.nn.ConvTranspose2d
87 | BatchNorm2d = torch.nn.BatchNorm2d
88 | interpolate = F.interpolate
89 |
90 |
91 | if TORCH_VERSION > (1, 5):
92 | Linear = torch.nn.Linear
93 | else:
94 |
95 | class Linear(torch.nn.Linear):
96 | """
97 | A wrapper around :class:`torch.nn.Linear` to support empty inputs and more features.
98 | Because of https://github.com/pytorch/pytorch/issues/34202
99 | """
100 |
101 | def forward(self, x):
102 | if x.numel() == 0:
103 | output_shape = [x.shape[0], self.weight.shape[0]]
104 |
105 | empty = _NewEmptyTensorOp.apply(x, output_shape)
106 | if self.training:
107 | # This is to make DDP happy.
108 | # DDP expects all workers to have gradient w.r.t the same set of parameters.
109 | _dummy = sum(x.view(-1)[0] for x in self.parameters()) * 0.0
110 | return empty + _dummy
111 | else:
112 | return empty
113 |
114 | x = super().forward(x)
115 | return x
116 |
117 |
118 | def nonzero_tuple(x):
119 | """
120 | An 'as_tuple=True' version of torch.nonzero to support torchscript,
121 | because of https://github.com/pytorch/pytorch/issues/38718
122 | """
123 | if torch.jit.is_scripting():
124 | if x.dim() == 0:
125 | return x.unsqueeze(0).nonzero().unbind(1)
126 | return x.nonzero().unbind(1)
127 | else:
128 | return x.nonzero(as_tuple=True)
129 |
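A minimal sketch of the extra keyword arguments accepted by the Conv2d wrapper above; the layer sizes are arbitrary:

import torch
from torch import nn
from detectron2.layers import Conv2d

conv = Conv2d(
    64, 128, kernel_size=3, padding=1,
    norm=nn.GroupNorm(32, 128),   # applied right after the convolution
    activation=nn.ReLU(),         # applied after the norm
)
y = conv(torch.randn(2, 64, 32, 32))   # shape (2, 128, 32, 32)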
--------------------------------------------------------------------------------
/detectron2/model_zoo/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | """
3 | Model Zoo API for Detectron2: a collection of functions to create common model architectures and
4 | optionally load pre-trained weights as released in
5 | `MODEL_ZOO.md <https://github.com/facebookresearch/detectron2/blob/master/MODEL_ZOO.md>`_.
6 | """
7 | from .model_zoo import get, get_config_file, get_checkpoint_url, get_config
8 |
9 | __all__ = ["get_checkpoint_url", "get", "get_config_file", "get_config"]
10 |
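An illustrative sketch of the four functions exported above, using one of the standard model-zoo config names (get(..., trained=True) downloads the released weights):

from detectron2 import model_zoo

name = "COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml"
cfg_file = model_zoo.get_config_file(name)      # local path to the yaml file
url = model_zoo.get_checkpoint_url(name)        # URL of the released checkpoint
cfg = model_zoo.get_config(name)                # a CfgNode built from that yaml
model = model_zoo.get(name, trained=True)       # model with the released weights loaded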
--------------------------------------------------------------------------------
/detectron2/modeling/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | from detectron2.layers import ShapeSpec
3 |
4 | from .anchor_generator import build_anchor_generator, ANCHOR_GENERATOR_REGISTRY
5 | from .backbone import (
6 | BACKBONE_REGISTRY,
7 | FPN,
8 | Backbone,
9 | ResNet,
10 | ResNetBlockBase,
11 | build_backbone,
12 | build_resnet_backbone,
13 | make_stage,
14 | )
15 | from .meta_arch import (
16 | META_ARCH_REGISTRY,
17 | SEM_SEG_HEADS_REGISTRY,
18 | GeneralizedRCNN,
19 | PanopticFPN,
20 | ProposalNetwork,
21 | RetinaNet,
22 | SemanticSegmentor,
23 | build_model,
24 | build_sem_seg_head,
25 | )
26 | from .postprocessing import detector_postprocess
27 | from .proposal_generator import (
28 | PROPOSAL_GENERATOR_REGISTRY,
29 | build_proposal_generator,
30 | RPN_HEAD_REGISTRY,
31 | build_rpn_head,
32 | )
33 | from .roi_heads import (
34 | ROI_BOX_HEAD_REGISTRY,
35 | ROI_HEADS_REGISTRY,
36 | ROI_KEYPOINT_HEAD_REGISTRY,
37 | ROI_MASK_HEAD_REGISTRY,
38 | ROIHeads,
39 | StandardROIHeads,
40 | BaseMaskRCNNHead,
41 | BaseKeypointRCNNHead,
42 | FastRCNNOutputLayers,
43 | build_box_head,
44 | build_keypoint_head,
45 | build_mask_head,
46 | build_roi_heads,
47 | )
48 | from .test_time_augmentation import DatasetMapperTTA, GeneralizedRCNNWithTTA
49 |
50 | _EXCLUDE = {"ShapeSpec"}
51 | __all__ = [k for k in globals().keys() if k not in _EXCLUDE and not k.startswith("_")]
52 |
--------------------------------------------------------------------------------
/detectron2/modeling/backbone/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | from .build import build_backbone, BACKBONE_REGISTRY # noqa F401 isort:skip
3 |
4 | from .backbone import Backbone
5 | from .fpn import FPN
6 | from .resnet import ResNet, ResNetBlockBase, build_resnet_backbone, make_stage
7 |
8 | __all__ = [k for k in globals().keys() if not k.startswith("_")]
9 | # TODO can expose more resnet blocks after careful consideration
10 |
--------------------------------------------------------------------------------
/detectron2/modeling/backbone/backbone.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | from abc import ABCMeta, abstractmethod
3 | import torch.nn as nn
4 |
5 | from detectron2.layers import ShapeSpec
6 |
7 | __all__ = ["Backbone"]
8 |
9 |
10 | class Backbone(nn.Module, metaclass=ABCMeta):
11 | """
12 | Abstract base class for network backbones.
13 | """
14 |
15 | def __init__(self):
16 | """
17 | The `__init__` method of any subclass can specify its own set of arguments.
18 | """
19 | super().__init__()
20 |
21 | @abstractmethod
22 | def forward(self):
23 | """
24 | Subclasses must override this method, but adhere to the same return type.
25 |
26 | Returns:
27 | dict[str->Tensor]: mapping from feature name (e.g., "res2") to tensor
28 | """
29 | pass
30 |
31 | @property
32 | def size_divisibility(self) -> int:
33 | """
34 | Some backbones require the input height and width to be divisible by a
35 | specific integer. This is typically true for encoder / decoder type networks
36 | with lateral connections (e.g., FPN), for which feature maps need to match in
37 | dimension between the "bottom up" and "top down" paths. Set to 0 if no specific
38 | input size divisibility is required.
39 | """
40 | return 0
41 |
42 | def output_shape(self):
43 | """
44 | Returns:
45 | dict[str->ShapeSpec]
46 | """
47 | # this is a backward-compatible default
48 | return {
49 | name: ShapeSpec(
50 | channels=self._out_feature_channels[name], stride=self._out_feature_strides[name]
51 | )
52 | for name in self._out_features
53 | }
54 |
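A hypothetical minimal subclass, sketched to show the attributes that the default output_shape() above reads (_out_features, _out_feature_channels, _out_feature_strides):

from torch import nn
from detectron2.modeling import Backbone

class ToyBackbone(Backbone):
    def __init__(self):
        super().__init__()
        self.conv = nn.Conv2d(3, 64, kernel_size=3, stride=4, padding=1)
        self._out_features = ["toy1"]
        self._out_feature_channels = {"toy1": 64}
        self._out_feature_strides = {"toy1": 4}

    def forward(self, x):
        # returns a dict of feature name -> tensor, as required by the base class
        return {"toy1": self.conv(x)}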
--------------------------------------------------------------------------------
/detectron2/modeling/backbone/build.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | from detectron2.layers import ShapeSpec
3 | from detectron2.utils.registry import Registry
4 |
5 | from .backbone import Backbone
6 |
7 | BACKBONE_REGISTRY = Registry("BACKBONE")
8 | BACKBONE_REGISTRY.__doc__ = """
9 | Registry for backbones, which extract feature maps from images
10 |
11 | The registered object must be a callable that accepts two arguments:
12 |
13 | 1. A :class:`detectron2.config.CfgNode`
14 | 2. A :class:`detectron2.layers.ShapeSpec`, which contains the input shape specification.
15 |
16 | The registered object must return an instance of :class:`Backbone`.
17 | """
18 |
19 |
20 | def build_backbone(cfg, input_shape=None):
21 | """
22 | Build a backbone from `cfg.MODEL.BACKBONE.NAME`.
23 |
24 | Returns:
25 | an instance of :class:`Backbone`
26 | """
27 | if input_shape is None:
28 | input_shape = ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN))
29 |
30 | backbone_name = cfg.MODEL.BACKBONE.NAME
31 | backbone = BACKBONE_REGISTRY.get(backbone_name)(cfg, input_shape)
32 | assert isinstance(backbone, Backbone)
33 | return backbone
34 |
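A sketch of registering a backbone and building it through the config (ToyBackbone is the hypothetical class sketched after backbone.py above):

from detectron2.config import get_cfg
from detectron2.modeling import BACKBONE_REGISTRY, build_backbone

@BACKBONE_REGISTRY.register()
def build_toy_backbone(cfg, input_shape):
    return ToyBackbone()

cfg = get_cfg()
cfg.MODEL.BACKBONE.NAME = "build_toy_backbone"
backbone = build_backbone(cfg)   # input_shape defaults to ShapeSpec(channels=len(PIXEL_MEAN))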
--------------------------------------------------------------------------------
/detectron2/modeling/matcher.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | from typing import List
3 | import torch
4 |
5 | from detectron2.layers import nonzero_tuple
6 |
7 |
8 | class Matcher(object):
9 | """
10 | This class assigns to each predicted "element" (e.g., a box) a ground-truth
11 | element. Each predicted element will have exactly zero or one matches; each
12 | ground-truth element may be matched to zero or more predicted elements.
13 |
14 | The matching is determined by the MxN match_quality_matrix, that characterizes
15 | how well each (ground-truth, prediction)-pair matches each other. For example,
16 | if the elements are boxes, this matrix may contain box intersection-over-union
17 | overlap values.
18 |
19 | The matcher returns (a) a vector of length N containing the index of the
20 | ground-truth element m in [0, M) that matches prediction n in [0, N).
21 | (b) a vector of length N containing the labels for each prediction.
22 | """
23 |
24 | def __init__(
25 | self, thresholds: List[float], labels: List[int], allow_low_quality_matches: bool = False
26 | ):
27 | """
28 | Args:
29 | thresholds (list): a list of thresholds used to stratify predictions
30 | into levels.
31 | labels (list): a list of values to label predictions belonging to
32 | each level. A label can be one of {-1, 0, 1} signifying
33 | {ignore, negative class, positive class}, respectively.
34 | allow_low_quality_matches (bool): if True, produce additional matches
35 | for predictions with maximum match quality lower than high_threshold.
36 | See set_low_quality_matches_ for more details.
37 |
38 | For example,
39 | thresholds = [0.3, 0.5]
40 | labels = [0, -1, 1]
41 | All predictions with iou < 0.3 will be marked with 0 and
42 | thus will be considered as false positives while training.
43 | All predictions with 0.3 <= iou < 0.5 will be marked with -1 and
44 | thus will be ignored.
45 | All predictions with 0.5 <= iou will be marked with 1 and
46 | thus will be considered as true positives.
47 | """
48 | # Add -inf and +inf to first and last position in thresholds
49 | thresholds = thresholds[:]
50 | assert thresholds[0] > 0
51 | thresholds.insert(0, -float("inf"))
52 | thresholds.append(float("inf"))
53 | # Currently torchscript does not support all + generator
54 | assert all([low <= high for (low, high) in zip(thresholds[:-1], thresholds[1:])])
55 | assert all([l in [-1, 0, 1] for l in labels])
56 | assert len(labels) == len(thresholds) - 1
57 | self.thresholds = thresholds
58 | self.labels = labels
59 | self.allow_low_quality_matches = allow_low_quality_matches
60 |
61 | def __call__(self, match_quality_matrix):
62 | """
63 | Args:
64 | match_quality_matrix (Tensor[float]): an MxN tensor, containing the
65 | pairwise quality between M ground-truth elements and N predicted
66 | elements. All elements must be >= 0 (due to the use of `torch.nonzero`
67 | for selecting indices in :meth:`set_low_quality_matches_`).
68 |
69 | Returns:
70 | matches (Tensor[int64]): a vector of length N, where matches[i] is a matched
71 | ground-truth index in [0, M)
72 | match_labels (Tensor[int8]): a vector of length N, where match_labels[i] indicates
73 | whether a prediction is a true or false positive or ignored
74 | """
75 | assert match_quality_matrix.dim() == 2
76 | if match_quality_matrix.numel() == 0:
77 | default_matches = match_quality_matrix.new_full(
78 | (match_quality_matrix.size(1),), 0, dtype=torch.int64
79 | )
80 | # When no gt boxes exist, we define IOU = 0 and therefore set labels
81 | # to `self.labels[0]`, which usually defaults to background class 0
82 | # To choose to ignore instead, can make labels=[-1,0,-1,1] + set appropriate thresholds
83 | default_match_labels = match_quality_matrix.new_full(
84 | (match_quality_matrix.size(1),), self.labels[0], dtype=torch.int8
85 | )
86 | return default_matches, default_match_labels
87 |
88 | assert torch.all(match_quality_matrix >= 0)
89 |
90 | # match_quality_matrix is M (gt) x N (predicted)
91 | # Max over gt elements (dim 0) to find best gt candidate for each prediction
92 | matched_vals, matches = match_quality_matrix.max(dim=0)
93 |
94 | match_labels = matches.new_full(matches.size(), 1, dtype=torch.int8)
95 |
96 | for (l, low, high) in zip(self.labels, self.thresholds[:-1], self.thresholds[1:]):
97 | low_high = (matched_vals >= low) & (matched_vals < high)
98 | match_labels[low_high] = l
99 |
100 | if self.allow_low_quality_matches:
101 | self.set_low_quality_matches_(match_labels, match_quality_matrix)
102 |
103 | return matches, match_labels
104 |
105 | def set_low_quality_matches_(self, match_labels, match_quality_matrix):
106 | """
107 | Produce additional matches for predictions that have only low-quality matches.
108 | Specifically, for each ground-truth G find the set of predictions that have
109 | maximum overlap with it (including ties); for each prediction in that set, if
110 | it is unmatched, then match it to the ground-truth G.
111 |
112 | This function implements the RPN assignment case (i) in Sec. 3.1.2 of
113 | :paper:`Faster R-CNN`.
114 | """
115 | # For each gt, find the prediction with which it has highest quality
116 | highest_quality_foreach_gt, _ = match_quality_matrix.max(dim=1)
117 | # Find the highest quality match available, even if it is low, including ties.
118 | # Note that the matches qualities must be positive due to the use of
119 | # `torch.nonzero`.
120 | _, pred_inds_with_highest_quality = nonzero_tuple(
121 | match_quality_matrix == highest_quality_foreach_gt[:, None]
122 | )
123 | # If an anchor was labeled positive only due to a low-quality match
124 | # with gt_A, but it has larger overlap with gt_B, its matched index will still be gt_B.
125 | # This follows the implementation in Detectron, and is found to have no significant impact.
126 | match_labels[pred_inds_with_highest_quality] = 1
127 |
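A worked sketch of the thresholds/labels example from the constructor docstring above:

import torch
from detectron2.modeling.matcher import Matcher

matcher = Matcher(thresholds=[0.3, 0.5], labels=[0, -1, 1], allow_low_quality_matches=False)
# 2 ground-truth boxes (rows) x 4 predictions (columns) of IoU values.
iou = torch.tensor([[0.10, 0.40, 0.70, 0.00],
                    [0.20, 0.10, 0.20, 0.60]])
matches, match_labels = matcher(iou)
# matches      -> tensor([1, 0, 0, 1])    best ground-truth index per prediction
# match_labels -> tensor([0, -1, 1, 1])   0: negative, -1: ignored, 1: positive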
--------------------------------------------------------------------------------
/detectron2/modeling/meta_arch/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # Copyright (c) Facebook, Inc. and its affiliates.
3 |
4 | from .build import META_ARCH_REGISTRY, build_model # isort:skip
5 |
6 | from .panoptic_fpn import PanopticFPN
7 |
8 | # import all the meta_arch, so they will be registered
9 | from .rcnn import GeneralizedRCNN, ProposalNetwork
10 | from .retinanet import RetinaNet
11 | from .semantic_seg import SEM_SEG_HEADS_REGISTRY, SemanticSegmentor, build_sem_seg_head
12 |
13 |
14 | __all__ = list(globals().keys())
15 |
--------------------------------------------------------------------------------
/detectron2/modeling/meta_arch/build.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | import torch
3 |
4 | from detectron2.utils.registry import Registry
5 |
6 | META_ARCH_REGISTRY = Registry("META_ARCH") # noqa F401 isort:skip
7 | META_ARCH_REGISTRY.__doc__ = """
8 | Registry for meta-architectures, i.e. the whole model.
9 |
10 | The registered object will be called with `obj(cfg)`
11 | and expected to return a `nn.Module` object.
12 | """
13 |
14 |
15 | def build_model(cfg):
16 | """
17 | Build the whole model architecture, defined by ``cfg.MODEL.META_ARCHITECTURE``.
18 | Note that it does not load any weights from ``cfg``.
19 | """
20 | meta_arch = cfg.MODEL.META_ARCHITECTURE
21 | model = META_ARCH_REGISTRY.get(meta_arch)(cfg)
22 | model.to(torch.device(cfg.MODEL.DEVICE))
23 | return model
24 |
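A short sketch of build_model with a model-zoo config (the config name is a standard zoo entry; no weights are loaded here):

from detectron2 import model_zoo
from detectron2.modeling import build_model

cfg = model_zoo.get_config("COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml")
cfg.MODEL.DEVICE = "cpu"      # keep the sketch GPU-free
model = build_model(cfg)      # an nn.Module on cfg.MODEL.DEVICE, with random weights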
--------------------------------------------------------------------------------
/detectron2/modeling/postprocessing.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | import torch
3 | from torch.nn import functional as F
4 |
5 | from detectron2.layers import paste_masks_in_image
6 | from detectron2.structures import Instances
7 | from detectron2.utils.memory import retry_if_cuda_oom
8 |
9 |
10 | # perhaps should rename to "resize_instance"
11 | def detector_postprocess(
12 | results: Instances, output_height: int, output_width: int, mask_threshold: float = 0.5
13 | ):
14 | """
15 | Resize the output instances.
16 | The input images are often resized when entering an object detector.
17 | As a result, we often need the outputs of the detector in a different
18 | resolution from its inputs.
19 |
20 | This function will resize the raw outputs of an R-CNN detector
21 | to produce outputs according to the desired output resolution.
22 |
23 | Args:
24 | results (Instances): the raw outputs from the detector.
25 | `results.image_size` contains the input image resolution the detector sees.
26 | This object might be modified in-place.
27 | output_height, output_width: the desired output resolution.
28 |
29 | Returns:
30 | Instances: the resized output from the model, based on the output resolution
31 | """
32 | # Change to 'if is_tracing' after PT1.7
33 | if isinstance(output_height, torch.Tensor):
34 | # Converts integer tensors to float temporaries to ensure true
35 | # division is performed when computing scale_x and scale_y.
36 | output_width_tmp = output_width.float()
37 | output_height_tmp = output_height.float()
38 | new_size = torch.stack([output_height, output_width])
39 | else:
40 | new_size = (output_height, output_width)
41 | output_width_tmp = output_width
42 | output_height_tmp = output_height
43 |
44 | scale_x, scale_y = (
45 | output_width_tmp / results.image_size[1],
46 | output_height_tmp / results.image_size[0],
47 | )
48 | results = Instances(new_size, **results.get_fields())
49 |
50 | if results.has("pred_boxes"):
51 | output_boxes = results.pred_boxes
52 | elif results.has("proposal_boxes"):
53 | output_boxes = results.proposal_boxes
54 | else:
55 | output_boxes = None
56 | assert output_boxes is not None, "Predictions must contain boxes!"
57 |
58 | output_boxes.scale(scale_x, scale_y)
59 | output_boxes.clip(results.image_size)
60 |
61 | results = results[output_boxes.nonempty()]
62 |
63 | if results.has("pred_masks"):
64 | results.pred_masks = retry_if_cuda_oom(paste_masks_in_image)(
65 | results.pred_masks[:, 0, :, :], # N, 1, M, M
66 | results.pred_boxes,
67 | results.image_size,
68 | threshold=mask_threshold,
69 | )
70 |
71 | if results.has("pred_keypoints"):
72 | results.pred_keypoints[:, :, 0] *= scale_x
73 | results.pred_keypoints[:, :, 1] *= scale_y
74 |
75 | return results
76 |
77 |
78 | def sem_seg_postprocess(result, img_size, output_height, output_width):
79 | """
80 | Return semantic segmentation predictions in the original resolution.
81 |
82 | The input images are often resized when entering the semantic segmentor. Moreover, in some
83 | cases, they are also padded inside the segmentor to be divisible by the maximum network stride.
84 | As a result, we often need the predictions of the segmentor in a different
85 | resolution from its inputs.
86 |
87 | Args:
88 | result (Tensor): semantic segmentation prediction logits. A tensor of shape (C, H, W),
89 | where C is the number of classes, and H, W are the height and width of the prediction.
90 | img_size (tuple): image size that segmentor is taking as input.
91 | output_height, output_width: the desired output resolution.
92 |
93 | Returns:
94 | semantic segmentation prediction (Tensor): A tensor of the shape
95 | (C, output_height, output_width) that contains per-pixel soft predictions.
96 | """
97 | result = result[:, : img_size[0], : img_size[1]].expand(1, -1, -1, -1)
98 | result = F.interpolate(
99 | result, size=(output_height, output_width), mode="bilinear", align_corners=False
100 | )[0]
101 | return result
102 |
--------------------------------------------------------------------------------
/detectron2/modeling/proposal_generator/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | from .build import PROPOSAL_GENERATOR_REGISTRY, build_proposal_generator
3 | from .rpn import RPN_HEAD_REGISTRY, build_rpn_head, RPN
4 |
5 | __all__ = list(globals().keys())
6 |
--------------------------------------------------------------------------------
/detectron2/modeling/proposal_generator/build.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | from detectron2.utils.registry import Registry
3 |
4 | PROPOSAL_GENERATOR_REGISTRY = Registry("PROPOSAL_GENERATOR")
5 | PROPOSAL_GENERATOR_REGISTRY.__doc__ = """
6 | Registry for proposal generators, which produce object proposals from feature maps.
7 |
8 | The registered object will be called with `obj(cfg, input_shape)`.
9 | The call should return a `nn.Module` object.
10 | """
11 |
12 | from . import rpn, rrpn # noqa F401 isort:skip
13 |
14 |
15 | def build_proposal_generator(cfg, input_shape):
16 | """
17 | Build a proposal generator from `cfg.MODEL.PROPOSAL_GENERATOR.NAME`.
18 | The name can be "PrecomputedProposals" to use no proposal generator.
19 | """
20 | name = cfg.MODEL.PROPOSAL_GENERATOR.NAME
21 | if name == "PrecomputedProposals":
22 | return None
23 |
24 | return PROPOSAL_GENERATOR_REGISTRY.get(name)(cfg, input_shape)
25 |
--------------------------------------------------------------------------------
/detectron2/modeling/roi_heads/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | from .box_head import ROI_BOX_HEAD_REGISTRY, build_box_head, FastRCNNConvFCHead
3 | from .keypoint_head import (
4 | ROI_KEYPOINT_HEAD_REGISTRY,
5 | build_keypoint_head,
6 | BaseKeypointRCNNHead,
7 | KRCNNConvDeconvUpsampleHead,
8 | )
9 | from .mask_head import (
10 | ROI_MASK_HEAD_REGISTRY,
11 | build_mask_head,
12 | BaseMaskRCNNHead,
13 | MaskRCNNConvUpsampleHead,
14 | )
15 | from .roi_heads import (
16 | ROI_HEADS_REGISTRY,
17 | ROIHeads,
18 | Res5ROIHeads,
19 | StandardROIHeads,
20 | build_roi_heads,
21 | select_foreground_proposals,
22 | )
23 | from .rotated_fast_rcnn import RROIHeads
24 | from .fast_rcnn import FastRCNNOutputLayers
25 |
26 | from . import cascade_rcnn # isort:skip
27 |
28 | __all__ = list(globals().keys())
29 |
--------------------------------------------------------------------------------
/detectron2/modeling/roi_heads/box_head.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | import numpy as np
3 | from typing import List
4 | import fvcore.nn.weight_init as weight_init
5 | import torch
6 | from torch import nn
7 |
8 | from detectron2.config import configurable
9 | from detectron2.layers import Conv2d, Linear, ShapeSpec, get_norm
10 | from detectron2.utils.registry import Registry
11 |
12 | __all__ = ["FastRCNNConvFCHead", "build_box_head", "ROI_BOX_HEAD_REGISTRY"]
13 |
14 | ROI_BOX_HEAD_REGISTRY = Registry("ROI_BOX_HEAD")
15 | ROI_BOX_HEAD_REGISTRY.__doc__ = """
16 | Registry for box heads, which make box predictions from per-region features.
17 |
18 | The registered object will be called with `obj(cfg, input_shape)`.
19 | """
20 |
21 |
22 | # To get torchscript support, we make the head a subclass of `nn.Sequential`.
23 | # Therefore, to add new layers in this head class, please make sure they are
24 | # added in the order they will be used in forward().
25 | @ROI_BOX_HEAD_REGISTRY.register()
26 | class FastRCNNConvFCHead(nn.Sequential):
27 | """
28 | A head with several 3x3 conv layers (each followed by norm & relu) and then
29 | several fc layers (each followed by relu).
30 | """
31 |
32 | @configurable
33 | def __init__(
34 | self, input_shape: ShapeSpec, *, conv_dims: List[int], fc_dims: List[int], conv_norm=""
35 | ):
36 | """
37 | NOTE: this interface is experimental.
38 |
39 | Args:
40 | input_shape (ShapeSpec): shape of the input feature.
41 | conv_dims (list[int]): the output dimensions of the conv layers
42 | fc_dims (list[int]): the output dimensions of the fc layers
43 | conv_norm (str or callable): normalization for the conv layers.
44 | See :func:`detectron2.layers.get_norm` for supported types.
45 | """
46 | super().__init__()
47 | assert len(conv_dims) + len(fc_dims) > 0
48 |
49 | self._output_size = (input_shape.channels, input_shape.height, input_shape.width)
50 |
51 | self.conv_norm_relus = []
52 | for k, conv_dim in enumerate(conv_dims):
53 | conv = Conv2d(
54 | self._output_size[0],
55 | conv_dim,
56 | kernel_size=3,
57 | padding=1,
58 | bias=not conv_norm,
59 | norm=get_norm(conv_norm, conv_dim),
60 | activation=nn.ReLU(),
61 | )
62 | self.add_module("conv{}".format(k + 1), conv)
63 | self.conv_norm_relus.append(conv)
64 | self._output_size = (conv_dim, self._output_size[1], self._output_size[2])
65 |
66 | self.fcs = []
67 | for k, fc_dim in enumerate(fc_dims):
68 | if k == 0:
69 | self.add_module("flatten", nn.Flatten())
70 | fc = Linear(int(np.prod(self._output_size)), fc_dim)
71 | self.add_module("fc{}".format(k + 1), fc)
72 | self.add_module("fc_relu{}".format(k + 1), nn.ReLU())
73 | self.fcs.append(fc)
74 | self._output_size = fc_dim
75 |
76 | for layer in self.conv_norm_relus:
77 | weight_init.c2_msra_fill(layer)
78 | for layer in self.fcs:
79 | weight_init.c2_xavier_fill(layer)
80 |
81 | @classmethod
82 | def from_config(cls, cfg, input_shape):
83 | num_conv = cfg.MODEL.ROI_BOX_HEAD.NUM_CONV
84 | conv_dim = cfg.MODEL.ROI_BOX_HEAD.CONV_DIM
85 | num_fc = cfg.MODEL.ROI_BOX_HEAD.NUM_FC
86 | fc_dim = cfg.MODEL.ROI_BOX_HEAD.FC_DIM
87 | return {
88 | "input_shape": input_shape,
89 | "conv_dims": [conv_dim] * num_conv,
90 | "fc_dims": [fc_dim] * num_fc,
91 | "conv_norm": cfg.MODEL.ROI_BOX_HEAD.NORM,
92 | }
93 |
94 | def forward(self, x):
95 | for layer in self:
96 | x = layer(x)
97 | return x
98 |
99 | @property
100 | @torch.jit.unused
101 | def output_shape(self):
102 | """
103 | Returns:
104 | ShapeSpec: the output feature shape
105 | """
106 | o = self._output_size
107 | if isinstance(o, int):
108 | return ShapeSpec(channels=o)
109 | else:
110 | return ShapeSpec(channels=o[0], height=o[1], width=o[2])
111 |
112 |
113 | def build_box_head(cfg, input_shape):
114 | """
115 | Build a box head defined by `cfg.MODEL.ROI_BOX_HEAD.NAME`.
116 | """
117 | name = cfg.MODEL.ROI_BOX_HEAD.NAME
118 | return ROI_BOX_HEAD_REGISTRY.get(name)(cfg, input_shape)
119 |
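A minimal sketch of constructing the head directly, bypassing the registry/config path (sizes are arbitrary):

import torch
from detectron2.layers import ShapeSpec
from detectron2.modeling.roi_heads.box_head import FastRCNNConvFCHead

head = FastRCNNConvFCHead(
    ShapeSpec(channels=256, height=7, width=7), conv_dims=[], fc_dims=[1024, 1024]
)
x = torch.randn(8, 256, 7, 7)   # 8 pooled region features
out = head(x)                   # shape (8, 1024)
print(head.output_shape)        # ShapeSpec(channels=1024, ...)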
--------------------------------------------------------------------------------
/detectron2/modeling/sampling.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | import torch
3 |
4 | from detectron2.layers import nonzero_tuple
5 |
6 | __all__ = ["subsample_labels"]
7 |
8 |
9 | def subsample_labels(
10 | labels: torch.Tensor, num_samples: int, positive_fraction: float, bg_label: int
11 | ):
12 | """
13 | Return `num_samples` (or fewer, if not enough found)
14 | random samples from `labels` which is a mixture of positives & negatives.
15 | It will try to return as many positives as possible without
16 | exceeding `positive_fraction * num_samples`, and then try to
17 | fill the remaining slots with negatives.
18 |
19 | Args:
20 | labels (Tensor): (N, ) label vector with values:
21 | * -1: ignore
22 | * bg_label: background ("negative") class
23 | * otherwise: one or more foreground ("positive") classes
24 | num_samples (int): The total number of labels with value >= 0 to return.
25 | Values that are not sampled will be filled with -1 (ignore).
26 | positive_fraction (float): The number of subsampled labels with values > 0
27 | is `min(num_positives, int(positive_fraction * num_samples))`. The number
28 | of negatives sampled is `min(num_negatives, num_samples - num_positives_sampled)`.
29 | In other words, if there are not enough positives, the sample is filled with
30 | negatives. If there are also not enough negatives, then as many elements are
31 | sampled as is possible.
32 | bg_label (int): label index of background ("negative") class.
33 |
34 | Returns:
35 | pos_idx, neg_idx (Tensor):
36 | 1D vector of indices. The total length of both is `num_samples` or fewer.
37 | """
38 | positive = nonzero_tuple((labels != -1) & (labels != bg_label))[0]
39 | negative = nonzero_tuple(labels == bg_label)[0]
40 |
41 | num_pos = int(num_samples * positive_fraction)
42 | # protect against not enough positive examples
43 | num_pos = min(positive.numel(), num_pos)
44 | num_neg = num_samples - num_pos
45 | # protect against not enough negative examples
46 | num_neg = min(negative.numel(), num_neg)
47 |
48 | # randomly select positive and negative examples
49 | perm1 = torch.randperm(positive.numel(), device=positive.device)[:num_pos]
50 | perm2 = torch.randperm(negative.numel(), device=negative.device)[:num_neg]
51 |
52 | pos_idx = positive[perm1]
53 | neg_idx = negative[perm2]
54 | return pos_idx, neg_idx
55 |
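An illustrative sketch: sample at most 8 labels with at most 25% of them positive.

import torch
from detectron2.modeling.sampling import subsample_labels

labels = torch.tensor([-1, 0, 0, 0, 3, 7, 0, 0, 0, 2])   # -1: ignore, 0: background, >0: foreground
pos_idx, neg_idx = subsample_labels(labels, num_samples=8, positive_fraction=0.25, bg_label=0)
# Here len(pos_idx) == 2 (int(8 * 0.25)) and len(neg_idx) == 6; both index into `labels`.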
--------------------------------------------------------------------------------
/detectron2/projects/README.md:
--------------------------------------------------------------------------------
1 |
2 | Projects live in the [`projects` directory](../../projects) under the root of this repository, but not here.
3 |
--------------------------------------------------------------------------------
/detectron2/projects/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | import importlib
3 | from pathlib import Path
4 |
5 | _PROJECTS = {
6 | "point_rend": "PointRend",
7 | "deeplab": "DeepLab",
8 | "panoptic_deeplab": "Panoptic-DeepLab",
9 | }
10 | _PROJECT_ROOT = Path(__file__).parent.parent.parent / "projects"
11 |
12 | if _PROJECT_ROOT.is_dir():
13 | # This is true only for in-place installation (pip install -e, setup.py develop),
14 | # where setup(package_dir=) does not work: https://github.com/pypa/setuptools/issues/230
15 |
16 | class _D2ProjectsFinder(importlib.abc.MetaPathFinder):
17 | def find_spec(self, name, path, target=None):
18 | if not name.startswith("detectron2.projects."):
19 | return
20 | project_name = name.split(".")[-1]
21 | project_dir = _PROJECTS.get(project_name)
22 | if not project_dir:
23 | return
24 | target_file = _PROJECT_ROOT / f"{project_dir}/{project_name}/__init__.py"
25 | if not target_file.is_file():
26 | return
27 | return importlib.util.spec_from_file_location(name, target_file)
28 |
29 | import sys
30 |
31 | sys.meta_path.append(_D2ProjectsFinder())
32 |
--------------------------------------------------------------------------------
/detectron2/solver/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | from .build import build_lr_scheduler, build_optimizer, get_default_optimizer_params
3 | from .lr_scheduler import WarmupCosineLR, WarmupMultiStepLR
4 |
5 | __all__ = [k for k in globals().keys() if not k.startswith("_")]
6 |
--------------------------------------------------------------------------------
/detectron2/solver/lr_scheduler.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | import math
3 | from bisect import bisect_right
4 | from typing import List
5 | import torch
6 |
7 | # NOTE: PyTorch's LR scheduler interface uses names that assume the LR changes
8 | # only on epoch boundaries. We typically use iteration based schedules instead.
9 | # As a result, "epoch" (e.g., as in self.last_epoch) should be understood to mean
10 | # "iteration" instead.
11 |
12 | # FIXME: ideally this would be achieved with a CombinedLRScheduler, separating
13 | # MultiStepLR with WarmupLR but the current LRScheduler design doesn't allow it.
14 |
15 |
16 | class WarmupMultiStepLR(torch.optim.lr_scheduler._LRScheduler):
17 | def __init__(
18 | self,
19 | optimizer: torch.optim.Optimizer,
20 | milestones: List[int],
21 | gamma: float = 0.1,
22 | warmup_factor: float = 0.001,
23 | warmup_iters: int = 1000,
24 | warmup_method: str = "linear",
25 | last_epoch: int = -1,
26 | ):
27 | if not list(milestones) == sorted(milestones):
28 | raise ValueError(
29 | "Milestones should be a list of" " increasing integers. Got {}", milestones
30 | )
31 | self.milestones = milestones
32 | self.gamma = gamma
33 | self.warmup_factor = warmup_factor
34 | self.warmup_iters = warmup_iters
35 | self.warmup_method = warmup_method
36 | super().__init__(optimizer, last_epoch)
37 |
38 | def get_lr(self) -> List[float]:
39 | warmup_factor = _get_warmup_factor_at_iter(
40 | self.warmup_method, self.last_epoch, self.warmup_iters, self.warmup_factor
41 | )
42 | return [
43 | base_lr * warmup_factor * self.gamma ** bisect_right(self.milestones, self.last_epoch)
44 | for base_lr in self.base_lrs
45 | ]
46 |
47 | def _compute_values(self) -> List[float]:
48 | # The new interface
49 | return self.get_lr()
50 |
51 |
52 | class WarmupCosineLR(torch.optim.lr_scheduler._LRScheduler):
53 | def __init__(
54 | self,
55 | optimizer: torch.optim.Optimizer,
56 | max_iters: int,
57 | warmup_factor: float = 0.001,
58 | warmup_iters: int = 1000,
59 | warmup_method: str = "linear",
60 | last_epoch: int = -1,
61 | ):
62 | self.max_iters = max_iters
63 | self.warmup_factor = warmup_factor
64 | self.warmup_iters = warmup_iters
65 | self.warmup_method = warmup_method
66 | super().__init__(optimizer, last_epoch)
67 |
68 | def get_lr(self) -> List[float]:
69 | warmup_factor = _get_warmup_factor_at_iter(
70 | self.warmup_method, self.last_epoch, self.warmup_iters, self.warmup_factor
71 | )
72 | # Different definitions of half-cosine with warmup are possible. For
73 | # simplicity we multiply the standard half-cosine schedule by the warmup
74 | # factor. An alternative is to start the period of the cosine at warmup_iters
75 | # instead of at 0. In the case that warmup_iters << max_iters the two are
76 | # very close to each other.
77 | return [
78 | base_lr
79 | * warmup_factor
80 | * 0.5
81 | * (1.0 + math.cos(math.pi * self.last_epoch / self.max_iters))
82 | for base_lr in self.base_lrs
83 | ]
84 |
85 | def _compute_values(self) -> List[float]:
86 | # The new interface
87 | return self.get_lr()
88 |
89 |
90 | def _get_warmup_factor_at_iter(
91 | method: str, iter: int, warmup_iters: int, warmup_factor: float
92 | ) -> float:
93 | """
94 | Return the learning rate warmup factor at a specific iteration.
95 | See :paper:`ImageNet in 1h` for more details.
96 |
97 | Args:
98 | method (str): warmup method; either "constant" or "linear".
99 | iter (int): iteration at which to calculate the warmup factor.
100 | warmup_iters (int): the number of warmup iterations.
101 | warmup_factor (float): the base warmup factor (the meaning changes according
102 | to the method used).
103 |
104 | Returns:
105 | float: the effective warmup factor at the given iteration.
106 | """
107 | if iter >= warmup_iters:
108 | return 1.0
109 |
110 | if method == "constant":
111 | return warmup_factor
112 | elif method == "linear":
113 | alpha = iter / warmup_iters
114 | return warmup_factor * (1 - alpha) + alpha
115 | else:
116 | raise ValueError("Unknown warmup method: {}".format(method))
117 |
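A small sketch of the multi-step schedule with linear warmup (the milestones follow the common 3x-schedule values; any increasing list works):

import torch
from detectron2.solver.lr_scheduler import WarmupMultiStepLR

params = [torch.nn.Parameter(torch.zeros(1))]
optimizer = torch.optim.SGD(params, lr=0.02)
scheduler = WarmupMultiStepLR(
    optimizer, milestones=[210000, 250000], gamma=0.1,
    warmup_factor=0.001, warmup_iters=1000, warmup_method="linear",
)
for _ in range(5):        # one scheduler.step() per training iteration
    optimizer.step()
    scheduler.step()
# With linear warmup, the factor at iteration 500 is 0.001 * 0.5 + 0.5 = 0.5005,
# and it reaches 1.0 once warmup_iters iterations have passed.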
--------------------------------------------------------------------------------
/detectron2/structures/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | from .boxes import Boxes, BoxMode, pairwise_iou, pairwise_ioa
3 | from .image_list import ImageList
4 |
5 | from .instances import Instances
6 | from .keypoints import Keypoints, heatmaps_to_keypoints
7 | from .masks import BitMasks, PolygonMasks, rasterize_polygons_within_box, polygons_to_bitmask
8 | from .rotated_boxes import RotatedBoxes
9 | from .rotated_boxes import pairwise_iou as pairwise_iou_rotated
10 |
11 | __all__ = [k for k in globals().keys() if not k.startswith("_")]
12 |
--------------------------------------------------------------------------------
/detectron2/structures/image_list.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | from __future__ import division
3 | from typing import Any, List, Tuple
4 | import torch
5 | from torch import device
6 | from torch.nn import functional as F
7 |
8 | from detectron2.utils.env import TORCH_VERSION
9 |
10 |
11 | class ImageList(object):
12 | """
13 | Structure that holds a list of images (of possibly
14 | varying sizes) as a single tensor.
15 | This works by padding the images to the same size,
16 | and storing the original size of each image in a field.
17 |
18 | Attributes:
19 | image_sizes (list[tuple[int, int]]): each tuple is (h, w)
20 | """
21 |
22 | def __init__(self, tensor: torch.Tensor, image_sizes: List[Tuple[int, int]]):
23 | """
24 | Arguments:
25 | tensor (Tensor): of shape (N, H, W) or (N, C_1, ..., C_K, H, W) where K >= 1
26 | image_sizes (list[tuple[int, int]]): Each tuple is (h, w). It can
27 | be smaller than (H, W) due to padding.
28 | """
29 | self.tensor = tensor
30 | self.image_sizes = image_sizes
31 |
32 | def __len__(self) -> int:
33 | return len(self.image_sizes)
34 |
35 | def __getitem__(self, idx) -> torch.Tensor:
36 | """
37 | Access the individual image in its original size.
38 |
39 | Args:
40 | idx: int or slice
41 |
42 | Returns:
43 | Tensor: an image of shape (H, W) or (C_1, ..., C_K, H, W) where K >= 1
44 | """
45 | size = self.image_sizes[idx]
46 | return self.tensor[idx, ..., : size[0], : size[1]]
47 |
48 | @torch.jit.unused
49 | def to(self, *args: Any, **kwargs: Any) -> "ImageList":
50 | cast_tensor = self.tensor.to(*args, **kwargs)
51 | return ImageList(cast_tensor, self.image_sizes)
52 |
53 | @property
54 | def device(self) -> device:
55 | return self.tensor.device
56 |
57 | @staticmethod
58 | def from_tensors(
59 | tensors: List[torch.Tensor], size_divisibility: int = 0, pad_value: float = 0.0
60 | ) -> "ImageList":
61 | """
62 | Args:
63 | tensors: a tuple or list of `torch.Tensors`, each of shape (Hi, Wi) or
64 | (C_1, ..., C_K, Hi, Wi) where K >= 1. The Tensors will be padded
65 | to the same shape with `pad_value`.
66 | size_divisibility (int): If `size_divisibility > 0`, add padding to ensure
67 | the common height and width is divisible by `size_divisibility`.
68 | This depends on the model and many models need a divisibility of 32.
69 | pad_value (float): value to pad
70 |
71 | Returns:
72 | an `ImageList`.
73 | """
74 | assert len(tensors) > 0
75 | assert isinstance(tensors, (tuple, list))
76 | for t in tensors:
77 | assert isinstance(t, torch.Tensor), type(t)
78 | assert t.shape[1:-2] == tensors[0].shape[1:-2], t.shape
79 |
80 | # Magic code below that handles dynamic shapes for both scripting and tracing ...
81 |
82 | image_sizes = [(im.shape[-2], im.shape[-1]) for im in tensors]
83 |
84 | if torch.jit.is_scripting():
85 | max_size = torch.stack([torch.as_tensor(x) for x in image_sizes]).max(0).values
86 | if size_divisibility > 1:
87 | stride = size_divisibility
88 | # the last two dims are H,W, both subject to divisibility requirement
89 | max_size = (max_size + (stride - 1)) // stride * stride
90 |
91 | max_size: List[int] = max_size.to(dtype=torch.long).tolist()
92 | else:
93 | # https://github.com/pytorch/pytorch/issues/42448
94 | if TORCH_VERSION >= (1, 7) and torch.jit.is_tracing():
95 | # In tracing mode, x.shape[i] is a scalar Tensor, and should not be converted
96 | # to int: this will cause the traced graph to have hard-coded shapes.
97 | # Instead we convert each shape to a vector with a stack()
98 | image_sizes = [torch.stack(x) for x in image_sizes]
99 |
100 | # maximum (H, W) for the last two dims
101 | # find the maximum in a traceable way
102 | max_size = torch.stack(image_sizes).max(0).values
103 | else:
104 | # Original eager logic here -- not scripting, not tracing:
105 | # (can be unified with scripting after
106 | # https://github.com/pytorch/pytorch/issues/47379)
107 | max_size = torch.as_tensor(
108 | [max(s) for s in zip(*[img.shape[-2:] for img in tensors])]
109 | )
110 |
111 | if size_divisibility > 1:
112 | stride = size_divisibility
113 | # the last two dims are H,W, both subject to divisibility requirement
114 | max_size = (max_size + (stride - 1)) // stride * stride
115 |
116 | if len(tensors) == 1:
117 | # This seems slightly (2%) faster.
118 | # TODO: check whether it's faster for multiple images as well
119 | image_size = image_sizes[0]
120 | padding_size = [0, max_size[-1] - image_size[1], 0, max_size[-2] - image_size[0]]
121 | batched_imgs = F.pad(tensors[0], padding_size, value=pad_value).unsqueeze_(0)
122 | else:
123 | # max_size can be a tensor in tracing mode, therefore convert to list
124 | batch_shape = [len(tensors)] + list(tensors[0].shape[:-2]) + list(max_size)
125 | batched_imgs = tensors[0].new_full(batch_shape, pad_value)
126 | for img, pad_img in zip(tensors, batched_imgs):
127 | pad_img[..., : img.shape[-2], : img.shape[-1]].copy_(img)
128 |
129 | return ImageList(batched_imgs.contiguous(), image_sizes)
130 |
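A short sketch of from_tensors with two differently-sized images and a divisibility of 32:

import torch
from detectron2.structures import ImageList

imgs = [torch.randn(3, 480, 640), torch.randn(3, 360, 500)]
batch = ImageList.from_tensors(imgs, size_divisibility=32)
print(batch.tensor.shape)   # torch.Size([2, 3, 480, 640]) -- padded, divisible by 32
print(batch.image_sizes)    # [(480, 640), (360, 500)]
print(batch[1].shape)       # torch.Size([3, 360, 500]) -- original, unpadded size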
--------------------------------------------------------------------------------
/detectron2/structures/instances.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | import itertools
3 | from typing import Any, Dict, List, Tuple, Union
4 | import torch
5 |
6 |
7 | class Instances:
8 | """
9 | This class represents a list of instances in an image.
10 | It stores the attributes of instances (e.g., boxes, masks, labels, scores) as "fields".
11 | All fields must have the same ``__len__`` which is the number of instances.
12 |
13 | All other (non-field) attributes of this class are considered private:
14 | they must start with '_' and are not modifiable by a user.
15 |
16 | Some basic usage:
17 |
18 | 1. Set/get/check a field:
19 |
20 | .. code-block:: python
21 |
22 | instances.gt_boxes = Boxes(...)
23 | print(instances.pred_masks) # a tensor of shape (N, H, W)
24 | print('gt_masks' in instances)
25 |
26 | 2. ``len(instances)`` returns the number of instances
27 | 3. Indexing: ``instances[indices]`` will apply the indexing on all the fields
28 | and returns a new :class:`Instances`.
29 | Typically, ``indices`` is an integer vector of indices,
30 | or a binary mask of length ``num_instances``
31 |
32 | .. code-block:: python
33 |
34 | category_3_detections = instances[instances.pred_classes == 3]
35 | confident_detections = instances[instances.scores > 0.9]
36 | """
37 |
38 | def __init__(self, image_size: Tuple[int, int], **kwargs: Any):
39 | """
40 | Args:
41 | image_size (height, width): the spatial size of the image.
42 | kwargs: fields to add to this `Instances`.
43 | """
44 | self._image_size = image_size
45 | self._fields: Dict[str, Any] = {}
46 | for k, v in kwargs.items():
47 | self.set(k, v)
48 |
49 | @property
50 | def image_size(self) -> Tuple[int, int]:
51 | """
52 | Returns:
53 | tuple: height, width
54 | """
55 | return self._image_size
56 |
57 | def __setattr__(self, name: str, val: Any) -> None:
58 | if name.startswith("_"):
59 | super().__setattr__(name, val)
60 | else:
61 | self.set(name, val)
62 |
63 | def __getattr__(self, name: str) -> Any:
64 | if name == "_fields" or name not in self._fields:
65 | raise AttributeError("Cannot find field '{}' in the given Instances!".format(name))
66 | return self._fields[name]
67 |
68 | def set(self, name: str, value: Any) -> None:
69 | """
70 | Set the field named `name` to `value`.
71 | The length of `value` must be the number of instances,
72 | and must agree with other existing fields in this object.
73 | """
74 | data_len = len(value)
75 | if len(self._fields):
76 | assert (
77 | len(self) == data_len
78 | ), "Adding a field of length {} to an Instances of length {}".format(data_len, len(self))
79 | self._fields[name] = value
80 |
81 | def has(self, name: str) -> bool:
82 | """
83 | Returns:
84 | bool: whether the field called `name` exists.
85 | """
86 | return name in self._fields
87 |
88 | def remove(self, name: str) -> None:
89 | """
90 | Remove the field called `name`.
91 | """
92 | del self._fields[name]
93 |
94 | def get(self, name: str) -> Any:
95 | """
96 | Returns the field called `name`.
97 | """
98 | return self._fields[name]
99 |
100 | def get_fields(self) -> Dict[str, Any]:
101 | """
102 | Returns:
103 | dict: a dict which maps names (str) to data of the fields
104 |
105 | Modifying the returned dict will modify this instance.
106 | """
107 | return self._fields
108 |
109 | # Tensor-like methods
110 | def to(self, *args: Any, **kwargs: Any) -> "Instances":
111 | """
112 | Returns:
113 | Instances: all fields are called with a `to(device)`, if the field has this method.
114 | """
115 | ret = Instances(self._image_size)
116 | for k, v in self._fields.items():
117 | if hasattr(v, "to"):
118 | v = v.to(*args, **kwargs)
119 | ret.set(k, v)
120 | return ret
121 |
122 | def __getitem__(self, item: Union[int, slice, torch.BoolTensor]) -> "Instances":
123 | """
124 | Args:
125 | item: an index-like object and will be used to index all the fields.
126 |
127 | Returns:
128 | If `item` is a string, return the data in the corresponding field.
129 | Otherwise, returns an `Instances` where all fields are indexed by `item`.
130 | """
131 | if type(item) == int:
132 | if item >= len(self) or item < -len(self):
133 | raise IndexError("Instances index out of range!")
134 | else:
135 | item = slice(item, None, len(self))
136 |
137 | ret = Instances(self._image_size)
138 | for k, v in self._fields.items():
139 | ret.set(k, v[item])
140 | return ret
141 |
142 | def __len__(self) -> int:
143 | for v in self._fields.values():
144 | # use __len__ because len() has to be int and is not friendly to tracing
145 | return v.__len__()
146 | raise NotImplementedError("Empty Instances does not support __len__!")
147 |
148 | def __iter__(self):
149 | raise NotImplementedError("`Instances` object is not iterable!")
150 |
151 | @staticmethod
152 | def cat(instance_lists: List["Instances"]) -> "Instances":
153 | """
154 | Args:
155 | instance_lists (list[Instances])
156 |
157 | Returns:
158 | Instances
159 | """
160 | assert all(isinstance(i, Instances) for i in instance_lists)
161 | assert len(instance_lists) > 0
162 | if len(instance_lists) == 1:
163 | return instance_lists[0]
164 |
165 | image_size = instance_lists[0].image_size
166 | for i in instance_lists[1:]:
167 | assert i.image_size == image_size
168 | ret = Instances(image_size)
169 | for k in instance_lists[0]._fields.keys():
170 | values = [i.get(k) for i in instance_lists]
171 | v0 = values[0]
172 | if isinstance(v0, torch.Tensor):
173 | values = torch.cat(values, dim=0)
174 | elif isinstance(v0, list):
175 | values = list(itertools.chain(*values))
176 | elif hasattr(type(v0), "cat"):
177 | values = type(v0).cat(values)
178 | else:
179 | raise ValueError("Unsupported type {} for concatenation".format(type(v0)))
180 | ret.set(k, values)
181 | return ret
182 |
183 | def __str__(self) -> str:
184 | s = self.__class__.__name__ + "("
185 | s += "num_instances={}, ".format(len(self))
186 | s += "image_height={}, ".format(self._image_size[0])
187 | s += "image_width={}, ".format(self._image_size[1])
188 | s += "fields=[{}])".format(", ".join((f"{k}: {v}" for k, v in self._fields.items())))
189 | return s
190 |
191 | __repr__ = __str__
192 |
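A small sketch of creating and concatenating Instances (the fields here are plain tensors; Boxes, masks, etc. work the same way through their own cat):

import torch
from detectron2.structures import Instances

a = Instances((480, 640), scores=torch.tensor([0.9, 0.8]))
b = Instances((480, 640), scores=torch.tensor([0.7]))
merged = Instances.cat([a, b])
print(len(merged), merged.scores)   # 3 tensor([0.9000, 0.8000, 0.7000])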
--------------------------------------------------------------------------------
/detectron2/utils/README.md:
--------------------------------------------------------------------------------
1 | # Utility functions
2 |
3 | This folder contains utility functions that are not used in the
4 | core library, but are useful for building models or training
5 | code using the config system.
6 |
--------------------------------------------------------------------------------
/detectron2/utils/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 |
--------------------------------------------------------------------------------
/detectron2/utils/analysis.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | # -*- coding: utf-8 -*-
3 |
4 | import logging
5 | import typing
6 | import torch
7 | from fvcore.nn import activation_count, flop_count, parameter_count, parameter_count_table
8 | from torch import nn
9 |
10 | from detectron2.structures import BitMasks, Boxes, ImageList, Instances
11 |
12 | from .logger import log_first_n
13 |
14 | __all__ = [
15 | "activation_count_operators",
16 | "flop_count_operators",
17 | "parameter_count_table",
18 | "parameter_count",
19 | ]
20 |
21 | FLOPS_MODE = "flops"
22 | ACTIVATIONS_MODE = "activations"
23 |
24 |
25 | # some extra ops to ignore from counting.
26 | _IGNORED_OPS = {
27 | "aten::add",
28 | "aten::add_",
29 | "aten::batch_norm",
30 | "aten::constant_pad_nd",
31 | "aten::div",
32 | "aten::div_",
33 | "aten::exp",
34 | "aten::log2",
35 | "aten::max_pool2d",
36 | "aten::meshgrid",
37 | "aten::mul",
38 | "aten::mul_",
39 | "aten::nonzero_numpy",
40 | "aten::rsub",
41 | "aten::sigmoid",
42 | "aten::sigmoid_",
43 | "aten::softmax",
44 | "aten::sort",
45 | "aten::sqrt",
46 | "aten::sub",
47 | "aten::upsample_nearest2d",
48 | "prim::PythonOp",
49 | "torchvision::nms", # TODO estimate flop for nms
50 | }
51 |
52 |
53 | def flop_count_operators(
54 | model: nn.Module, inputs: list, **kwargs
55 | ) -> typing.DefaultDict[str, float]:
56 | """
57 | Implement operator-level flops counting using jit.
58 | This is a wrapper of fvcore.nn.flop_count, that supports standard detection models
59 | in detectron2.
60 |
61 | Note:
62 | The function runs the input through the model to compute flops.
63 | The flops of a detection model are often input-dependent; for example,
64 | the flops of the box & mask heads depend on the number of proposals &
65 | the number of detected objects.
66 | Therefore, the flops counting using a single input may not accurately
67 | reflect the computation cost of a model.
68 |
69 | Args:
70 | model: a detectron2 model that takes `list[dict]` as input.
71 | inputs (list[dict]): inputs to model, in detectron2's standard format.
72 | """
73 | return _wrapper_count_operators(model=model, inputs=inputs, mode=FLOPS_MODE, **kwargs)
74 |
75 |
76 | def activation_count_operators(
77 | model: nn.Module, inputs: list, **kwargs
78 | ) -> typing.DefaultDict[str, float]:
79 | """
80 | Implement operator-level activations counting using jit.
81 | This is a wrapper of fvcore.nn.activation_count, that supports standard detection models
82 | in detectron2.
83 |
84 | Note:
85 | The function runs the input through the model to compute activations.
86 | The activations of a detection model are often input-dependent; for example,
87 | the activations of the box & mask heads depend on the number of proposals &
88 | the number of detected objects.
89 |
90 | Args:
91 | model: a detectron2 model that takes `list[dict]` as input.
92 | inputs (list[dict]): inputs to model, in detectron2's standard format.
93 | """
94 | return _wrapper_count_operators(model=model, inputs=inputs, mode=ACTIVATIONS_MODE, **kwargs)
95 |
96 |
97 | def _flatten_to_tuple(outputs):
98 | result = []
99 | if isinstance(outputs, torch.Tensor):
100 | result.append(outputs)
101 | elif isinstance(outputs, (list, tuple)):
102 | for v in outputs:
103 | result.extend(_flatten_to_tuple(v))
104 | elif isinstance(outputs, dict):
105 | for _, v in outputs.items():
106 | result.extend(_flatten_to_tuple(v))
107 | elif isinstance(outputs, Instances):
108 | result.extend(_flatten_to_tuple(outputs.get_fields()))
109 | elif isinstance(outputs, (Boxes, BitMasks, ImageList)):
110 | result.append(outputs.tensor)
111 | else:
112 | log_first_n(
113 | logging.WARN,
114 | f"Output of type {type(outputs)} not included in flops/activations count.",
115 | n=10,
116 | )
117 | return tuple(result)
118 |
119 |
120 | def _wrapper_count_operators(
121 | model: nn.Module, inputs: list, mode: str, **kwargs
122 | ) -> typing.DefaultDict[str, float]:
123 |
124 | # ignore some ops
125 | supported_ops = {k: lambda *args, **kwargs: {} for k in _IGNORED_OPS}
126 | supported_ops.update(kwargs.pop("supported_ops", {}))
127 | kwargs["supported_ops"] = supported_ops
128 |
129 | assert len(inputs) == 1, "Please use batch size=1"
130 | tensor_input = inputs[0]["image"]
131 |
132 | class WrapModel(nn.Module):
133 | def __init__(self, model):
134 | super().__init__()
135 | if isinstance(
136 | model, (nn.parallel.distributed.DistributedDataParallel, nn.DataParallel)
137 | ):
138 | self.model = model.module
139 | else:
140 | self.model = model
141 |
142 | def forward(self, image):
143 | # jit requires the input/output to be Tensors
144 | inputs = [{"image": image}]
145 | outputs = self.model.forward(inputs)
146 | # Only the subgraph that computes the returned tuple of tensor will be
147 | # counted. So we flatten everything we found to tuple of tensors.
148 | return _flatten_to_tuple(outputs)
149 |
150 | old_train = model.training
151 | with torch.no_grad():
152 | if mode == FLOPS_MODE:
153 | ret = flop_count(WrapModel(model).train(False), (tensor_input,), **kwargs)
154 | elif mode == ACTIVATIONS_MODE:
155 | ret = activation_count(WrapModel(model).train(False), (tensor_input,), **kwargs)
156 | else:
157 | raise NotImplementedError("Count for mode {} is not supported yet.".format(mode))
158 | # compatible with change in fvcore
159 | if isinstance(ret, tuple):
160 | ret = ret[0]
161 | model.train(old_train)
162 | return ret
163 |
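An illustrative sketch of the flop counter on a model-zoo model (batch size must be 1; the reported numbers are approximate and input-dependent, as noted above):

import torch
from detectron2 import model_zoo
from detectron2.utils.analysis import flop_count_operators

model = model_zoo.get("COCO-Detection/retinanet_R_50_FPN_1x.yaml", trained=False).eval()
inputs = [{"image": torch.randn(3, 800, 1067)}]      # detectron2's standard input format
gflops_by_op = flop_count_operators(model, inputs)   # per-operator GFLOP counts
print(sum(gflops_by_op.values()), "GFLOPs (approx.)")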
--------------------------------------------------------------------------------
/detectron2/utils/colormap.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 |
3 | """
4 | An awesome colormap for really neat visualizations.
5 | Copied from Detectron, with the gray colors removed.
6 | """
7 |
8 | import numpy as np
9 |
10 | __all__ = ["colormap", "random_color"]
11 |
12 | # fmt: off
13 | # RGB:
14 | _COLORS = np.array(
15 | [
16 | 0.000, 0.447, 0.741,
17 | 0.850, 0.325, 0.098,
18 | 0.929, 0.694, 0.125,
19 | 0.494, 0.184, 0.556,
20 | 0.466, 0.674, 0.188,
21 | 0.301, 0.745, 0.933,
22 | 0.635, 0.078, 0.184,
23 | 0.300, 0.300, 0.300,
24 | 0.600, 0.600, 0.600,
25 | 1.000, 0.000, 0.000,
26 | 1.000, 0.500, 0.000,
27 | 0.749, 0.749, 0.000,
28 | 0.000, 1.000, 0.000,
29 | 0.000, 0.000, 1.000,
30 | 0.667, 0.000, 1.000,
31 | 0.333, 0.333, 0.000,
32 | 0.333, 0.667, 0.000,
33 | 0.333, 1.000, 0.000,
34 | 0.667, 0.333, 0.000,
35 | 0.667, 0.667, 0.000,
36 | 0.667, 1.000, 0.000,
37 | 1.000, 0.333, 0.000,
38 | 1.000, 0.667, 0.000,
39 | 1.000, 1.000, 0.000,
40 | 0.000, 0.333, 0.500,
41 | 0.000, 0.667, 0.500,
42 | 0.000, 1.000, 0.500,
43 | 0.333, 0.000, 0.500,
44 | 0.333, 0.333, 0.500,
45 | 0.333, 0.667, 0.500,
46 | 0.333, 1.000, 0.500,
47 | 0.667, 0.000, 0.500,
48 | 0.667, 0.333, 0.500,
49 | 0.667, 0.667, 0.500,
50 | 0.667, 1.000, 0.500,
51 | 1.000, 0.000, 0.500,
52 | 1.000, 0.333, 0.500,
53 | 1.000, 0.667, 0.500,
54 | 1.000, 1.000, 0.500,
55 | 0.000, 0.333, 1.000,
56 | 0.000, 0.667, 1.000,
57 | 0.000, 1.000, 1.000,
58 | 0.333, 0.000, 1.000,
59 | 0.333, 0.333, 1.000,
60 | 0.333, 0.667, 1.000,
61 | 0.333, 1.000, 1.000,
62 | 0.667, 0.000, 1.000,
63 | 0.667, 0.333, 1.000,
64 | 0.667, 0.667, 1.000,
65 | 0.667, 1.000, 1.000,
66 | 1.000, 0.000, 1.000,
67 | 1.000, 0.333, 1.000,
68 | 1.000, 0.667, 1.000,
69 | 0.333, 0.000, 0.000,
70 | 0.500, 0.000, 0.000,
71 | 0.667, 0.000, 0.000,
72 | 0.833, 0.000, 0.000,
73 | 1.000, 0.000, 0.000,
74 | 0.000, 0.167, 0.000,
75 | 0.000, 0.333, 0.000,
76 | 0.000, 0.500, 0.000,
77 | 0.000, 0.667, 0.000,
78 | 0.000, 0.833, 0.000,
79 | 0.000, 1.000, 0.000,
80 | 0.000, 0.000, 0.167,
81 | 0.000, 0.000, 0.333,
82 | 0.000, 0.000, 0.500,
83 | 0.000, 0.000, 0.667,
84 | 0.000, 0.000, 0.833,
85 | 0.000, 0.000, 1.000,
86 | 0.000, 0.000, 0.000,
87 | 0.143, 0.143, 0.143,
88 | 0.857, 0.857, 0.857,
89 | 1.000, 1.000, 1.000
90 | ]
91 | ).astype(np.float32).reshape(-1, 3)
92 | # fmt: on
93 |
94 |
95 | def colormap(rgb=False, maximum=255):
96 | """
97 | Args:
98 | rgb (bool): whether to return RGB colors or BGR colors.
99 | maximum (int): either 255 or 1
100 |
101 | Returns:
102 | ndarray: a float32 array of Nx3 colors, in range [0, 255] or [0, 1]
103 | """
104 | assert maximum in [255, 1], maximum
105 | c = _COLORS * maximum
106 | if not rgb:
107 | c = c[:, ::-1]
108 | return c
109 |
110 |
111 | def random_color(rgb=False, maximum=255):
112 | """
113 | Args:
114 | rgb (bool): whether to return RGB colors or BGR colors.
115 | maximum (int): either 255 or 1
116 |
117 | Returns:
118 | ndarray: a vector of 3 numbers
119 | """
120 | idx = np.random.randint(0, len(_COLORS))
121 | ret = _COLORS[idx] * maximum
122 | if not rgb:
123 | ret = ret[::-1]
124 | return ret
125 |
126 |
127 | if __name__ == "__main__":
128 | import cv2
129 |
130 | size = 100
131 | H, W = 10, 10
132 | canvas = np.random.rand(H * size, W * size, 3).astype("float32")
133 | for h in range(H):
134 | for w in range(W):
135 | idx = h * W + w
136 | if idx >= len(_COLORS):
137 | break
138 | canvas[h * size : (h + 1) * size, w * size : (w + 1) * size] = _COLORS[idx]
139 | cv2.imshow("a", canvas)
140 | cv2.waitKey(0)
141 |
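
A short sketch of how this palette is typically consumed when coloring instances; the instance count is a placeholder.

    import numpy as np
    from detectron2.utils.colormap import colormap, random_color

    num_instances = 5  # placeholder
    palette = colormap(rgb=False, maximum=255)           # Nx3 float32, BGR, values in [0, 255]
    colors = [palette[i % len(palette)] for i in range(num_instances)]

    c = random_color(rgb=True, maximum=1)                # a single RGB color in [0, 1]
    assert c.shape == (3,) and np.all((0.0 <= c) & (c <= 1.0))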
--------------------------------------------------------------------------------
/detectron2/utils/env.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | import importlib
3 | import importlib.util
4 | import logging
5 | import numpy as np
6 | import os
7 | import random
8 | import sys
9 | from datetime import datetime
10 | import torch
11 |
12 | __all__ = ["seed_all_rng"]
13 |
14 |
15 | TORCH_VERSION = tuple(int(x) for x in torch.__version__.split(".")[:2])
16 | """
17 | PyTorch version as a tuple of 2 ints. Useful for comparison.
18 | """
19 |
20 |
21 | def seed_all_rng(seed=None):
22 | """
23 | Set the random seed for the RNG in torch, numpy and python.
24 |
25 | Args:
26 | seed (int): if None, will use a strong random seed.
27 | """
28 | if seed is None:
29 | seed = (
30 | os.getpid()
31 | + int(datetime.now().strftime("%S%f"))
32 | + int.from_bytes(os.urandom(2), "big")
33 | )
34 | logger = logging.getLogger(__name__)
35 | logger.info("Using a generated random seed {}".format(seed))
36 | np.random.seed(seed)
37 | torch.set_rng_state(torch.manual_seed(seed).get_state())
38 | random.seed(seed)
39 |
40 |
41 | # from https://stackoverflow.com/questions/67631/how-to-import-a-module-given-the-full-path
42 | def _import_file(module_name, file_path, make_importable=False):
43 | spec = importlib.util.spec_from_file_location(module_name, file_path)
44 | module = importlib.util.module_from_spec(spec)
45 | spec.loader.exec_module(module)
46 | if make_importable:
47 | sys.modules[module_name] = module
48 | return module
49 |
50 |
51 | def _configure_libraries():
52 | """
53 | Configurations for some libraries.
54 | """
55 | # An environment option to disable `import cv2` globally,
56 | # in case it leads to negative performance impact
57 | disable_cv2 = int(os.environ.get("DETECTRON2_DISABLE_CV2", False))
58 | if disable_cv2:
59 | sys.modules["cv2"] = None
60 | else:
61 | # Disable opencl in opencv since its interaction with cuda often has negative effects
62 | # This envvar is supported after OpenCV 3.4.0
63 | os.environ["OPENCV_OPENCL_RUNTIME"] = "disabled"
64 | try:
65 | import cv2
66 |
67 | if int(cv2.__version__.split(".")[0]) >= 3:
68 | cv2.ocl.setUseOpenCL(False)
69 | except ModuleNotFoundError:
70 | # Other types of ImportError, if happened, should not be ignored.
71 | # Because a failed opencv import could mess up address space
72 | # https://github.com/skvark/opencv-python/issues/381
73 | pass
74 |
75 | def get_version(module, digit=2):
76 | return tuple(map(int, module.__version__.split(".")[:digit]))
77 |
78 | # fmt: off
79 | assert get_version(torch) >= (1, 4), "Requires torch>=1.4"
80 | import fvcore
81 | assert get_version(fvcore, 3) >= (0, 1, 2), "Requires fvcore>=0.1.2"
82 | import yaml
83 | assert get_version(yaml) >= (5, 1), "Requires pyyaml>=5.1"
84 | # fmt: on
85 |
86 |
87 | _ENV_SETUP_DONE = False
88 |
89 |
90 | def setup_environment():
91 | """Perform environment setup work. The default setup is a no-op, but this
92 | function allows the user to specify a Python source file or a module in
93 | the $DETECTRON2_ENV_MODULE environment variable, which performs
94 | custom setup work that may be necessary for their computing environment.
95 | """
96 | global _ENV_SETUP_DONE
97 | if _ENV_SETUP_DONE:
98 | return
99 | _ENV_SETUP_DONE = True
100 |
101 | _configure_libraries()
102 |
103 | custom_module_path = os.environ.get("DETECTRON2_ENV_MODULE")
104 |
105 | if custom_module_path:
106 | setup_custom_environment(custom_module_path)
107 | else:
108 | # The default setup is a no-op
109 | pass
110 |
111 |
112 | def setup_custom_environment(custom_module):
113 | """
114 | Load custom environment setup by importing a Python source file or a
115 | module, and run the setup function.
116 | """
117 | if custom_module.endswith(".py"):
118 | module = _import_file("detectron2.utils.env.custom_module", custom_module)
119 | else:
120 | module = importlib.import_module(custom_module)
121 | assert hasattr(module, "setup_environment") and callable(module.setup_environment), (
122 | "Custom environment module defined in {} does not have the "
123 | "required callable attribute 'setup_environment'."
124 | ).format(custom_module)
125 | module.setup_environment()
126 |
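
A minimal sketch of the custom-setup hook that setup_environment() loads from $DETECTRON2_ENV_MODULE; the file name my_env.py and its body are illustrative.

    # my_env.py -- point DETECTRON2_ENV_MODULE at this file (or at an importable module name):
    #   export DETECTRON2_ENV_MODULE=/path/to/my_env.py
    import os

    def setup_environment():
        # Site-specific setup goes here; this example only sets an environment variable.
        os.environ.setdefault("OMP_NUM_THREADS", "1")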
--------------------------------------------------------------------------------
/detectron2/utils/file_io.py:
--------------------------------------------------------------------------------
1 | from fvcore.common.file_io import HTTPURLHandler, OneDrivePathHandler, PathHandler, PathManagerBase
2 |
3 | __all__ = ["PathManager", "PathHandler"]
4 |
5 |
6 | PathManager = PathManagerBase()
7 | """
8 | This is a detectron2 project-specific PathManager.
9 | We try to stay away from global PathManager in fvcore as it
10 | introduces potential conflicts among other libraries.
11 | """
12 |
13 |
14 | class Detectron2Handler(PathHandler):
15 | """
16 | Resolve anything that's hosted under detectron2's namespace.
17 | """
18 |
19 | PREFIX = "detectron2://"
20 | S3_DETECTRON2_PREFIX = "https://dl.fbaipublicfiles.com/detectron2/"
21 |
22 | def _get_supported_prefixes(self):
23 | return [self.PREFIX]
24 |
25 | def _get_local_path(self, path):
26 | name = path[len(self.PREFIX) :]
27 | return PathManager.get_local_path(self.S3_DETECTRON2_PREFIX + name)
28 |
29 | def _open(self, path, mode="r", **kwargs):
30 | return PathManager.open(self._get_local_path(path), mode, **kwargs)
31 |
32 |
33 | PathManager.register_handler(HTTPURLHandler())
34 | PathManager.register_handler(OneDrivePathHandler())
35 | PathManager.register_handler(Detectron2Handler())
36 |
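
A small usage sketch of the handlers registered above; the checkpoint name is only an example of a path hosted under the detectron2:// prefix.

    from detectron2.utils.file_io import PathManager

    # "detectron2://..." is rewritten to https://dl.fbaipublicfiles.com/detectron2/...
    # and cached locally; get_local_path returns the path of the cached file.
    local_path = PathManager.get_local_path("detectron2://ImageNetPretrained/MSRA/R-50.pkl")

    # Plain local paths and http(s) URLs work as well, via the other handlers.
    with PathManager.open(local_path, "rb") as f:
        header = f.read(16)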
--------------------------------------------------------------------------------
/detectron2/utils/memory.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 |
3 | import logging
4 | from contextlib import contextmanager
5 | from functools import wraps
6 | import torch
7 |
8 | __all__ = ["retry_if_cuda_oom"]
9 |
10 |
11 | @contextmanager
12 | def _ignore_torch_cuda_oom():
13 | """
14 | A context which ignores CUDA OOM exception from pytorch.
15 | """
16 | try:
17 | yield
18 | except RuntimeError as e:
19 | # NOTE: the string may change?
20 | if "CUDA out of memory. " in str(e):
21 | pass
22 | else:
23 | raise
24 |
25 |
26 | def retry_if_cuda_oom(func):
27 | """
28 | Makes a function retry itself after encountering
29 | pytorch's CUDA OOM error.
30 | It will first retry after calling `torch.cuda.empty_cache()`.
31 |
32 | If that still fails, it will then retry after converting the inputs to CPU.
33 | In this case, it expects the function to dispatch to a CPU implementation.
34 | The return values may become CPU tensors as well, and it is the user's
35 | responsibility to convert them back to CUDA tensors if needed.
36 |
37 | Args:
38 | func: a stateless callable that takes tensor-like objects as arguments
39 |
40 | Returns:
41 | a callable which retries `func` if OOM is encountered.
42 |
43 | Examples:
44 | ::
45 | output = retry_if_cuda_oom(some_torch_function)(input1, input2)
46 | # output may be on CPU even if inputs are on GPU
47 |
48 | Note:
49 | 1. When converting inputs to CPU, it will only look at each argument and check
50 | if it has `.device` and `.to` for conversion. Nested structures of tensors
51 | are not supported.
52 |
53 | 2. Since the function might be called more than once, it has to be
54 | stateless.
55 | """
56 |
57 | def maybe_to_cpu(x):
58 | try:
59 | like_gpu_tensor = x.device.type == "cuda" and hasattr(x, "to")
60 | except AttributeError:
61 | like_gpu_tensor = False
62 | if like_gpu_tensor:
63 | return x.to(device="cpu")
64 | else:
65 | return x
66 |
67 | @wraps(func)
68 | def wrapped(*args, **kwargs):
69 | with _ignore_torch_cuda_oom():
70 | return func(*args, **kwargs)
71 |
72 | # Clear cache and retry
73 | torch.cuda.empty_cache()
74 | with _ignore_torch_cuda_oom():
75 | return func(*args, **kwargs)
76 |
77 | # Try on CPU. This slows down the code significantly, therefore print a notice.
78 | logger = logging.getLogger(__name__)
79 | logger.info("Attempting to copy inputs of {} to CPU due to CUDA OOM".format(str(func)))
80 | new_args = (maybe_to_cpu(x) for x in args)
81 | new_kwargs = {k: maybe_to_cpu(v) for k, v in kwargs.items()}
82 | return func(*new_args, **new_kwargs)
83 |
84 | return wrapped
85 |
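
A concrete sketch of the pattern described in the docstring, using a generic matmul as the stateless callable so the example stays self-contained.

    import torch
    from detectron2.utils.memory import retry_if_cuda_oom

    def big_matmul(a, b):
        # Stateless, operates only on tensor-like args, as required above.
        return a @ b

    device = "cuda" if torch.cuda.is_available() else "cpu"
    a = torch.rand(1024, 1024, device=device)
    b = torch.rand(1024, 1024, device=device)

    # On CUDA OOM this retries after empty_cache(), then falls back to CPU inputs,
    # so the result may end up on CPU.
    out = retry_if_cuda_oom(big_matmul)(a, b)
    print(out.device)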
--------------------------------------------------------------------------------
/detectron2/utils/registry.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 |
3 | # Keep this module for backward compatibility.
4 | from fvcore.common.registry import Registry # noqa
5 |
6 | __all__ = ["Registry"]
7 |
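
A minimal sketch of the fvcore Registry re-exported here, used the same way detectron2 registers backbones and meta-architectures; the registry and class names are illustrative.

    from detectron2.utils.registry import Registry

    DEMO_REGISTRY = Registry("DEMO")  # illustrative; detectron2 creates e.g. BACKBONE_REGISTRY this way

    @DEMO_REGISTRY.register()
    class MyComponent:
        def __init__(self, scale=1.0):
            self.scale = scale

    # Look up by name (usually a string taken from the config) and instantiate.
    obj = DEMO_REGISTRY.get("MyComponent")(scale=2.0)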
--------------------------------------------------------------------------------
/detectron2/utils/serialize.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | import cloudpickle
3 |
4 |
5 | class PicklableWrapper(object):
6 | """
7 | Wrap an object to make it more picklable. Note that it uses
8 | heavyweight serialization libraries that are slower than pickle.
9 | It's best to use it only on closures (which are usually not picklable).
10 |
11 | This is a simplified version of
12 | https://github.com/joblib/joblib/blob/master/joblib/externals/loky/cloudpickle_wrapper.py
13 | """
14 |
15 | def __init__(self, obj):
16 | self._obj = obj
17 |
18 | def __reduce__(self):
19 | s = cloudpickle.dumps(self._obj)
20 | return cloudpickle.loads, (s,)
21 |
22 | def __call__(self, *args, **kwargs):
23 | return self._obj(*args, **kwargs)
24 |
25 | def __getattr__(self, attr):
26 | # Ensure that the wrapped object can be used seamlessly as the previous object.
27 | if attr not in ["_obj"]:
28 | return getattr(self._obj, attr)
29 | return getattr(self, attr)
30 |
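
A small sketch of why the wrapper exists: closures do not survive plain pickle, but a PicklableWrapper around them round-trips via cloudpickle.

    import pickle
    from detectron2.utils.serialize import PicklableWrapper

    def make_adder(offset):
        return lambda x: x + offset  # a closure; pickle.dumps on it would fail

    add_offset = make_adder(10)
    wrapped = PicklableWrapper(add_offset)

    restored = pickle.loads(pickle.dumps(wrapped))  # __reduce__ routes through cloudpickle
    assert restored(5) == 15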
--------------------------------------------------------------------------------
/detectron2/utils/testing.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import torch
3 |
4 | from detectron2 import model_zoo
5 | from detectron2.data import DatasetCatalog
6 | from detectron2.data.detection_utils import read_image
7 | from detectron2.modeling import build_model
8 | from detectron2.structures import Boxes
9 | from detectron2.utils.file_io import PathManager
10 |
11 |
12 | """
13 | Internal utilities for tests. Don't use except for writing tests.
14 | """
15 |
16 |
17 | def get_model_no_weights(config_path):
18 | """
19 | Like model_zoo.get, but do not load any weights (even pretrained)
20 | """
21 | cfg = model_zoo.get_config(config_path)
22 | if not torch.cuda.is_available():
23 | cfg.MODEL.DEVICE = "cpu"
24 | return build_model(cfg)
25 |
26 |
27 | def random_boxes(num_boxes, max_coord=100, device="cpu"):
28 | """
29 | Create a random Nx4 boxes tensor, with coordinates < max_coord.
30 | """
31 | boxes = torch.rand(num_boxes, 4, device=device) * (max_coord * 0.5)
32 | boxes.clamp_(min=1.0) # tiny boxes cause numerical instability in box regression
33 | # Note: the implementation of this function in torchvision is:
34 | # boxes[:, 2:] += torch.rand(N, 2) * 100
35 | # but it does not guarantee non-negative widths/heights constraints:
36 | # boxes[:, 2] >= boxes[:, 0] and boxes[:, 3] >= boxes[:, 1]:
37 | boxes[:, 2:] += boxes[:, :2]
38 | return boxes
39 |
40 |
41 | def get_sample_coco_image(tensor=True):
42 | """
43 | Args:
44 | tensor (bool): if True, returns 3xHxW tensor.
45 | else, returns a HxWx3 numpy array.
46 |
47 | Returns:
48 | an image, in BGR color.
49 | """
50 | try:
51 | file_name = DatasetCatalog.get("coco_2017_train")[0]["file_name"]
52 | if not PathManager.exists(file_name):
53 | raise FileNotFoundError()
54 | except IOError:
55 | # for public CI to run
56 | file_name = "http://images.cocodataset.org/train2017/000000000009.jpg"
57 | ret = read_image(file_name, format="BGR")
58 | if tensor:
59 | ret = torch.from_numpy(np.ascontiguousarray(ret.transpose(2, 0, 1)))
60 | return ret
61 |
62 |
63 | def assert_instances_allclose(input, other, rtol=1e-5, msg=""):
64 | """
65 | Args:
66 | input, other (Instances):
67 | """
68 | if not msg:
69 | msg = "Two Instances are different! "
70 | else:
71 | msg = msg.rstrip() + " "
72 | assert input.image_size == other.image_size, (
73 | msg + f"image_size is {input.image_size} vs. {other.image_size}!"
74 | )
75 | fields = sorted(input.get_fields().keys())
76 | fields_other = sorted(other.get_fields().keys())
77 | assert fields == fields_other, msg + f"Fields are {fields} vs {fields_other}!"
78 |
79 | for f in fields:
80 | val1, val2 = input.get(f), other.get(f)
81 | if isinstance(val1, Boxes):
82 | # boxes are in the range of O(100) and can have a larger tolerance
83 | assert torch.allclose(val1.tensor, val2.tensor, atol=100 * rtol), (
84 | msg + f"Field {f} differs too much!"
85 | )
86 | elif isinstance(val1, torch.Tensor):
87 | if val1.dtype.is_floating_point:
88 | mag = torch.abs(val1).max().cpu().item()
89 | assert torch.allclose(val1, val2, atol=mag * rtol), (
90 | msg + f"Field {f} differs too much!"
91 | )
92 | else:
93 | assert torch.equal(val1, val2), msg + f"Field {f} is different!"
94 | else:
95 | raise ValueError(f"Don't know how to compare type {type(val1)}")
96 |
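
A small sketch of how these test helpers are typically combined when writing a unit test; the field names and the perturbation are illustrative.

    import torch
    from detectron2.structures import Boxes, Instances
    from detectron2.utils.testing import assert_instances_allclose, random_boxes

    boxes = random_boxes(8, max_coord=100)     # Nx4 with x1 <= x2, y1 <= y2 by construction
    inst_a = Instances((100, 100))
    inst_a.pred_boxes = Boxes(boxes)
    inst_a.scores = torch.rand(8)

    inst_b = Instances((100, 100))
    inst_b.pred_boxes = Boxes(boxes.clone())
    inst_b.scores = inst_a.scores + 1e-8       # tiny perturbation, within the default tolerance

    assert_instances_allclose(inst_a, inst_b)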
--------------------------------------------------------------------------------
/projects/YOSO/configs/ade20k/panoptic-segmentation/Base-ADE20K-PanopticSegmentation.yaml:
--------------------------------------------------------------------------------
1 | MODEL:
2 | PIXEL_MEAN: [123.675, 116.280, 103.530]
3 | PIXEL_STD: [58.395, 57.120, 57.375]
4 | META_ARCHITECTURE: "YOSO"
5 | DATASETS:
6 | TRAIN: ("ade20k_panoptic_train",)
7 | TEST: ("ade20k_panoptic_val",)
8 | SOLVER:
9 | IMS_PER_BATCH: 16
10 | BASE_LR: 0.0001
11 | MAX_ITER: 160000
12 | WARMUP_FACTOR: 1.0
13 | WARMUP_ITERS: 0
14 | WEIGHT_DECAY: 0.05
15 | OPTIMIZER: "ADAMW"
16 | LR_SCHEDULER_NAME: "WarmupPolyLR"
17 | BACKBONE_MULTIPLIER: 0.1
18 | CLIP_GRADIENTS:
19 | ENABLED: True
20 | CLIP_TYPE: "full_model"
21 | CLIP_VALUE: 0.01
22 | NORM_TYPE: 2.0
23 | AMP:
24 | ENABLED: False
25 | INPUT:
26 | MIN_SIZE_TRAIN: !!python/object/apply:eval ["[int(x * 0.1 * 640) for x in range(5, 21)]"]
27 | MIN_SIZE_TRAIN_SAMPLING: "choice"
28 | MIN_SIZE_TEST: 640
29 | MAX_SIZE_TRAIN: 2560
30 | MAX_SIZE_TEST: 2560
31 | CROP:
32 | ENABLED: True
33 | TYPE: "absolute"
34 | SIZE: (640, 640)
35 | SINGLE_CATEGORY_MAX_AREA: 1.0
36 | COLOR_AUG_SSD: True
37 | SIZE_DIVISIBILITY: 640 # used in dataset mapper
38 | FORMAT: "RGB"
39 | DATASET_MAPPER_NAME: "yoso_panoptic"
40 | TEST:
41 | EVAL_PERIOD: 5000
42 | AUG:
43 | ENABLED: False
44 | MIN_SIZES: [320, 480, 640, 800, 960, 1120]
45 | MAX_SIZE: 4480
46 | FLIP: True
47 | DATALOADER:
48 | FILTER_EMPTY_ANNOTATIONS: True
49 | NUM_WORKERS: 4
50 | VERSION: 2
51 | # CUDNN_BENCHMARK: True
52 |
--------------------------------------------------------------------------------
/projects/YOSO/configs/ade20k/panoptic-segmentation/YOSO-R50.yaml:
--------------------------------------------------------------------------------
1 | _BASE_: Base-ADE20K-PanopticSegmentation.yaml
2 | MODEL:
3 | BACKBONE:
4 | FREEZE_AT: 0
5 | NAME: "build_resnet_backbone"
6 | RESNETS:
7 | DEPTH: 50
8 | STRIDE_IN_1X1: False
9 | OUT_FEATURES: ["res2", "res3", "res4", "res5"]
10 | WEIGHTS: "detectron2://ImageNetPretrained/torchvision/R-50.pkl"
11 | YOSO:
12 | SIZE_DIVISIBILITY: 32
13 | # Structure
14 | NUM_CLASSES: 150
15 | NUM_STAGES: 2
16 | IN_FEATURES: ["res2", "res3", "res4", "res5"]
17 | HIDDEN_DIM: 256
18 | NUM_PROPOSALS: 100
19 | CONV_KERNEL_SIZE_2D: 1
20 | CONV_KERNEL_SIZE_1D: 3
21 | NUM_CLS_FCS: 1
22 | NUM_MASK_FCS: 1
23 | # Loss
24 | NO_OBJECT_WEIGHT: 0.1
25 | CLASS_WEIGHT: 2.0
26 | MASK_WEIGHT: 5.0
27 | DICE_WEIGHT: 5.0
28 | TRAIN_NUM_POINTS: 12544
29 | OVERSAMPLE_RATIO: 3.0
30 | IMPORTANCE_SAMPLE_RATIO: 0.75
31 | TEMPERATIRE: 0.5 #1.0
32 | TEST:
33 | SEMANTIC_ON: False #True
34 | INSTANCE_ON: False #True
35 | PANOPTIC_ON: True
36 | OVERLAP_THRESHOLD: 0.8
37 | OBJECT_MASK_THRESHOLD: 0.2
38 | OUTPUT_DIR: "output/yoso_resnet50_panoptic_seg_ade20k"
39 |
--------------------------------------------------------------------------------
/projects/YOSO/configs/cityscapes/panoptic-segmentation/Base-Cityscapes-PanopticSegmentation.yaml:
--------------------------------------------------------------------------------
1 | MODEL:
2 | PIXEL_MEAN: [123.675, 116.280, 103.530]
3 | PIXEL_STD: [58.395, 57.120, 57.375]
4 | META_ARCHITECTURE: "YOSO"
5 | DATASETS:
6 | TRAIN: ("cityscapes_fine_panoptic_train",)
7 | TEST: ("cityscapes_fine_panoptic_val",)
8 | SOLVER:
9 | IMS_PER_BATCH: 16
10 | BASE_LR: 0.0001
11 | MAX_ITER: 180000 #90000 #
12 | WARMUP_FACTOR: 1.0
13 | WARMUP_ITERS: 0
14 | WEIGHT_DECAY: 0.05
15 | OPTIMIZER: "ADAMW"
16 | LR_SCHEDULER_NAME: "WarmupPolyLR"
17 | BACKBONE_MULTIPLIER: 0.1
18 | CLIP_GRADIENTS:
19 | ENABLED: True
20 | CLIP_TYPE: "full_model"
21 | CLIP_VALUE: 0.01
22 | NORM_TYPE: 2.0
23 | AMP:
24 | ENABLED: False
25 | INPUT:
26 | MIN_SIZE_TRAIN: !!python/object/apply:eval ["[int(x * 0.1 * 1024) for x in range(5, 21)]"]
27 | MIN_SIZE_TRAIN_SAMPLING: "choice"
28 | MAX_SIZE_TRAIN: 4096
29 | MIN_SIZE_TEST: 512 #1024
30 | MAX_SIZE_TEST: 1024 #2048
31 | CROP:
32 | ENABLED: True
33 | TYPE: "absolute"
34 | SIZE: (512, 1024)
35 | SINGLE_CATEGORY_MAX_AREA: 1.0
36 | COLOR_AUG_SSD: True
37 | SIZE_DIVISIBILITY: -1
38 | FORMAT: "RGB"
39 | DATASET_MAPPER_NAME: "yoso_panoptic"
40 | TEST:
41 | EVAL_PERIOD: 5000
42 | AUG:
43 | ENABLED: False
44 | MIN_SIZES: [512, 768, 1024, 1280, 1536, 1792]
45 | MAX_SIZE: 4096
46 | FLIP: True
47 | DATALOADER:
48 | FILTER_EMPTY_ANNOTATIONS: True
49 | NUM_WORKERS: 4
50 | VERSION: 2
51 | CUDNN_BENCHMARK: True
--------------------------------------------------------------------------------
/projects/YOSO/configs/cityscapes/panoptic-segmentation/YOSO-R50.yaml:
--------------------------------------------------------------------------------
1 | _BASE_: Base-Cityscapes-PanopticSegmentation.yaml
2 | MODEL:
3 | BACKBONE:
4 | FREEZE_AT: 0
5 | NAME: "build_resnet_backbone"
6 | RESNETS:
7 | DEPTH: 50
8 | STRIDE_IN_1X1: False
9 | NORM: "SyncBN" # use syncbn for cityscapes dataset
10 | OUT_FEATURES: ["res2", "res3", "res4", "res5"]
11 | WEIGHTS: "detectron2://ImageNetPretrained/torchvision/R-50.pkl"
12 | YOSO:
13 | SIZE_DIVISIBILITY: 32
14 | # Structure
15 | NUM_CLASSES: 19
16 | NUM_STAGES: 2
17 | IN_FEATURES: ["res2", "res3", "res4", "res5"]
18 | HIDDEN_DIM: 256
19 | NUM_PROPOSALS: 100
20 | CONV_KERNEL_SIZE_2D: 1
21 | CONV_KERNEL_SIZE_1D: 3
22 | NUM_CLS_FCS: 3 #1
23 | NUM_MASK_FCS: 3 #1
24 | # Loss
25 | NO_OBJECT_WEIGHT: 0.1
26 | CLASS_WEIGHT: 2.0
27 | MASK_WEIGHT: 5.0
28 | DICE_WEIGHT: 5.0
29 | TRAIN_NUM_POINTS: 12544
30 | OVERSAMPLE_RATIO: 3.0
31 | IMPORTANCE_SAMPLE_RATIO: 0.75
32 | TEMPERATIRE: 0.05
33 | TEST:
34 | SEMANTIC_ON: False #True
35 | INSTANCE_ON: False #True
36 | PANOPTIC_ON: True
37 | OVERLAP_THRESHOLD: 0.8
38 | OBJECT_MASK_THRESHOLD: 0.8 #0.5
39 | OUTPUT_DIR: "output/yoso_resnet50_panoptic_seg_cityscapes"
--------------------------------------------------------------------------------
/projects/YOSO/configs/coco/panoptic-segmentation/Base-COCO-PanopticSegmentation.yaml:
--------------------------------------------------------------------------------
1 | MODEL:
2 | PIXEL_MEAN: [123.675, 116.280, 103.530]
3 | PIXEL_STD: [58.395, 57.120, 57.375]
4 | META_ARCHITECTURE: "YOSO"
5 | DATASETS:
6 | TRAIN: ("coco_2017_train_panoptic",)
7 | TEST: ("coco_2017_val_panoptic_with_sem_seg",) # to evaluate instance and semantic performance as well
8 | SOLVER:
9 | IMS_PER_BATCH: 16
10 | BASE_LR: 0.0001
11 | STEPS: (327778, 355092)
12 | MAX_ITER: 368750
13 | WARMUP_FACTOR: 1.0
14 | WARMUP_ITERS: 10
15 | WEIGHT_DECAY: 0.05
16 | OPTIMIZER: "ADAMW"
17 | BACKBONE_MULTIPLIER: 0.1
18 | CLIP_GRADIENTS:
19 | ENABLED: True
20 | CLIP_TYPE: "full_model"
21 | CLIP_VALUE: 0.01
22 | NORM_TYPE: 2.0
23 | AMP:
24 | ENABLED: False
25 | INPUT:
26 | IMAGE_SIZE: 1024
27 | MIN_SCALE: 0.1
28 | MAX_SCALE: 2.0
29 | FORMAT: "RGB"
30 | DATASET_MAPPER_NAME: "yoso_panoptic_lsj"
31 | MIN_SIZE_TEST: 800 # 550 #512
32 | MAX_SIZE_TEST: 1333 # 800 #800
33 | TEST:
34 | EVAL_PERIOD: 5000
35 | DATALOADER:
36 | FILTER_EMPTY_ANNOTATIONS: True
37 | NUM_WORKERS: 4
38 | VERSION: 2
39 | # CUDNN_BENCHMARK: True
--------------------------------------------------------------------------------
/projects/YOSO/configs/coco/panoptic-segmentation/YOSO-R50.yaml:
--------------------------------------------------------------------------------
1 | _BASE_: Base-COCO-PanopticSegmentation.yaml
2 | MODEL:
3 | BACKBONE:
4 | FREEZE_AT: 0
5 | NAME: "build_resnet_backbone"
6 | RESNETS:
7 | DEPTH: 50
8 | STRIDE_IN_1X1: False
9 | OUT_FEATURES: ["res2", "res3", "res4", "res5"]
10 | WEIGHTS: "detectron2://ImageNetPretrained/torchvision/R-50.pkl"
11 | YOSO:
12 | SIZE_DIVISIBILITY: 32
13 | # Structure
14 | NUM_CLASSES: 133
15 | NUM_STAGES: 2
16 | IN_FEATURES: ["res2", "res3", "res4", "res5"]
17 | HIDDEN_DIM: 256
18 | NUM_PROPOSALS: 100
19 | CONV_KERNEL_SIZE_2D: 1
20 | CONV_KERNEL_SIZE_1D: 3
21 | NUM_CLS_FCS: 1
22 | NUM_MASK_FCS: 1
23 | # Loss
24 | NO_OBJECT_WEIGHT: 0.1
25 | CLASS_WEIGHT: 2.0
26 | MASK_WEIGHT: 5.0
27 | DICE_WEIGHT: 5.0
28 | TRAIN_NUM_POINTS: 12544
29 | OVERSAMPLE_RATIO: 3.0
30 | IMPORTANCE_SAMPLE_RATIO: 0.75
31 | TEMPERATIRE: 0.05
32 | TEST:
33 | SEMANTIC_ON: False
34 | INSTANCE_ON: False
35 | PANOPTIC_ON: True
36 | OVERLAP_THRESHOLD: 0.8
37 | OBJECT_MASK_THRESHOLD: 0.7
38 | OUTPUT_DIR: "output/yoso_resnet50_panoptic_seg_coco"
--------------------------------------------------------------------------------
/projects/YOSO/configs/mapillary-vistas/panoptic-segmentation/Base-MapillaryVistas-PanopticSegmentation.yaml:
--------------------------------------------------------------------------------
1 | MODEL:
2 | PIXEL_MEAN: [123.675, 116.280, 103.530]
3 | PIXEL_STD: [58.395, 57.120, 57.375]
4 | META_ARCHITECTURE: "YOSO"
5 | DATASETS:
6 | TRAIN: ("mapillary_vistas_panoptic_train",)
7 | TEST: ("mapillary_vistas_panoptic_val",)
8 | SOLVER:
9 | IMS_PER_BATCH: 16
10 | BASE_LR: 0.0001
11 | MAX_ITER: 300000
12 | WARMUP_FACTOR: 1.0
13 | WARMUP_ITERS: 0
14 | WEIGHT_DECAY: 0.05
15 | OPTIMIZER: "ADAMW"
16 | LR_SCHEDULER_NAME: "WarmupPolyLR"
17 | BACKBONE_MULTIPLIER: 0.1
18 | CLIP_GRADIENTS:
19 | ENABLED: True
20 | CLIP_TYPE: "full_model"
21 | CLIP_VALUE: 0.01
22 | NORM_TYPE: 2.0
23 | AMP:
24 | ENABLED: False
25 | INPUT:
26 | MIN_SIZE_TRAIN: !!python/object/apply:eval ["[int(x * 0.1 * 2048) for x in range(5, 21)]"]
27 | MIN_SIZE_TRAIN_SAMPLING: "choice"
28 | MIN_SIZE_TEST: 2048
29 | MAX_SIZE_TRAIN: 8192
30 | MAX_SIZE_TEST: 2048
31 | CROP:
32 | ENABLED: True
33 | TYPE: "absolute"
34 | SIZE: (1024, 1024)
35 | SINGLE_CATEGORY_MAX_AREA: 1.0
36 | COLOR_AUG_SSD: True
37 | SIZE_DIVISIBILITY: -1 #1024 # used in dataset mapper
38 | FORMAT: "RGB"
39 | DATASET_MAPPER_NAME: "yoso_panoptic"
40 | TEST:
41 | EVAL_PERIOD: 20000 #5000
42 | DATALOADER:
43 | FILTER_EMPTY_ANNOTATIONS: True
44 | NUM_WORKERS: 4
45 | VERSION: 2
--------------------------------------------------------------------------------
/projects/YOSO/configs/mapillary-vistas/panoptic-segmentation/YOSO-R50.yaml:
--------------------------------------------------------------------------------
1 | _BASE_: Base-MapillaryVistas-PanopticSegmentation.yaml
2 | MODEL:
3 | BACKBONE:
4 | FREEZE_AT: 0
5 | NAME: "build_resnet_backbone"
6 | RESNETS:
7 | DEPTH: 50
8 | STRIDE_IN_1X1: False
9 | OUT_FEATURES: ["res2", "res3", "res4", "res5"]
10 | WEIGHTS: "detectron2://ImageNetPretrained/torchvision/R-50.pkl"
11 | YOSO:
12 | SIZE_DIVISIBILITY: 32
13 | # Structure
14 | NUM_CLASSES: 65
15 | NUM_STAGES: 2
16 | IN_FEATURES: ["res2", "res3", "res4", "res5"]
17 | HIDDEN_DIM: 256
18 | NUM_PROPOSALS: 150 #100
19 | CONV_KERNEL_SIZE_2D: 1
20 | CONV_KERNEL_SIZE_1D: 3
21 | NUM_CLS_FCS: 3 #1
22 | NUM_MASK_FCS: 3 #1
23 | # Loss
24 | NO_OBJECT_WEIGHT: 0.1
25 | CLASS_WEIGHT: 2.0
26 | MASK_WEIGHT: 5.0
27 | DICE_WEIGHT: 5.0
28 | TRAIN_NUM_POINTS: 12544
29 | OVERSAMPLE_RATIO: 3.0
30 | IMPORTANCE_SAMPLE_RATIO: 0.75
31 | TEMPERATIRE: 0.1 #0.5
32 | TEST:
33 | SEMANTIC_ON: False
34 | INSTANCE_ON: False
35 | PANOPTIC_ON: True
36 | OVERLAP_THRESHOLD: 0.8
37 | OBJECT_MASK_THRESHOLD: 0.0
38 | OUTPUT_DIR: "output/yoso_resnet50_panoptic_seg_mapillary"
39 |
--------------------------------------------------------------------------------
/projects/YOSO/yoso/__init__.py:
--------------------------------------------------------------------------------
1 | from .config import add_yoso_config
2 | from .segmentator import YOSO
3 | from . import data
4 | from .data.dataset_mappers.yoso_instance_lsj_dataset_mapper import YOSOInstanceLSJDatasetMapper
5 | from .data.dataset_mappers.yoso_panoptic_lsj_dataset_mapper import YOSOPanopticLSJDatasetMapper
6 | from .data.dataset_mappers.yoso_instance_dataset_mapper import YOSOInstanceDatasetMapper
7 | from .data.dataset_mappers.yoso_panoptic_dataset_mapper import YOSOPanopticDatasetMapper
8 | from .data.dataset_mappers.yoso_semantic_dataset_mapper import YOSOSemanticDatasetMapper
9 | from .utils import build_lr_scheduler, SemanticSegmentorWithTTA
--------------------------------------------------------------------------------
/projects/YOSO/yoso/config.py:
--------------------------------------------------------------------------------
1 | from detectron2.config import CfgNode as CN
2 |
3 | def add_yoso_config(cfg):
4 | cfg.MODEL.YOSO = CN()
5 | cfg.MODEL.YOSO.SIZE_DIVISIBILITY = 32
6 | cfg.MODEL.YOSO.NUM_CLASSES = 133
7 | cfg.MODEL.YOSO.NUM_STAGES = 2
8 |
9 | cfg.MODEL.YOSO.IN_FEATURES = ["res2", "res3", "res4", "res5"]
10 | cfg.MODEL.YOSO.HIDDEN_DIM = 256
11 | cfg.MODEL.YOSO.AGG_DIM = 128
12 | cfg.MODEL.YOSO.NUM_PROPOSALS = 100
13 | cfg.MODEL.YOSO.CONV_KERNEL_SIZE_2D = 1
14 | cfg.MODEL.YOSO.CONV_KERNEL_SIZE_1D = 3
15 | cfg.MODEL.YOSO.NUM_CLS_FCS = 1
16 | cfg.MODEL.YOSO.NUM_MASK_FCS = 1
17 |
18 | cfg.MODEL.YOSO.NO_OBJECT_WEIGHT = 0.1
19 | cfg.MODEL.YOSO.CLASS_WEIGHT = 2.0
20 | cfg.MODEL.YOSO.MASK_WEIGHT = 5.0
21 | cfg.MODEL.YOSO.DICE_WEIGHT = 5.0
22 | cfg.MODEL.YOSO.TRAIN_NUM_POINTS = 112 * 112
23 | cfg.MODEL.YOSO.OVERSAMPLE_RATIO = 3.0
24 | cfg.MODEL.YOSO.IMPORTANCE_SAMPLE_RATIO = 0.75
25 | cfg.MODEL.YOSO.TEMPERATIRE = 0.1
26 |
27 | cfg.MODEL.YOSO.TEST = CN()
28 | cfg.MODEL.YOSO.TEST.SEMANTIC_ON = False
29 | cfg.MODEL.YOSO.TEST.INSTANCE_ON = False
30 | cfg.MODEL.YOSO.TEST.PANOPTIC_ON = False
31 | cfg.MODEL.YOSO.TEST.OBJECT_MASK_THRESHOLD = 0.0
32 | cfg.MODEL.YOSO.TEST.OVERLAP_THRESHOLD = 0.0
33 | cfg.MODEL.YOSO.TEST.SEM_SEG_POSTPROCESSING_BEFORE_INFERENCE = False
34 |
35 | cfg.SOLVER.OPTIMIZER = "ADAMW"
36 | cfg.SOLVER.BACKBONE_MULTIPLIER = 0.1
37 | cfg.SOLVER.WEIGHT_DECAY_EMBED = 0.0
38 | cfg.SOLVER.WEIGHT_DECAY_BIAS = None
39 |
40 | cfg.SOLVER.POLY_LR_POWER = 0.9
41 | cfg.SOLVER.POLY_LR_CONSTANT_ENDING = 0.0
42 |
43 | cfg.INPUT.DATASET_MAPPER_NAME = "yoso_panoptic_lsj"
44 | cfg.INPUT.SIZE_DIVISIBILITY = -1
45 | cfg.INPUT.COLOR_AUG_SSD = False
46 | cfg.INPUT.CROP.SINGLE_CATEGORY_MAX_AREA = 1.0
47 |
48 | cfg.INPUT.IMAGE_SIZE = 1024
49 | cfg.INPUT.MIN_SCALE = 0.1
50 | cfg.INPUT.MAX_SCALE = 2.0
51 |
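
A minimal sketch of wiring these defaults into a detectron2 config; the import path assumes projects/YOSO is on PYTHONPATH so the package is importable as yoso, and the YAML path refers to one of the project configs shown above.

    from detectron2.config import get_cfg
    from yoso.config import add_yoso_config

    cfg = get_cfg()        # detectron2 defaults
    add_yoso_config(cfg)   # YOSO-specific keys must exist before merging the project YAML
    cfg.merge_from_file("projects/YOSO/configs/coco/panoptic-segmentation/YOSO-R50.yaml")
    cfg.freeze()
    print(cfg.MODEL.YOSO.NUM_CLASSES)  # 133 for COCO panoptic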
--------------------------------------------------------------------------------
/projects/YOSO/yoso/data/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | from . import datasets
3 |
--------------------------------------------------------------------------------
/projects/YOSO/yoso/data/dataset_mappers/yoso_panoptic_dataset_mapper.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | import copy
3 | import logging
4 |
5 | import numpy as np
6 | import torch
7 | from torch.nn import functional as F
8 |
9 | from detectron2.config import configurable
10 | from detectron2.data import detection_utils as utils
11 | from detectron2.data import transforms as T
12 | from detectron2.structures import BitMasks, Instances
13 |
14 | from .yoso_semantic_dataset_mapper import YOSOSemanticDatasetMapper
15 |
16 | __all__ = ["YOSOPanopticDatasetMapper"]
17 |
18 |
19 | class YOSOPanopticDatasetMapper(YOSOSemanticDatasetMapper):
20 | """
21 | A callable which takes a dataset dict in Detectron2 Dataset format,
22 | and maps it into a format used by MaskFormer for panoptic segmentation.
23 |
24 | The callable currently does the following:
25 |
26 | 1. Reads the image from "file_name"
27 | 2. Applies geometric transforms to the image and annotation
28 | 3. Finds and applies suitable cropping to the image and annotation
29 | 4. Prepares the image and annotation as Tensors
30 | """
31 |
32 | @configurable
33 | def __init__(
34 | self,
35 | is_train=True,
36 | *,
37 | augmentations,
38 | image_format,
39 | ignore_label,
40 | size_divisibility,
41 | ):
42 | """
43 | NOTE: this interface is experimental.
44 | Args:
45 | is_train: for training or inference
46 | augmentations: a list of augmentations or deterministic transforms to apply
47 | image_format: an image format supported by :func:`detection_utils.read_image`.
48 | ignore_label: the label that is ignored in evaluation
49 | size_divisibility: pad image size to be divisible by this value
50 | """
51 | super().__init__(
52 | is_train,
53 | augmentations=augmentations,
54 | image_format=image_format,
55 | ignore_label=ignore_label,
56 | size_divisibility=size_divisibility,
57 | )
58 |
59 | def __call__(self, dataset_dict):
60 | """
61 | Args:
62 | dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format.
63 |
64 | Returns:
65 | dict: a format that builtin models in detectron2 accept
66 | """
67 | assert self.is_train, "YOSOPanopticDatasetMapper should only be used for training!"
68 |
69 | dataset_dict = copy.deepcopy(dataset_dict) # it will be modified by code below
70 | image = utils.read_image(dataset_dict["file_name"], format=self.img_format)
71 | utils.check_image_size(dataset_dict, image)
72 |
73 | # semantic segmentation
74 | if "sem_seg_file_name" in dataset_dict:
75 | # PyTorch transformation not implemented for uint16, so converting it to double first
76 | sem_seg_gt = utils.read_image(dataset_dict.pop("sem_seg_file_name")).astype("double")
77 | else:
78 | sem_seg_gt = None
79 |
80 | # panoptic segmentation
81 | if "pan_seg_file_name" in dataset_dict:
82 | pan_seg_gt = utils.read_image(dataset_dict.pop("pan_seg_file_name"), "RGB")
83 | segments_info = dataset_dict["segments_info"]
84 | else:
85 | pan_seg_gt = None
86 | segments_info = None
87 |
88 | if pan_seg_gt is None:
89 | raise ValueError(
90 | "Cannot find 'pan_seg_file_name' for panoptic segmentation dataset {}.".format(
91 | dataset_dict["file_name"]
92 | )
93 | )
94 |
95 | aug_input = T.AugInput(image, sem_seg=sem_seg_gt)
96 | aug_input, transforms = T.apply_transform_gens(self.tfm_gens, aug_input)
97 | image = aug_input.image
98 | if sem_seg_gt is not None:
99 | sem_seg_gt = aug_input.sem_seg
100 |
101 | # apply the same transformation to panoptic segmentation
102 | pan_seg_gt = transforms.apply_segmentation(pan_seg_gt)
103 |
104 | from panopticapi.utils import rgb2id
105 |
106 | pan_seg_gt = rgb2id(pan_seg_gt)
107 |
108 | # Pad image and segmentation label here!
109 | image = torch.as_tensor(np.ascontiguousarray(image.transpose(2, 0, 1)))
110 | if sem_seg_gt is not None:
111 | sem_seg_gt = torch.as_tensor(sem_seg_gt.astype("long"))
112 | pan_seg_gt = torch.as_tensor(pan_seg_gt.astype("long"))
113 |
114 | if self.size_divisibility > 0:
115 | image_size = (image.shape[-2], image.shape[-1])
116 | padding_size = [
117 | 0,
118 | self.size_divisibility - image_size[1],
119 | 0,
120 | self.size_divisibility - image_size[0],
121 | ]
122 | image = F.pad(image, padding_size, value=128).contiguous()
123 | if sem_seg_gt is not None:
124 | sem_seg_gt = F.pad(sem_seg_gt, padding_size, value=self.ignore_label).contiguous()
125 | pan_seg_gt = F.pad(
126 | pan_seg_gt, padding_size, value=0
127 | ).contiguous() # 0 is the VOID panoptic label
128 |
129 | image_shape = (image.shape[-2], image.shape[-1]) # h, w
130 |
131 | # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory,
132 | # but not efficient on large generic data structures due to the use of pickle & mp.Queue.
133 | # Therefore it's important to use torch.Tensor.
134 | dataset_dict["image"] = image
135 | if sem_seg_gt is not None:
136 | dataset_dict["sem_seg"] = sem_seg_gt.long()
137 |
138 | if "annotations" in dataset_dict:
139 | raise ValueError("Panoptic segmentation dataset should not have 'annotations'.")
140 |
141 | # Prepare per-category binary masks
142 | pan_seg_gt = pan_seg_gt.numpy()
143 | instances = Instances(image_shape)
144 | classes = []
145 | masks = []
146 | for segment_info in segments_info:
147 | class_id = segment_info["category_id"]
148 | if not segment_info["iscrowd"]:
149 | classes.append(class_id)
150 | masks.append(pan_seg_gt == segment_info["id"])
151 |
152 | classes = np.array(classes)
153 | instances.gt_classes = torch.tensor(classes, dtype=torch.int64)
154 | if len(masks) == 0:
155 | # Some images do not have annotations (all ignored)
156 | instances.gt_masks = torch.zeros((0, pan_seg_gt.shape[-2], pan_seg_gt.shape[-1]))
157 | else:
158 | masks = BitMasks(
159 | torch.stack([torch.from_numpy(np.ascontiguousarray(x.copy())) for x in masks])
160 | )
161 | instances.gt_masks = masks.tensor
162 |
163 | dataset_dict["instances"] = instances
164 |
165 | return dataset_dict
166 |
--------------------------------------------------------------------------------
/projects/YOSO/yoso/data/dataset_mappers/yoso_panoptic_lsj_dataset_mapper.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | # Modified by Bowen Cheng from https://github.com/facebookresearch/detr/blob/master/d2/detr/dataset_mapper.py
3 | import copy
4 | import logging
5 |
6 | import numpy as np
7 | import torch
8 |
9 | from detectron2.config import configurable
10 | from detectron2.data import detection_utils as utils
11 | from detectron2.data import transforms as T
12 | from detectron2.data.transforms import TransformGen
13 | from detectron2.structures import BitMasks, Boxes, Instances
14 |
15 | __all__ = ["YOSOPanopticLSJDatasetMapper"]
16 |
17 |
18 | def build_transform_gen(cfg, is_train):
19 | """
20 | Create a list of default :class:`Augmentation` from config.
21 | Now it includes resizing and flipping.
22 | Returns:
23 | list[Augmentation]
24 | """
25 | assert is_train, "Only support training augmentation"
26 | image_size = cfg.INPUT.IMAGE_SIZE
27 | min_scale = cfg.INPUT.MIN_SCALE
28 | max_scale = cfg.INPUT.MAX_SCALE
29 |
30 | augmentation = []
31 |
32 | if cfg.INPUT.RANDOM_FLIP != "none":
33 | augmentation.append(
34 | T.RandomFlip(
35 | horizontal=cfg.INPUT.RANDOM_FLIP == "horizontal",
36 | vertical=cfg.INPUT.RANDOM_FLIP == "vertical",
37 | )
38 | )
39 |
40 | augmentation.extend([
41 | T.ResizeScale(
42 | min_scale=min_scale, max_scale=max_scale, target_height=image_size, target_width=image_size
43 | ),
44 | T.FixedSizeCrop(crop_size=(image_size, image_size)),
45 | ])
46 |
47 | return augmentation
48 |
49 |
50 | # This is specifically designed for the COCO dataset.
51 | class YOSOPanopticLSJDatasetMapper:
52 | """
53 | A callable which takes a dataset dict in Detectron2 Dataset format,
54 | and maps it into a format used by MaskFormer.
55 |
56 | This dataset mapper applies the same transformation as DETR for COCO panoptic segmentation.
57 |
58 | The callable currently does the following:
59 |
60 | 1. Reads the image from "file_name"
61 | 2. Applies geometric transforms to the image and annotation
62 | 3. Finds and applies suitable cropping to the image and annotation
63 | 4. Prepares the image and annotation as Tensors
64 | """
65 |
66 | @configurable
67 | def __init__(
68 | self,
69 | is_train=True,
70 | *,
71 | tfm_gens,
72 | image_format,
73 | ):
74 | """
75 | NOTE: this interface is experimental.
76 | Args:
77 | is_train: for training or inference
78 | augmentations: a list of augmentations or deterministic transforms to apply
79 | crop_gen: crop augmentation
80 | tfm_gens: data augmentation
81 | image_format: an image format supported by :func:`detection_utils.read_image`.
82 | """
83 | self.tfm_gens = tfm_gens
84 | logging.getLogger(__name__).info(
85 | "[YOSOPanopticLSJDatasetMapper] Full TransformGens used in training: {}".format(
86 | str(self.tfm_gens)
87 | )
88 | )
89 |
90 | self.img_format = image_format
91 | self.is_train = is_train
92 |
93 | @classmethod
94 | def from_config(cls, cfg, is_train=True):
95 | # Build augmentation
96 | tfm_gens = build_transform_gen(cfg, is_train)
97 |
98 | ret = {
99 | "is_train": is_train,
100 | "tfm_gens": tfm_gens,
101 | "image_format": cfg.INPUT.FORMAT,
102 | }
103 | return ret
104 |
105 | def __call__(self, dataset_dict):
106 | """
107 | Args:
108 | dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format.
109 |
110 | Returns:
111 | dict: a format that builtin models in detectron2 accept
112 | """
113 | dataset_dict = copy.deepcopy(dataset_dict) # it will be modified by code below
114 | image = utils.read_image(dataset_dict["file_name"], format=self.img_format)
115 | utils.check_image_size(dataset_dict, image)
116 |
117 | image, transforms = T.apply_transform_gens(self.tfm_gens, image)
118 | image_shape = image.shape[:2] # h, w
119 |
120 | # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory,
121 | # but not efficient on large generic data structures due to the use of pickle & mp.Queue.
122 | # Therefore it's important to use torch.Tensor.
123 | dataset_dict["image"] = torch.as_tensor(np.ascontiguousarray(image.transpose(2, 0, 1)))
124 |
125 | if not self.is_train:
126 | # USER: Modify this if you want to keep them for some reason.
127 | dataset_dict.pop("annotations", None)
128 | return dataset_dict
129 |
130 | if "pan_seg_file_name" in dataset_dict:
131 | pan_seg_gt = utils.read_image(dataset_dict.pop("pan_seg_file_name"), "RGB")
132 | segments_info = dataset_dict["segments_info"]
133 |
134 | # apply the same transformation to panoptic segmentation
135 | pan_seg_gt = transforms.apply_segmentation(pan_seg_gt)
136 |
137 | from panopticapi.utils import rgb2id
138 |
139 | pan_seg_gt = rgb2id(pan_seg_gt)
140 |
141 | instances = Instances(image_shape)
142 | classes = []
143 | masks = []
144 | for segment_info in segments_info:
145 | class_id = segment_info["category_id"]
146 | if not segment_info["iscrowd"]:
147 | classes.append(class_id)
148 | masks.append(pan_seg_gt == segment_info["id"])
149 |
150 | classes = np.array(classes)
151 | instances.gt_classes = torch.tensor(classes, dtype=torch.int64)
152 | if len(masks) == 0:
153 | # Some images do not have annotations (all ignored)
154 | instances.gt_masks = torch.zeros((0, pan_seg_gt.shape[-2], pan_seg_gt.shape[-1]))
155 | instances.gt_boxes = Boxes(torch.zeros((0, 4)))
156 | else:
157 | masks = BitMasks(
158 | torch.stack([torch.from_numpy(np.ascontiguousarray(x.copy())) for x in masks])
159 | )
160 | instances.gt_masks = masks.tensor
161 | instances.gt_boxes = masks.get_bounding_boxes()
162 |
163 | dataset_dict["instances"] = instances
164 |
165 | return dataset_dict
166 |
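
A hedged sketch of plugging this mapper into detectron2's training loader; it assumes projects/YOSO is importable as yoso, that the COCO panoptic data is set up under $DETECTRON2_DATASETS, and that the YAML path matches the checkout layout.

    from detectron2.config import get_cfg
    from detectron2.data import build_detection_train_loader
    from yoso.config import add_yoso_config
    from yoso.data.dataset_mappers.yoso_panoptic_lsj_dataset_mapper import YOSOPanopticLSJDatasetMapper

    cfg = get_cfg()
    add_yoso_config(cfg)
    cfg.merge_from_file("projects/YOSO/configs/coco/panoptic-segmentation/YOSO-R50.yaml")

    # The mapper is @configurable, so passing the cfg dispatches to from_config().
    mapper = YOSOPanopticLSJDatasetMapper(cfg, is_train=True)
    train_loader = build_detection_train_loader(cfg, mapper=mapper)  # needs coco_2017_train_panoptic on disk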
--------------------------------------------------------------------------------
/projects/YOSO/yoso/data/datasets/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | from . import (
3 | register_ade20k_full,
4 | register_ade20k_panoptic,
5 | register_coco_stuff_10k,
6 | register_mapillary_vistas,
7 | register_coco_panoptic_annos_semseg,
8 | register_ade20k_instance,
9 | register_mapillary_vistas_panoptic,
10 | )
11 |
--------------------------------------------------------------------------------
/projects/YOSO/yoso/data/datasets/register_ade20k_instance.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | import json
3 | import logging
4 | import numpy as np
5 | import os
6 | from PIL import Image
7 |
8 | from detectron2.data import DatasetCatalog, MetadataCatalog
9 | from detectron2.data.datasets.coco import load_coco_json, register_coco_instances
10 | from detectron2.utils.file_io import PathManager
11 |
12 | ADE_CATEGORIES = [{'id': 7, 'name': 'bed'}, {'id': 8, 'name': 'windowpane'}, {'id': 10, 'name': 'cabinet'}, {'id': 12, 'name': 'person'}, {'id': 14, 'name': 'door'}, {'id': 15, 'name': 'table'}, {'id': 18, 'name': 'curtain'}, {'id': 19, 'name': 'chair'}, {'id': 20, 'name': 'car'}, {'id': 22, 'name': 'painting'}, {'id': 23, 'name': 'sofa'}, {'id': 24, 'name': 'shelf'}, {'id': 27, 'name': 'mirror'}, {'id': 30, 'name': 'armchair'}, {'id': 31, 'name': 'seat'}, {'id': 32, 'name': 'fence'}, {'id': 33, 'name': 'desk'}, {'id': 35, 'name': 'wardrobe'}, {'id': 36, 'name': 'lamp'}, {'id': 37, 'name': 'bathtub'}, {'id': 38, 'name': 'railing'}, {'id': 39, 'name': 'cushion'}, {'id': 41, 'name': 'box'}, {'id': 42, 'name': 'column'}, {'id': 43, 'name': 'signboard'}, {'id': 44, 'name': 'chest of drawers'}, {'id': 45, 'name': 'counter'}, {'id': 47, 'name': 'sink'}, {'id': 49, 'name': 'fireplace'}, {'id': 50, 'name': 'refrigerator'}, {'id': 53, 'name': 'stairs'}, {'id': 55, 'name': 'case'}, {'id': 56, 'name': 'pool table'}, {'id': 57, 'name': 'pillow'}, {'id': 58, 'name': 'screen door'}, {'id': 62, 'name': 'bookcase'}, {'id': 64, 'name': 'coffee table'}, {'id': 65, 'name': 'toilet'}, {'id': 66, 'name': 'flower'}, {'id': 67, 'name': 'book'}, {'id': 69, 'name': 'bench'}, {'id': 70, 'name': 'countertop'}, {'id': 71, 'name': 'stove'}, {'id': 72, 'name': 'palm'}, {'id': 73, 'name': 'kitchen island'}, {'id': 74, 'name': 'computer'}, {'id': 75, 'name': 'swivel chair'}, {'id': 76, 'name': 'boat'}, {'id': 78, 'name': 'arcade machine'}, {'id': 80, 'name': 'bus'}, {'id': 81, 'name': 'towel'}, {'id': 82, 'name': 'light'}, {'id': 83, 'name': 'truck'}, {'id': 85, 'name': 'chandelier'}, {'id': 86, 'name': 'awning'}, {'id': 87, 'name': 'streetlight'}, {'id': 88, 'name': 'booth'}, {'id': 89, 'name': 'television receiver'}, {'id': 90, 'name': 'airplane'}, {'id': 92, 'name': 'apparel'}, {'id': 93, 'name': 'pole'}, {'id': 95, 'name': 'bannister'}, {'id': 97, 'name': 'ottoman'}, {'id': 98, 'name': 'bottle'}, {'id': 102, 'name': 'van'}, {'id': 103, 'name': 'ship'}, {'id': 104, 'name': 'fountain'}, {'id': 107, 'name': 'washer'}, {'id': 108, 'name': 'plaything'}, {'id': 110, 'name': 'stool'}, {'id': 111, 'name': 'barrel'}, {'id': 112, 'name': 'basket'}, {'id': 115, 'name': 'bag'}, {'id': 116, 'name': 'minibike'}, {'id': 118, 'name': 'oven'}, {'id': 119, 'name': 'ball'}, {'id': 120, 'name': 'food'}, {'id': 121, 'name': 'step'}, {'id': 123, 'name': 'trade name'}, {'id': 124, 'name': 'microwave'}, {'id': 125, 'name': 'pot'}, {'id': 126, 'name': 'animal'}, {'id': 127, 'name': 'bicycle'}, {'id': 129, 'name': 'dishwasher'}, {'id': 130, 'name': 'screen'}, {'id': 132, 'name': 'sculpture'}, {'id': 133, 'name': 'hood'}, {'id': 134, 'name': 'sconce'}, {'id': 135, 'name': 'vase'}, {'id': 136, 'name': 'traffic light'}, {'id': 137, 'name': 'tray'}, {'id': 138, 'name': 'ashcan'}, {'id': 139, 'name': 'fan'}, {'id': 142, 'name': 'plate'}, {'id': 143, 'name': 'monitor'}, {'id': 144, 'name': 'bulletin board'}, {'id': 146, 'name': 'radiator'}, {'id': 147, 'name': 'glass'}, {'id': 148, 'name': 'clock'}, {'id': 149, 'name': 'flag'}]
13 |
14 |
15 | _PREDEFINED_SPLITS = {
16 | # point annotations without masks
17 | "ade20k_instance_train": (
18 | "ADEChallengeData2016/images/training",
19 | "ADEChallengeData2016/ade20k_instance_train.json",
20 | ),
21 | "ade20k_instance_val": (
22 | "ADEChallengeData2016/images/validation",
23 | "ADEChallengeData2016/ade20k_instance_val.json",
24 | ),
25 | }
26 |
27 |
28 | def _get_ade_instances_meta():
29 | thing_ids = [k["id"] for k in ADE_CATEGORIES]
30 | assert len(thing_ids) == 100, len(thing_ids)
31 | # Mapping from the incontiguous ADE category id to an id in [0, 99]
32 | thing_dataset_id_to_contiguous_id = {k: i for i, k in enumerate(thing_ids)}
33 | thing_classes = [k["name"] for k in ADE_CATEGORIES]
34 | ret = {
35 | "thing_dataset_id_to_contiguous_id": thing_dataset_id_to_contiguous_id,
36 | "thing_classes": thing_classes,
37 | }
38 | return ret
39 |
40 |
41 | def register_all_ade20k_instance(root):
42 | for key, (image_root, json_file) in _PREDEFINED_SPLITS.items():
43 | # Assume pre-defined datasets live in `./datasets`.
44 | register_coco_instances(
45 | key,
46 | _get_ade_instances_meta(),
47 | os.path.join(root, json_file) if "://" not in json_file else json_file,
48 | os.path.join(root, image_root),
49 | )
50 |
51 |
52 | _root = os.getenv("DETECTRON2_DATASETS", "datasets")
53 | register_all_ade20k_instance(_root)
54 |
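
A small sketch of what the registration above exposes through the catalogs; it assumes the project is importable as yoso, and loading the dicts (unlike the metadata) requires the ADE20K instance JSON and images under $DETECTRON2_DATASETS.

    import yoso.data.datasets  # noqa: F401 -- importing runs register_all_ade20k_instance(...)
    from detectron2.data import DatasetCatalog, MetadataCatalog

    meta = MetadataCatalog.get("ade20k_instance_val")
    print(len(meta.thing_classes))                      # 100 instance categories

    dicts = DatasetCatalog.get("ade20k_instance_val")   # needs the JSON/images on disk
    print(dicts[0]["file_name"])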
--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
1 | [isort]
2 | line_length=100
3 | multi_line_output=3
4 | include_trailing_comma=True
5 | known_standard_library=numpy,setuptools,mock
6 | skip=./datasets,docs
7 | skip_glob=*/__init__.py
8 | known_myself=detectron2
9 | known_third_party=fvcore,matplotlib,cv2,torch,torchvision,PIL,pycocotools,yacs,termcolor,cityscapesscripts,tabulate,tqdm,scipy,lvis,psutil,pkg_resources,caffe2,onnx
10 | no_lines_before=STDLIB,THIRDPARTY
11 | sections=FUTURE,STDLIB,THIRDPARTY,myself,FIRSTPARTY,LOCALFOLDER
12 | default_section=FIRSTPARTY
13 |
14 | [mypy]
15 | python_version=3.6
16 | ignore_missing_imports = True
17 | warn_unused_configs = True
18 | disallow_untyped_defs = True
19 | check_untyped_defs = True
20 | warn_unused_ignores = True
21 | warn_redundant_casts = True
22 | show_column_numbers = True
23 | follow_imports = silent
24 | allow_redefinition = True
25 | ; Require all functions to be annotated
26 | disallow_incomplete_defs = True
27 |
--------------------------------------------------------------------------------