├── .github ├── CODE_OF_CONDUCT.md └── CONTRIBUTING.md ├── .gitignore ├── LICENSE ├── README.md ├── configs ├── _base_ │ ├── datasets │ │ ├── cityscapes_detection.py │ │ ├── cityscapes_instance.py │ │ ├── coco_detection.py │ │ ├── coco_instance.py │ │ ├── coco_instance_semantic.py │ │ ├── deepfashion.py │ │ ├── lvis_v0.5_detection.py │ │ ├── lvis_v0.5_detection_shot.py │ │ ├── lvis_v0.5_instance.py │ │ ├── lvis_v1_instance.py │ │ ├── voc0712.py │ │ └── wider_face.py │ ├── default_runtime.py │ ├── models │ │ ├── cascade_mask_rcnn_r50_fpn.py │ │ ├── cascade_rcnn_r50_fpn.py │ │ ├── fast_rcnn_r50_fpn.py │ │ ├── faster_rcnn_r50_caffe_c4.py │ │ ├── faster_rcnn_r50_caffe_dc5.py │ │ ├── faster_rcnn_r50_fpn.py │ │ ├── mask_rcnn_r50_caffe_c4.py │ │ ├── mask_rcnn_r50_fpn.py │ │ ├── retinanet_r50_fpn.py │ │ ├── rpn_r50_caffe_c4.py │ │ ├── rpn_r50_fpn.py │ │ └── ssd300.py │ └── schedules │ │ ├── schedule_1x.py │ │ ├── schedule_20e.py │ │ └── schedule_2x.py ├── mask_rcnn │ ├── class_agn_mask_rcnn.py │ ├── class_agn_mask_rcnn_gn.py │ ├── class_agn_mask_rcnn_gn_online.py │ ├── class_agn_mask_rcnn_pa.py │ └── mask_rcnn.py ├── oln_box │ ├── class_agn_faster_rcnn.py │ └── oln_box.py └── pairwise_affinity │ ├── pa_extract.py │ └── pa_train.py ├── docker └── Dockerfile ├── docs ├── 1_exist_data_model.md ├── 2_new_data_model.md ├── 3_exist_data_new_model.md ├── Makefile ├── api.rst ├── changelog.md ├── compatibility.md ├── conf.py ├── conventions.md ├── faq.md ├── get_started.md ├── index.rst ├── make.bat ├── model_zoo.md ├── projects.md ├── robustness_benchmarking.md ├── stat.py ├── tutorials │ ├── config.md │ ├── customize_dataset.md │ ├── customize_losses.md │ ├── customize_models.md │ ├── customize_runtime.md │ ├── data_pipeline.md │ ├── finetune.md │ ├── index.rst │ └── pytorch2onnx.md └── useful_tools.md ├── images ├── overview_updated.png └── teaser_large.png ├── mmdet ├── __init__.py ├── apis │ ├── __init__.py │ ├── inference.py │ ├── test.py │ └── train.py ├── core │ ├── 
__init__.py │ ├── anchor │ │ ├── __init__.py │ │ ├── anchor_generator.py │ │ ├── builder.py │ │ ├── point_generator.py │ │ └── utils.py │ ├── bbox │ │ ├── __init__.py │ │ ├── assigners │ │ │ ├── __init__.py │ │ │ ├── approx_max_iou_assigner.py │ │ │ ├── assign_result.py │ │ │ ├── atss_assigner.py │ │ │ ├── base_assigner.py │ │ │ ├── center_region_assigner.py │ │ │ ├── grid_assigner.py │ │ │ ├── hungarian_assigner.py │ │ │ ├── max_iou_assigner.py │ │ │ ├── point_assigner.py │ │ │ └── region_assigner.py │ │ ├── builder.py │ │ ├── coder │ │ │ ├── __init__.py │ │ │ ├── base_bbox_coder.py │ │ │ ├── bucketing_bbox_coder.py │ │ │ ├── delta_xywh_bbox_coder.py │ │ │ ├── legacy_delta_xywh_bbox_coder.py │ │ │ ├── pseudo_bbox_coder.py │ │ │ ├── tblr_bbox_coder.py │ │ │ └── yolo_bbox_coder.py │ │ ├── demodata.py │ │ ├── iou_calculators │ │ │ ├── __init__.py │ │ │ ├── builder.py │ │ │ └── iou2d_calculator.py │ │ ├── match_costs │ │ │ ├── __init__.py │ │ │ ├── builder.py │ │ │ └── match_cost.py │ │ ├── samplers │ │ │ ├── __init__.py │ │ │ ├── base_sampler.py │ │ │ ├── combined_sampler.py │ │ │ ├── instance_balanced_pos_sampler.py │ │ │ ├── iou_balanced_neg_sampler.py │ │ │ ├── ohem_sampler.py │ │ │ ├── pseudo_sampler.py │ │ │ ├── random_sampler.py │ │ │ ├── sampling_result.py │ │ │ └── score_hlr_sampler.py │ │ └── transforms.py │ ├── evaluation │ │ ├── __init__.py │ │ ├── bbox_overlaps.py │ │ ├── class_names.py │ │ ├── eval_hooks.py │ │ ├── mean_ap.py │ │ └── recall.py │ ├── export │ │ ├── __init__.py │ │ └── pytorch2onnx.py │ ├── fp16 │ │ ├── __init__.py │ │ └── deprecated_fp16_utils.py │ ├── mask │ │ ├── __init__.py │ │ ├── mask_target.py │ │ ├── structures.py │ │ └── utils.py │ ├── post_processing │ │ ├── __init__.py │ │ ├── bbox_nms.py │ │ └── merge_augs.py │ ├── utils │ │ ├── __init__.py │ │ ├── dist_utils.py │ │ └── misc.py │ └── visualization │ │ ├── __init__.py │ │ └── image.py ├── datasets │ ├── __init__.py │ ├── builder.py │ ├── coco.py │ ├── coco_split.py │ ├── 
coco_split_online.py │ ├── coco_split_pseudo_masks.py │ ├── cocoeval_wrappers.py │ ├── custom.py │ ├── dataset_wrappers.py │ ├── pipelines │ │ ├── __init__.py │ │ ├── auto_augment.py │ │ ├── compose.py │ │ ├── formating.py │ │ ├── instaboost.py │ │ ├── loading.py │ │ ├── test_time_aug.py │ │ └── transforms.py │ ├── samplers │ │ ├── __init__.py │ │ ├── distributed_sampler.py │ │ └── group_sampler.py │ └── utils.py ├── models │ ├── __init__.py │ ├── backbones │ │ ├── __init__.py │ │ ├── darknet.py │ │ ├── detectors_resnet.py │ │ ├── detectors_resnext.py │ │ ├── hourglass.py │ │ ├── hrnet.py │ │ ├── regnet.py │ │ ├── res2net.py │ │ ├── resnest.py │ │ ├── resnet.py │ │ ├── resnext.py │ │ ├── ssd_vgg.py │ │ └── trident_resnet.py │ ├── builder.py │ ├── dense_heads │ │ ├── __init__.py │ │ ├── anchor_head.py │ │ ├── base_dense_head.py │ │ ├── dense_test_mixins.py │ │ ├── oln_rpn_head.py │ │ ├── rpn_head.py │ │ └── rpn_test_mixin.py │ ├── detectors │ │ ├── __init__.py │ │ ├── base.py │ │ ├── faster_rcnn.py │ │ ├── mask_rcnn.py │ │ ├── pa_predictor.py │ │ ├── rpn.py │ │ ├── rpn_detector.py │ │ ├── two_stage.py │ │ └── two_tower.py │ ├── losses │ │ ├── __init__.py │ │ ├── accuracy.py │ │ ├── ae_loss.py │ │ ├── balanced_l1_loss.py │ │ ├── cross_entropy_loss.py │ │ ├── focal_loss.py │ │ ├── gaussian_focal_loss.py │ │ ├── gfocal_loss.py │ │ ├── ghm_loss.py │ │ ├── iou_loss.py │ │ ├── mse_loss.py │ │ ├── pisa_loss.py │ │ ├── smooth_l1_loss.py │ │ ├── utils.py │ │ └── varifocal_loss.py │ ├── necks │ │ ├── __init__.py │ │ └── fpn.py │ ├── roi_heads │ │ ├── __init__.py │ │ ├── base_roi_head.py │ │ ├── bbox_heads │ │ │ ├── __init__.py │ │ │ ├── bbox_head.py │ │ │ ├── convfc_bbox_head.py │ │ │ └── convfc_bbox_score_head.py │ │ ├── mask_heads │ │ │ ├── __init__.py │ │ │ ├── coarse_mask_head.py │ │ │ ├── fcn_mask_head.py │ │ │ ├── fused_semantic_head.py │ │ │ ├── grid_head.py │ │ │ ├── htc_mask_head.py │ │ │ ├── mask_point_head.py │ │ │ └── maskiou_head.py │ │ ├── oln_roi_head.py │ │ 
├── rec_roi_head.py │ │ ├── roi_extractors │ │ │ ├── __init__.py │ │ │ ├── base_roi_extractor.py │ │ │ ├── generic_roi_extractor.py │ │ │ └── single_level_roi_extractor.py │ │ ├── shared_heads │ │ │ ├── __init__.py │ │ │ └── res_layer.py │ │ ├── standard_roi_head.py │ │ └── test_mixins.py │ └── utils │ │ ├── __init__.py │ │ ├── builder.py │ │ ├── gaussian_target.py │ │ ├── positional_encoding.py │ │ ├── res_layer.py │ │ └── transformer.py ├── utils │ ├── __init__.py │ ├── collect_env.py │ ├── contextmanagers.py │ ├── logger.py │ ├── profiling.py │ └── util_mixins.py └── version.py ├── pa_lib ├── affinity2mask.py ├── cython_lib │ ├── graph_helper.pyx │ └── setup.py ├── evaluate_helper.py ├── oln_ranker.py └── rag.py ├── pytest.ini ├── requirements.txt ├── requirements ├── build.txt ├── docs.txt ├── optional.txt ├── readthedocs.txt ├── runtime.txt └── tests.txt ├── resources ├── coco_test_12510.jpg ├── corruptions_sev_3.png ├── data_pipeline.png ├── loss_curve.png └── mmdet-logo.png ├── setup.cfg ├── setup.py ├── tests ├── async_benchmark.py ├── test_anchor.py ├── test_assigner.py ├── test_async.py ├── test_coder.py ├── test_config.py ├── test_data │ ├── test_dataset.py │ ├── test_formatting.py │ ├── test_img_augment.py │ ├── test_loading.py │ ├── test_models_aug_test.py │ ├── test_rotate.py │ ├── test_sampler.py │ ├── test_shear.py │ ├── test_transform.py │ ├── test_translate.py │ └── test_utils.py ├── test_eval_hook.py ├── test_fp16.py ├── test_iou2d_calculator.py ├── test_masks.py ├── test_misc.py ├── test_models │ ├── test_backbones.py │ ├── test_forward.py │ ├── test_heads.py │ ├── test_losses.py │ ├── test_necks.py │ ├── test_pisa_heads.py │ ├── test_position_encoding.py │ ├── test_roi_extractor.py │ └── test_transformer.py ├── test_version.py └── test_visualization.py └── tools ├── analyze_logs.py ├── analyze_results.py ├── benchmark.py ├── browse_dataset.py ├── coco_error_analysis.py ├── convert_datasets ├── cityscapes.py └── pascal_voc.py ├── 
detectron2pytorch.py ├── dist_test_bbox.sh ├── dist_train.sh ├── dist_train_and_test_bbox.sh ├── eval_metric.py ├── extract_pa_masks.py ├── get_flops.py ├── interpolate_extracted_masks.py ├── merge_annotations.py ├── model_test.py ├── print_config.py ├── publish_model.py ├── pytorch2onnx.py ├── regnet2mmdet.py ├── robustness_eval.py ├── slurm_test.sh ├── slurm_train.sh ├── test.py ├── test_pa.py ├── test_robustness.py ├── train.py └── upgrade_model_version.py /.github/CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Code of Conduct 2 | 3 | Facebook has adopted a Code of Conduct that we expect project participants to adhere to. 4 | Please read the [full text](https://code.fb.com/codeofconduct/) 5 | so that you can understand what actions will and will not be tolerated. 6 | -------------------------------------------------------------------------------- /.github/CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to Generic-Grouping 2 | We want to make contributing to this project as easy and transparent as 3 | possible. 4 | 5 | ## Pull Requests 6 | We actively welcome your pull requests. 7 | 8 | 1. Fork the repo and create your branch from `main`. 9 | 2. If you've added code that should be tested, add tests. 10 | 3. If you've changed APIs, update the documentation. 11 | 4. Ensure the test suite passes. 12 | 5. Make sure your code lints. 13 | 6. If you haven't already, complete the Contributor License Agreement ("CLA"). 14 | 15 | ## Contributor License Agreement ("CLA") 16 | In order to accept your pull request, we need you to submit a CLA. You only need 17 | to do this once to work on any of Facebook's open source projects. 18 | 19 | Complete your CLA here: 20 | 21 | ## Issues 22 | We use GitHub issues to track public bugs. Please ensure your description is 23 | clear and has sufficient instructions to be able to reproduce the issue. 
24 | 25 | Facebook has a [bounty program](https://www.facebook.com/whitehat/) for the safe 26 | disclosure of security bugs. In those cases, please go through the process 27 | outlined on that page and do not file a public issue. 28 | 29 | ## Coding Style 30 | * 4 spaces for indentation rather than tabs 31 | 32 | ## License 33 | By contributing to Generic-Grouping, you agree that your contributions will be licensed 34 | under the LICENSE file in the root directory of this source tree. 35 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | db.sqlite3 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # SageMath parsed files 82 | *.sage.py 83 | 84 | # Environments 85 | .env 86 | .venv 87 | env/ 88 | venv/ 89 | ENV/ 90 | env.bak/ 91 | venv.bak/ 92 | 93 | # Spyder project settings 94 | .spyderproject 95 | .spyproject 96 | 97 | # Rope project settings 98 | .ropeproject 99 | 100 | # mkdocs documentation 101 | /site 102 | 103 | # mypy 104 | .mypy_cache/ 105 | 106 | data/ 107 | data 108 | .vscode 109 | .idea 110 | .DS_Store 111 | 112 | # custom 113 | *.pkl 114 | *.pkl.json 115 | *.log.json 116 | work_dirs/ 117 | run_scripts/ 118 | 119 | # Pytorch 120 | *.pth 121 | *.py~ 122 | *.sh~ 123 | 124 | # Token 125 | token.txt 126 | -------------------------------------------------------------------------------- /configs/_base_/datasets/cityscapes_detection.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 
3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree 6 | 7 | 8 | dataset_type = "CityscapesDataset" 9 | data_root = "data/cityscapes/" 10 | img_norm_cfg = dict( 11 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True 12 | ) 13 | train_pipeline = [ 14 | dict(type="LoadImageFromFile"), 15 | dict(type="LoadAnnotations", with_bbox=True), 16 | dict(type="Resize", img_scale=[(2048, 800), (2048, 1024)], keep_ratio=True), 17 | dict(type="RandomFlip", flip_ratio=0.5), 18 | dict(type="Normalize", **img_norm_cfg), 19 | dict(type="Pad", size_divisor=32), 20 | dict(type="DefaultFormatBundle"), 21 | dict(type="Collect", keys=["img", "gt_bboxes", "gt_labels"]), 22 | ] 23 | test_pipeline = [ 24 | dict(type="LoadImageFromFile"), 25 | dict( 26 | type="MultiScaleFlipAug", 27 | img_scale=(2048, 1024), 28 | flip=False, 29 | transforms=[ 30 | dict(type="Resize", keep_ratio=True), 31 | dict(type="RandomFlip"), 32 | dict(type="Normalize", **img_norm_cfg), 33 | dict(type="Pad", size_divisor=32), 34 | dict(type="ImageToTensor", keys=["img"]), 35 | dict(type="Collect", keys=["img"]), 36 | ], 37 | ), 38 | ] 39 | data = dict( 40 | samples_per_gpu=1, 41 | workers_per_gpu=2, 42 | train=dict( 43 | type="RepeatDataset", 44 | times=8, 45 | dataset=dict( 46 | type=dataset_type, 47 | ann_file=data_root + "annotations/instancesonly_filtered_gtFine_train.json", 48 | img_prefix=data_root + "leftImg8bit/train/", 49 | pipeline=train_pipeline, 50 | ), 51 | ), 52 | val=dict( 53 | type=dataset_type, 54 | ann_file=data_root + "annotations/instancesonly_filtered_gtFine_val.json", 55 | img_prefix=data_root + "leftImg8bit/val/", 56 | pipeline=test_pipeline, 57 | ), 58 | test=dict( 59 | type=dataset_type, 60 | ann_file=data_root + "annotations/instancesonly_filtered_gtFine_test.json", 61 | img_prefix=data_root + "leftImg8bit/test/", 62 | pipeline=test_pipeline, 63 | ), 64 | ) 65 | evaluation = dict(interval=1, 
metric="bbox") 66 | -------------------------------------------------------------------------------- /configs/_base_/datasets/cityscapes_instance.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree 6 | 7 | 8 | dataset_type = "CityscapesDataset" 9 | data_root = "data/cityscapes/" 10 | img_norm_cfg = dict( 11 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True 12 | ) 13 | train_pipeline = [ 14 | dict(type="LoadImageFromFile"), 15 | dict(type="LoadAnnotations", with_bbox=True, with_mask=True), 16 | dict(type="Resize", img_scale=[(2048, 800), (2048, 1024)], keep_ratio=True), 17 | dict(type="RandomFlip", flip_ratio=0.5), 18 | dict(type="Normalize", **img_norm_cfg), 19 | dict(type="Pad", size_divisor=32), 20 | dict(type="DefaultFormatBundle"), 21 | dict(type="Collect", keys=["img", "gt_bboxes", "gt_labels", "gt_masks"]), 22 | ] 23 | test_pipeline = [ 24 | dict(type="LoadImageFromFile"), 25 | dict( 26 | type="MultiScaleFlipAug", 27 | img_scale=(2048, 1024), 28 | flip=False, 29 | transforms=[ 30 | dict(type="Resize", keep_ratio=True), 31 | dict(type="RandomFlip"), 32 | dict(type="Normalize", **img_norm_cfg), 33 | dict(type="Pad", size_divisor=32), 34 | dict(type="ImageToTensor", keys=["img"]), 35 | dict(type="Collect", keys=["img"]), 36 | ], 37 | ), 38 | ] 39 | data = dict( 40 | samples_per_gpu=1, 41 | workers_per_gpu=2, 42 | train=dict( 43 | type="RepeatDataset", 44 | times=8, 45 | dataset=dict( 46 | type=dataset_type, 47 | ann_file=data_root + "annotations/instancesonly_filtered_gtFine_train.json", 48 | img_prefix=data_root + "leftImg8bit/train/", 49 | pipeline=train_pipeline, 50 | ), 51 | ), 52 | val=dict( 53 | type=dataset_type, 54 | ann_file=data_root + 
"annotations/instancesonly_filtered_gtFine_val.json", 55 | img_prefix=data_root + "leftImg8bit/val/", 56 | pipeline=test_pipeline, 57 | ), 58 | test=dict( 59 | type=dataset_type, 60 | ann_file=data_root + "annotations/instancesonly_filtered_gtFine_test.json", 61 | img_prefix=data_root + "leftImg8bit/test/", 62 | pipeline=test_pipeline, 63 | ), 64 | ) 65 | evaluation = dict(metric=["bbox", "segm"]) 66 | -------------------------------------------------------------------------------- /configs/_base_/datasets/coco_detection.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree 6 | 7 | 8 | dataset_type = "CocoDataset" 9 | data_root = "data/coco/" 10 | img_norm_cfg = dict( 11 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True 12 | ) 13 | train_pipeline = [ 14 | dict(type="LoadImageFromFile"), 15 | dict(type="LoadAnnotations", with_bbox=True), 16 | dict(type="Resize", img_scale=(1333, 800), keep_ratio=True), 17 | dict(type="RandomFlip", flip_ratio=0.5), 18 | dict(type="Normalize", **img_norm_cfg), 19 | dict(type="Pad", size_divisor=32), 20 | dict(type="DefaultFormatBundle"), 21 | dict(type="Collect", keys=["img", "gt_bboxes", "gt_labels"]), 22 | ] 23 | test_pipeline = [ 24 | dict(type="LoadImageFromFile"), 25 | dict( 26 | type="MultiScaleFlipAug", 27 | img_scale=(1333, 800), 28 | flip=False, 29 | transforms=[ 30 | dict(type="Resize", keep_ratio=True), 31 | dict(type="RandomFlip"), 32 | dict(type="Normalize", **img_norm_cfg), 33 | dict(type="Pad", size_divisor=32), 34 | dict(type="ImageToTensor", keys=["img"]), 35 | dict(type="Collect", keys=["img"]), 36 | ], 37 | ), 38 | ] 39 | data = dict( 40 | samples_per_gpu=2, 41 | workers_per_gpu=2, 42 | train=dict( 43 | type=dataset_type, 44 | ann_file=data_root + 
"annotations/instances_train2017.json", 45 | img_prefix=data_root + "train2017/", 46 | pipeline=train_pipeline, 47 | ), 48 | val=dict( 49 | type=dataset_type, 50 | ann_file=data_root + "annotations/instances_val2017.json", 51 | img_prefix=data_root + "val2017/", 52 | pipeline=test_pipeline, 53 | ), 54 | test=dict( 55 | type=dataset_type, 56 | ann_file=data_root + "annotations/instances_val2017.json", 57 | img_prefix=data_root + "val2017/", 58 | pipeline=test_pipeline, 59 | ), 60 | ) 61 | evaluation = dict(interval=1, metric="bbox") 62 | -------------------------------------------------------------------------------- /configs/_base_/datasets/coco_instance.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree 6 | 7 | 8 | dataset_type = "CocoDataset" 9 | data_root = "data/coco/" 10 | img_norm_cfg = dict( 11 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True 12 | ) 13 | train_pipeline = [ 14 | dict(type="LoadImageFromFile"), 15 | dict(type="LoadAnnotations", with_bbox=True, with_mask=True), 16 | dict(type="Resize", img_scale=(1333, 800), keep_ratio=True), 17 | dict(type="RandomFlip", flip_ratio=0.5), 18 | dict(type="Normalize", **img_norm_cfg), 19 | dict(type="Pad", size_divisor=32), 20 | dict(type="DefaultFormatBundle"), 21 | dict(type="Collect", keys=["img", "gt_bboxes", "gt_labels", "gt_masks"]), 22 | ] 23 | test_pipeline = [ 24 | dict(type="LoadImageFromFile"), 25 | dict( 26 | type="MultiScaleFlipAug", 27 | img_scale=(1333, 800), 28 | flip=False, 29 | transforms=[ 30 | dict(type="Resize", keep_ratio=True), 31 | dict(type="RandomFlip"), 32 | dict(type="Normalize", **img_norm_cfg), 33 | dict(type="Pad", size_divisor=32), 34 | dict(type="ImageToTensor", keys=["img"]), 35 | dict(type="Collect", 
keys=["img"]), 36 | ], 37 | ), 38 | ] 39 | data = dict( 40 | samples_per_gpu=2, 41 | workers_per_gpu=2, 42 | train=dict( 43 | type=dataset_type, 44 | ann_file=data_root + "annotations/instances_train2017.json", 45 | img_prefix=data_root + "train2017/", 46 | pipeline=train_pipeline, 47 | ), 48 | val=dict( 49 | type=dataset_type, 50 | ann_file=data_root + "annotations/instances_val2017.json", 51 | img_prefix=data_root + "val2017/", 52 | pipeline=test_pipeline, 53 | ), 54 | test=dict( 55 | type=dataset_type, 56 | ann_file=data_root + "annotations/instances_val2017.json", 57 | img_prefix=data_root + "val2017/", 58 | pipeline=test_pipeline, 59 | ), 60 | ) 61 | evaluation = dict(metric=["bbox", "segm"]) 62 | -------------------------------------------------------------------------------- /configs/_base_/datasets/coco_instance_semantic.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 
3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree 6 | 7 | 8 | dataset_type = "CocoDataset" 9 | data_root = "data/coco/" 10 | img_norm_cfg = dict( 11 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True 12 | ) 13 | train_pipeline = [ 14 | dict(type="LoadImageFromFile"), 15 | dict(type="LoadAnnotations", with_bbox=True, with_mask=True, with_seg=True), 16 | dict(type="Resize", img_scale=(1333, 800), keep_ratio=True), 17 | dict(type="RandomFlip", flip_ratio=0.5), 18 | dict(type="Normalize", **img_norm_cfg), 19 | dict(type="Pad", size_divisor=32), 20 | dict(type="SegRescale", scale_factor=1 / 8), 21 | dict(type="DefaultFormatBundle"), 22 | dict( 23 | type="Collect", 24 | keys=["img", "gt_bboxes", "gt_labels", "gt_masks", "gt_semantic_seg"], 25 | ), 26 | ] 27 | test_pipeline = [ 28 | dict(type="LoadImageFromFile"), 29 | dict( 30 | type="MultiScaleFlipAug", 31 | img_scale=(1333, 800), 32 | flip=False, 33 | transforms=[ 34 | dict(type="Resize", keep_ratio=True), 35 | dict(type="RandomFlip", flip_ratio=0.5), 36 | dict(type="Normalize", **img_norm_cfg), 37 | dict(type="Pad", size_divisor=32), 38 | dict(type="ImageToTensor", keys=["img"]), 39 | dict(type="Collect", keys=["img"]), 40 | ], 41 | ), 42 | ] 43 | data = dict( 44 | samples_per_gpu=2, 45 | workers_per_gpu=2, 46 | train=dict( 47 | type=dataset_type, 48 | ann_file=data_root + "annotations/instances_train2017.json", 49 | img_prefix=data_root + "train2017/", 50 | seg_prefix=data_root + "stuffthingmaps/train2017/", 51 | pipeline=train_pipeline, 52 | ), 53 | val=dict( 54 | type=dataset_type, 55 | ann_file=data_root + "annotations/instances_val2017.json", 56 | img_prefix=data_root + "val2017/", 57 | pipeline=test_pipeline, 58 | ), 59 | test=dict( 60 | type=dataset_type, 61 | ann_file=data_root + "annotations/instances_val2017.json", 62 | img_prefix=data_root + "val2017/", 63 | pipeline=test_pipeline, 64 | ), 65 | ) 66 | 
evaluation = dict(metric=["bbox", "segm"]) 67 | -------------------------------------------------------------------------------- /configs/_base_/datasets/deepfashion.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree 6 | 7 | 8 | # dataset settings 9 | dataset_type = "DeepFashionDataset" 10 | data_root = "data/DeepFashion/In-shop/" 11 | img_norm_cfg = dict( 12 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True 13 | ) 14 | train_pipeline = [ 15 | dict(type="LoadImageFromFile"), 16 | dict(type="LoadAnnotations", with_bbox=True, with_mask=True), 17 | dict(type="Resize", img_scale=(750, 1101), keep_ratio=True), 18 | dict(type="RandomFlip", flip_ratio=0.5), 19 | dict(type="Normalize", **img_norm_cfg), 20 | dict(type="Pad", size_divisor=32), 21 | dict(type="DefaultFormatBundle"), 22 | dict(type="Collect", keys=["img", "gt_bboxes", "gt_labels", "gt_masks"]), 23 | ] 24 | test_pipeline = [ 25 | dict(type="LoadImageFromFile"), 26 | dict( 27 | type="MultiScaleFlipAug", 28 | img_scale=(750, 1101), 29 | flip=False, 30 | transforms=[ 31 | dict(type="Resize", keep_ratio=True), 32 | dict(type="RandomFlip"), 33 | dict(type="Normalize", **img_norm_cfg), 34 | dict(type="Pad", size_divisor=32), 35 | dict(type="ImageToTensor", keys=["img"]), 36 | dict(type="Collect", keys=["img"]), 37 | ], 38 | ), 39 | ] 40 | data = dict( 41 | imgs_per_gpu=2, 42 | workers_per_gpu=1, 43 | train=dict( 44 | type=dataset_type, 45 | ann_file=data_root + "annotations/DeepFashion_segmentation_query.json", 46 | img_prefix=data_root + "Img/", 47 | pipeline=train_pipeline, 48 | data_root=data_root, 49 | ), 50 | val=dict( 51 | type=dataset_type, 52 | ann_file=data_root + "annotations/DeepFashion_segmentation_query.json", 53 | img_prefix=data_root 
+ "Img/", 54 | pipeline=test_pipeline, 55 | data_root=data_root, 56 | ), 57 | test=dict( 58 | type=dataset_type, 59 | ann_file=data_root + "annotations/DeepFashion_segmentation_gallery.json", 60 | img_prefix=data_root + "Img/", 61 | pipeline=test_pipeline, 62 | data_root=data_root, 63 | ), 64 | ) 65 | evaluation = dict(interval=5, metric=["bbox", "segm"]) 66 | -------------------------------------------------------------------------------- /configs/_base_/datasets/lvis_v0.5_detection.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree 6 | 7 | 8 | _base_ = "coco_detection.py" 9 | dataset_type = "LVISV05Dataset" 10 | data_root = "data/LVIS/" 11 | data = dict( 12 | samples_per_gpu=2, 13 | workers_per_gpu=2, 14 | train=dict( 15 | _delete_=True, 16 | type="ClassBalancedDataset", 17 | oversample_thr=1e-3, 18 | dataset=dict( 19 | type=dataset_type, 20 | ann_file=data_root + "annotations/lvis_v0.5_train.json", 21 | # ann_file=data_root + 'annotations/lvis_v0.5_train_10.json', 22 | img_prefix=data_root + "train2017/", 23 | ), 24 | ), 25 | val=dict( 26 | type=dataset_type, 27 | ann_file=data_root + "annotations/lvis_v0.5_val.json", 28 | img_prefix=data_root + "val2017/", 29 | ), 30 | test=dict( 31 | type=dataset_type, 32 | ann_file=data_root + "annotations/lvis_v0.5_val.json", 33 | img_prefix=data_root + "val2017/", 34 | ), 35 | ) 36 | evaluation = dict(metric=["bbox", "segm"]) 37 | -------------------------------------------------------------------------------- /configs/_base_/datasets/lvis_v0.5_detection_shot.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 
3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree 6 | 7 | 8 | _base_ = "coco_detection.py" 9 | dataset_type = "LVISV05Dataset" 10 | data_root = "data/LVIS/" 11 | data = dict( 12 | samples_per_gpu=2, 13 | workers_per_gpu=2, 14 | train=dict( 15 | _delete_=True, 16 | type="ClassBalancedDataset", 17 | oversample_thr=1e-3, 18 | dataset=dict( 19 | type=dataset_type, 20 | # ann_file=data_root + 'annotations/lvis_v0.5_train.json', 21 | ann_file=data_root + "annotations/lvis_v0.5_train_10.json", 22 | img_prefix=data_root + "train2017/", 23 | ), 24 | ), 25 | val=dict( 26 | type=dataset_type, 27 | ann_file=data_root + "annotations/lvis_v0.5_val.json", 28 | img_prefix=data_root + "val2017/", 29 | ), 30 | test=dict( 31 | type=dataset_type, 32 | ann_file=data_root + "annotations/lvis_v0.5_val.json", 33 | img_prefix=data_root + "val2017/", 34 | ), 35 | ) 36 | evaluation = dict(metric=["bbox", "segm"]) 37 | -------------------------------------------------------------------------------- /configs/_base_/datasets/lvis_v0.5_instance.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 
3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree 6 | 7 | 8 | _base_ = "coco_instance.py" 9 | dataset_type = "LVISV05Dataset" 10 | # data_root = 'data/lvis_v0.5/' 11 | data_root = "/data2/LVIS/" 12 | data = dict( 13 | samples_per_gpu=2, 14 | workers_per_gpu=2, 15 | train=dict( 16 | _delete_=True, 17 | type="ClassBalancedDataset", 18 | oversample_thr=1e-3, 19 | dataset=dict( 20 | type=dataset_type, 21 | ann_file=data_root + "annotations/lvis_v0.5_train.json", 22 | img_prefix=data_root + "train2017/", 23 | ), 24 | ), 25 | val=dict( 26 | type=dataset_type, 27 | ann_file=data_root + "annotations/lvis_v0.5_val.json", 28 | img_prefix=data_root + "val2017/", 29 | ), 30 | test=dict( 31 | type=dataset_type, 32 | ann_file=data_root + "annotations/lvis_v0.5_val.json", 33 | img_prefix=data_root + "val2017/", 34 | ), 35 | ) 36 | evaluation = dict(metric=["bbox", "segm"]) 37 | -------------------------------------------------------------------------------- /configs/_base_/datasets/lvis_v1_instance.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 
3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree 6 | 7 | 8 | _base_ = "coco_instance.py" 9 | dataset_type = "LVISV1Dataset" 10 | data_root = "data/lvis_v1/" 11 | data = dict( 12 | samples_per_gpu=2, 13 | workers_per_gpu=2, 14 | train=dict( 15 | _delete_=True, 16 | type="ClassBalancedDataset", 17 | oversample_thr=1e-3, 18 | dataset=dict( 19 | type=dataset_type, 20 | ann_file=data_root + "annotations/lvis_v1_train.json", 21 | img_prefix=data_root, 22 | ), 23 | ), 24 | val=dict( 25 | type=dataset_type, 26 | ann_file=data_root + "annotations/lvis_v1_val.json", 27 | img_prefix=data_root, 28 | ), 29 | test=dict( 30 | type=dataset_type, 31 | ann_file=data_root + "annotations/lvis_v1_val.json", 32 | img_prefix=data_root, 33 | ), 34 | ) 35 | evaluation = dict(metric=["bbox", "segm"]) 36 | -------------------------------------------------------------------------------- /configs/_base_/datasets/voc0712.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 
3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree 6 | 7 | 8 | # dataset settings 9 | dataset_type = "VOCDataset" 10 | data_root = "data/VOCdevkit/" 11 | img_norm_cfg = dict( 12 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True 13 | ) 14 | train_pipeline = [ 15 | dict(type="LoadImageFromFile"), 16 | dict(type="LoadAnnotations", with_bbox=True), 17 | dict(type="Resize", img_scale=(1000, 600), keep_ratio=True), 18 | dict(type="RandomFlip", flip_ratio=0.5), 19 | dict(type="Normalize", **img_norm_cfg), 20 | dict(type="Pad", size_divisor=32), 21 | dict(type="DefaultFormatBundle"), 22 | dict(type="Collect", keys=["img", "gt_bboxes", "gt_labels"]), 23 | ] 24 | test_pipeline = [ 25 | dict(type="LoadImageFromFile"), 26 | dict( 27 | type="MultiScaleFlipAug", 28 | img_scale=(1000, 600), 29 | flip=False, 30 | transforms=[ 31 | dict(type="Resize", keep_ratio=True), 32 | dict(type="RandomFlip"), 33 | dict(type="Normalize", **img_norm_cfg), 34 | dict(type="Pad", size_divisor=32), 35 | dict(type="ImageToTensor", keys=["img"]), 36 | dict(type="Collect", keys=["img"]), 37 | ], 38 | ), 39 | ] 40 | data = dict( 41 | samples_per_gpu=2, 42 | workers_per_gpu=2, 43 | train=dict( 44 | type="RepeatDataset", 45 | times=3, 46 | dataset=dict( 47 | type=dataset_type, 48 | ann_file=[ 49 | data_root + "VOC2007/ImageSets/Main/trainval.txt", 50 | data_root + "VOC2012/ImageSets/Main/trainval.txt", 51 | ], 52 | img_prefix=[data_root + "VOC2007/", data_root + "VOC2012/"], 53 | pipeline=train_pipeline, 54 | ), 55 | ), 56 | val=dict( 57 | type=dataset_type, 58 | ann_file=data_root + "VOC2007/ImageSets/Main/test.txt", 59 | img_prefix=data_root + "VOC2007/", 60 | pipeline=test_pipeline, 61 | ), 62 | test=dict( 63 | type=dataset_type, 64 | ann_file=data_root + "VOC2007/ImageSets/Main/test.txt", 65 | img_prefix=data_root + "VOC2007/", 66 | pipeline=test_pipeline, 67 | ), 68 | ) 69 | evaluation = 
dict(interval=1, metric="mAP") 70 | -------------------------------------------------------------------------------- /configs/_base_/datasets/wider_face.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree 6 | 7 | 8 | # dataset settings 9 | dataset_type = "WIDERFaceDataset" 10 | data_root = "data/WIDERFace/" 11 | img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], std=[1, 1, 1], to_rgb=True) 12 | train_pipeline = [ 13 | dict(type="LoadImageFromFile", to_float32=True), 14 | dict(type="LoadAnnotations", with_bbox=True), 15 | dict( 16 | type="PhotoMetricDistortion", 17 | brightness_delta=32, 18 | contrast_range=(0.5, 1.5), 19 | saturation_range=(0.5, 1.5), 20 | hue_delta=18, 21 | ), 22 | dict( 23 | type="Expand", 24 | mean=img_norm_cfg["mean"], 25 | to_rgb=img_norm_cfg["to_rgb"], 26 | ratio_range=(1, 4), 27 | ), 28 | dict( 29 | type="MinIoURandomCrop", min_ious=(0.1, 0.3, 0.5, 0.7, 0.9), min_crop_size=0.3 30 | ), 31 | dict(type="Resize", img_scale=(300, 300), keep_ratio=False), 32 | dict(type="Normalize", **img_norm_cfg), 33 | dict(type="RandomFlip", flip_ratio=0.5), 34 | dict(type="DefaultFormatBundle"), 35 | dict(type="Collect", keys=["img", "gt_bboxes", "gt_labels"]), 36 | ] 37 | test_pipeline = [ 38 | dict(type="LoadImageFromFile"), 39 | dict( 40 | type="MultiScaleFlipAug", 41 | img_scale=(300, 300), 42 | flip=False, 43 | transforms=[ 44 | dict(type="Resize", keep_ratio=False), 45 | dict(type="Normalize", **img_norm_cfg), 46 | dict(type="ImageToTensor", keys=["img"]), 47 | dict(type="Collect", keys=["img"]), 48 | ], 49 | ), 50 | ] 51 | data = dict( 52 | samples_per_gpu=60, 53 | workers_per_gpu=2, 54 | train=dict( 55 | type="RepeatDataset", 56 | times=2, 57 | dataset=dict( 58 | type=dataset_type, 59 | ann_file=data_root + 
"train.txt", 60 | img_prefix=data_root + "WIDER_train/", 61 | min_size=17, 62 | pipeline=train_pipeline, 63 | ), 64 | ), 65 | val=dict( 66 | type=dataset_type, 67 | ann_file=data_root + "val.txt", 68 | img_prefix=data_root + "WIDER_val/", 69 | pipeline=test_pipeline, 70 | ), 71 | test=dict( 72 | type=dataset_type, 73 | ann_file=data_root + "val.txt", 74 | img_prefix=data_root + "WIDER_val/", 75 | pipeline=test_pipeline, 76 | ), 77 | ) 78 | -------------------------------------------------------------------------------- /configs/_base_/default_runtime.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree 6 | 7 | 8 | checkpoint_config = dict(interval=1) 9 | # yapf:disable 10 | log_config = dict( 11 | interval=50, 12 | hooks=[ 13 | dict(type="TextLoggerHook"), 14 | # dict(type='TensorboardLoggerHook') 15 | ], 16 | ) 17 | # yapf:enable 18 | dist_params = dict(backend="nccl") 19 | log_level = "INFO" 20 | load_from = None 21 | resume_from = None 22 | workflow = [("train", 1)] 23 | -------------------------------------------------------------------------------- /configs/_base_/models/fast_rcnn_r50_fpn.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree


# Model settings for a Fast R-CNN detector (R-50 + FPN). Fast R-CNN consumes
# precomputed proposals, so there is no RPN section here — only the RoI head.
model = dict(
    type="FastRCNN",
    pretrained="torchvision://resnet50",
    backbone=dict(
        type="ResNet",
        depth=50,
        num_stages=4,
        out_indices=(0, 1, 2, 3),  # expose C2-C5 to the neck
        frozen_stages=1,  # freeze stem + first residual stage
        norm_cfg=dict(type="BN", requires_grad=True),
        norm_eval=True,  # keep BN running stats fixed while training
        style="pytorch",
    ),
    neck=dict(
        type="FPN", in_channels=[256, 512, 1024, 2048], out_channels=256, num_outs=5
    ),
    roi_head=dict(
        type="StandardRoIHead",
        bbox_roi_extractor=dict(
            type="SingleRoIExtractor",
            roi_layer=dict(type="RoIAlign", output_size=7, sampling_ratio=0),
            out_channels=256,
            featmap_strides=[4, 8, 16, 32],
        ),
        bbox_head=dict(
            type="Shared2FCBBoxHead",
            in_channels=256,
            fc_out_channels=1024,
            roi_feat_size=7,
            num_classes=80,  # COCO
            bbox_coder=dict(
                type="DeltaXYWHBBoxCoder",
                target_means=[0.0, 0.0, 0.0, 0.0],
                target_stds=[0.1, 0.1, 0.2, 0.2],
            ),
            reg_class_agnostic=False,
            loss_cls=dict(type="CrossEntropyLoss", use_sigmoid=False, loss_weight=1.0),
            loss_bbox=dict(type="L1Loss", loss_weight=1.0),
        ),
    ),
    # model training and testing settings
    train_cfg=dict(
        rcnn=dict(
            assigner=dict(
                type="MaxIoUAssigner",
                pos_iou_thr=0.5,
                neg_iou_thr=0.5,
                min_pos_iou=0.5,
                match_low_quality=False,
                ignore_iof_thr=-1,
            ),
            sampler=dict(
                type="RandomSampler",
                num=512,
                pos_fraction=0.25,
                neg_pos_ub=-1,
                add_gt_as_proposals=True,
            ),
            pos_weight=-1,
            debug=False,
        )
    ),
    test_cfg=dict(
        rcnn=dict(
            score_thr=0.05, nms=dict(type="nms", iou_threshold=0.5), max_per_img=100
        )
    ),
)
# /configs/_base_/models/retinanet_r50_fpn.py
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.

# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree


# Model settings for RetinaNet (single-stage, focal loss) on R-50 + FPN.
model = dict(
    type="RetinaNet",
    pretrained="torchvision://resnet50",
    backbone=dict(
        type="ResNet",
        depth=50,
        num_stages=4,
        out_indices=(0, 1, 2, 3),  # expose C2-C5 to the neck
        frozen_stages=1,  # freeze stem + first residual stage
        norm_cfg=dict(type="BN", requires_grad=True),
        norm_eval=True,  # keep BN running stats fixed while training
        style="pytorch",
    ),
    neck=dict(
        type="FPN",
        in_channels=[256, 512, 1024, 2048],
        out_channels=256,
        start_level=1,  # skip C2; RetinaNet predicts on P3-P7
        add_extra_convs="on_input",
        num_outs=5,
    ),
    bbox_head=dict(
        type="RetinaHead",
        num_classes=80,  # COCO
        in_channels=256,
        stacked_convs=4,
        feat_channels=256,
        anchor_generator=dict(
            type="AnchorGenerator",
            octave_base_scale=4,
            scales_per_octave=3,  # 3 anchor scales per pyramid level
            ratios=[0.5, 1.0, 2.0],
            strides=[8, 16, 32, 64, 128],
        ),
        bbox_coder=dict(
            type="DeltaXYWHBBoxCoder",
            target_means=[0.0, 0.0, 0.0, 0.0],
            target_stds=[1.0, 1.0, 1.0, 1.0],
        ),
        loss_cls=dict(
            type="FocalLoss", use_sigmoid=True, gamma=2.0, alpha=0.25, loss_weight=1.0
        ),
        loss_bbox=dict(type="L1Loss", loss_weight=1.0),
    ),
    # training and testing settings
    train_cfg=dict(
        assigner=dict(
            type="MaxIoUAssigner",
            pos_iou_thr=0.5,
            neg_iou_thr=0.4,
            min_pos_iou=0,
            ignore_iof_thr=-1,
        ),
        allowed_border=-1,
        pos_weight=-1,
        debug=False,
    ),
    test_cfg=dict(
        nms_pre=1000,
        min_bbox_size=0,
        score_thr=0.05,
        nms=dict(type="nms", iou_threshold=0.5),
        max_per_img=100,
    ),
)
# /configs/_base_/models/rpn_r50_caffe_c4.py
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.

# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree


# Model settings for a standalone RPN on a Caffe-style R-50 C4 backbone
# (single-scale feature map at stride 16, no FPN).
model = dict(
    type="RPN",
    pretrained="open-mmlab://detectron2/resnet50_caffe",
    backbone=dict(
        type="ResNet",
        depth=50,
        num_stages=3,  # stop after C4; C5 is not built
        strides=(1, 2, 2),
        dilations=(1, 1, 1),
        out_indices=(2,),  # only the C4 feature map is used
        frozen_stages=1,
        norm_cfg=dict(type="BN", requires_grad=False),  # caffe weights: frozen BN
        norm_eval=True,
        style="caffe",
    ),
    neck=None,
    rpn_head=dict(
        type="RPNHead",
        in_channels=1024,
        feat_channels=1024,
        anchor_generator=dict(
            type="AnchorGenerator",
            scales=[2, 4, 8, 16, 32],  # multi-scale anchors compensate for no FPN
            ratios=[0.5, 1.0, 2.0],
            strides=[16],
        ),
        bbox_coder=dict(
            type="DeltaXYWHBBoxCoder",
            target_means=[0.0, 0.0, 0.0, 0.0],
            target_stds=[1.0, 1.0, 1.0, 1.0],
        ),
        loss_cls=dict(type="CrossEntropyLoss", use_sigmoid=True, loss_weight=1.0),
        loss_bbox=dict(type="L1Loss", loss_weight=1.0),
    ),
    # model training and testing settings
    train_cfg=dict(
        rpn=dict(
            assigner=dict(
                type="MaxIoUAssigner",
                pos_iou_thr=0.7,
                neg_iou_thr=0.3,
                min_pos_iou=0.3,
                ignore_iof_thr=-1,
            ),
            sampler=dict(
                type="RandomSampler",
                num=256,
                pos_fraction=0.5,
                neg_pos_ub=-1,
                add_gt_as_proposals=False,
            ),
            allowed_border=0,
            pos_weight=-1,
            debug=False,
        )
    ),
    test_cfg=dict(
        rpn=dict(
            nms_across_levels=False,
            nms_pre=12000,
            nms_post=2000,
            max_num=2000,
            nms_thr=0.7,
            min_bbox_size=0,
        )
    ),
)
# /configs/_base_/models/rpn_r50_fpn.py
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.

# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree


# Model settings for a standalone RPN on R-50 + FPN (multi-level proposals).
model = dict(
    type="RPN",
    pretrained="torchvision://resnet50",
    backbone=dict(
        type="ResNet",
        depth=50,
        num_stages=4,
        out_indices=(0, 1, 2, 3),  # expose C2-C5 to the neck
        frozen_stages=1,
        norm_cfg=dict(type="BN", requires_grad=True),
        norm_eval=True,
        style="pytorch",
    ),
    neck=dict(
        type="FPN", in_channels=[256, 512, 1024, 2048], out_channels=256, num_outs=5
    ),
    rpn_head=dict(
        type="RPNHead",
        in_channels=256,
        feat_channels=256,
        anchor_generator=dict(
            type="AnchorGenerator",
            scales=[8],  # one scale per level; FPN provides the scale pyramid
            ratios=[0.5, 1.0, 2.0],
            strides=[4, 8, 16, 32, 64],
        ),
        bbox_coder=dict(
            type="DeltaXYWHBBoxCoder",
            target_means=[0.0, 0.0, 0.0, 0.0],
            target_stds=[1.0, 1.0, 1.0, 1.0],
        ),
        loss_cls=dict(type="CrossEntropyLoss", use_sigmoid=True, loss_weight=1.0),
        loss_bbox=dict(type="L1Loss", loss_weight=1.0),
    ),
    # model training and testing settings
    train_cfg=dict(
        rpn=dict(
            assigner=dict(
                type="MaxIoUAssigner",
                pos_iou_thr=0.7,
                neg_iou_thr=0.3,
                min_pos_iou=0.3,
                ignore_iof_thr=-1,
            ),
            sampler=dict(
                type="RandomSampler",
                num=256,
                pos_fraction=0.5,
                neg_pos_ub=-1,
                add_gt_as_proposals=False,
            ),
            allowed_border=0,
            pos_weight=-1,
            debug=False,
        )
    ),
    test_cfg=dict(
        rpn=dict(
            nms_across_levels=False,
            nms_pre=2000,
            nms_post=1000,
            max_num=1000,
            nms_thr=0.7,
            min_bbox_size=0,
        )
    ),
)
# /configs/_base_/models/ssd300.py
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.

# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree


# Model settings for SSD-300 (VGG-16 backbone, 300x300 inputs).
input_size = 300
model = dict(
    type="SingleStageDetector",
    pretrained="open-mmlab://vgg16_caffe",
    backbone=dict(
        type="SSDVGG",
        input_size=input_size,
        depth=16,
        with_last_pool=False,
        ceil_mode=True,
        out_indices=(3, 4),
        out_feature_indices=(22, 34),
        l2_norm_scale=20,  # L2Norm scaling on the conv4_3 feature map
    ),
    neck=None,
    bbox_head=dict(
        type="SSDHead",
        in_channels=(512, 1024, 512, 256, 256, 256),  # one entry per SSD level
        num_classes=80,  # COCO
        anchor_generator=dict(
            type="SSDAnchorGenerator",
            scale_major=False,
            input_size=input_size,
            basesize_ratio_range=(0.15, 0.9),
            strides=[8, 16, 32, 64, 100, 300],
            ratios=[[2], [2, 3], [2, 3], [2, 3], [2], [2]],
        ),
        bbox_coder=dict(
            type="DeltaXYWHBBoxCoder",
            target_means=[0.0, 0.0, 0.0, 0.0],
            target_stds=[0.1, 0.1, 0.2, 0.2],
        ),
    ),
    train_cfg=dict(
        assigner=dict(
            type="MaxIoUAssigner",
            pos_iou_thr=0.5,
            neg_iou_thr=0.5,
            min_pos_iou=0.0,
            ignore_iof_thr=-1,
            gt_max_assign_all=False,
        ),
        smoothl1_beta=1.0,
        allowed_border=-1,
        pos_weight=-1,
        neg_pos_ratio=3,  # hard-negative mining keeps neg:pos at 3:1
        debug=False,
    ),
    test_cfg=dict(
        nms=dict(type="nms", iou_threshold=0.45),
        min_bbox_size=0,
        score_thr=0.02,
        max_per_img=200,
    ),
)
# Fixed input size, so cudnn autotuning pays off.
cudnn_benchmark = True

# /configs/_base_/schedules/schedule_1x.py (file continues in the next chunk)
# /configs/_base_/schedules/schedule_1x.py
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.

# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree


# optimizer
optimizer = dict(type="SGD", lr=0.02, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=None)
# learning policy: linear warmup, then step LR decay at epochs 8 and 11
lr_config = dict(
    policy="step", warmup="linear", warmup_iters=500, warmup_ratio=0.001, step=[8, 11]
)
total_epochs = 12

# /configs/_base_/schedules/schedule_20e.py
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.

# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree


# optimizer
optimizer = dict(type="SGD", lr=0.02, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=None)
# learning policy: linear warmup, then step LR decay at epochs 16 and 19
lr_config = dict(
    policy="step", warmup="linear", warmup_iters=500, warmup_ratio=0.001, step=[16, 19]
)
total_epochs = 20

# /configs/_base_/schedules/schedule_2x.py (file continues in the next chunk)
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree 6 | 7 | 8 | # optimizer 9 | optimizer = dict(type="SGD", lr=0.02, momentum=0.9, weight_decay=0.0001) 10 | optimizer_config = dict(grad_clip=None) 11 | # learning policy 12 | lr_config = dict( 13 | policy="step", warmup="linear", warmup_iters=500, warmup_ratio=0.001, step=[16, 22] 14 | ) 15 | total_epochs = 24 16 | -------------------------------------------------------------------------------- /docker/Dockerfile: -------------------------------------------------------------------------------- 1 | ARG PYTORCH="1.6.0" 2 | ARG CUDA="10.1" 3 | ARG CUDNN="7" 4 | 5 | FROM pytorch/pytorch:${PYTORCH}-cuda${CUDA}-cudnn${CUDNN}-devel 6 | 7 | ENV TORCH_CUDA_ARCH_LIST="6.0 6.1 7.0+PTX" 8 | ENV TORCH_NVCC_FLAGS="-Xfatbin -compress-all" 9 | ENV CMAKE_PREFIX_PATH="$(dirname $(which conda))/../" 10 | 11 | RUN apt-get update && apt-get install -y ffmpeg libsm6 libxext6 git ninja-build libglib2.0-0 libsm6 libxrender-dev libxext6 \ 12 | && apt-get clean \ 13 | && rm -rf /var/lib/apt/lists/* 14 | 15 | # Install MMCV 16 | RUN pip install mmcv-full==latest+torch1.6.0+cu101 -f https://openmmlab.oss-accelerate.aliyuncs.com/mmcv/dist/index.html 17 | 18 | # Install MMDetection 19 | RUN conda clean --all 20 | RUN git clone https://github.com/open-mmlab/mmdetection.git /mmdetection 21 | WORKDIR /mmdetection 22 | ENV FORCE_CUDA="1" 23 | RUN pip install -r requirements/build.txt 24 | RUN pip install --no-cache-dir -e . 25 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = . 
9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /docs/api.rst: -------------------------------------------------------------------------------- 1 | API Reference 2 | ================= 3 | 4 | mmdet.apis 5 | -------------- 6 | .. automodule:: mmdet.apis 7 | :members: 8 | 9 | mmdet.core 10 | -------------- 11 | 12 | anchor 13 | ^^^^^^^^^^ 14 | .. automodule:: mmdet.core.anchor 15 | :members: 16 | 17 | bbox 18 | ^^^^^^^^^^ 19 | .. automodule:: mmdet.core.bbox 20 | :members: 21 | 22 | export 23 | ^^^^^^^^^^ 24 | .. automodule:: mmdet.core.export 25 | :members: 26 | 27 | mask 28 | ^^^^^^^^^^ 29 | .. automodule:: mmdet.core.mask 30 | :members: 31 | 32 | evaluation 33 | ^^^^^^^^^^ 34 | .. automodule:: mmdet.core.evaluation 35 | :members: 36 | 37 | post_processing 38 | ^^^^^^^^^^^^^^^ 39 | .. automodule:: mmdet.core.post_processing 40 | :members: 41 | 42 | optimizer 43 | ^^^^^^^^^^ 44 | .. automodule:: mmdet.core.optimizer 45 | :members: 46 | 47 | utils 48 | ^^^^^^^^^^ 49 | .. automodule:: mmdet.core.utils 50 | :members: 51 | 52 | mmdet.datasets 53 | -------------- 54 | 55 | datasets 56 | ^^^^^^^^^^ 57 | .. automodule:: mmdet.datasets 58 | :members: 59 | 60 | pipelines 61 | ^^^^^^^^^^ 62 | .. automodule:: mmdet.datasets.pipelines 63 | :members: 64 | 65 | mmdet.models 66 | -------------- 67 | 68 | detectors 69 | ^^^^^^^^^^ 70 | .. automodule:: mmdet.models.detectors 71 | :members: 72 | 73 | backbones 74 | ^^^^^^^^^^ 75 | .. 
automodule:: mmdet.models.backbones 76 | :members: 77 | 78 | necks 79 | ^^^^^^^^^^^^ 80 | .. automodule:: mmdet.models.necks 81 | :members: 82 | 83 | dense_heads 84 | ^^^^^^^^^^^^ 85 | .. automodule:: mmdet.models.dense_heads 86 | :members: 87 | 88 | roi_heads 89 | ^^^^^^^^^^ 90 | .. automodule:: mmdet.models.roi_heads 91 | :members: 92 | 93 | losses 94 | ^^^^^^^^^^ 95 | .. automodule:: mmdet.models.losses 96 | :members: 97 | 98 | utils 99 | ^^^^^^^^^^ 100 | .. automodule:: mmdet.models.utils 101 | :members: 102 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree 6 | 7 | 8 | # Configuration file for the Sphinx documentation builder. 9 | # 10 | # This file only contains a selection of the most common options. For a full 11 | # list see the documentation: 12 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 13 | 14 | # -- Path setup -------------------------------------------------------------- 15 | 16 | # If extensions (or modules to document with autodoc) are in another directory, 17 | # add these directories to sys.path here. If the directory is relative to the 18 | # documentation root, use os.path.abspath to make it absolute, like shown here. 
#
import os
import subprocess
import sys

sys.path.insert(0, os.path.abspath(".."))

# -- Project information -----------------------------------------------------

project = "MMDetection"
copyright = "2018-2020, OpenMMLab"
author = "MMDetection Authors"
version_file = "../mmdet/version.py"


def get_version():
    """Return ``__version__`` parsed from ``version_file``.

    The version module is executed in an explicit namespace dict rather than
    via ``exec(...)`` followed by ``locals()["__version__"]``: in Python 3,
    ``exec`` cannot reliably inject names into a function's locals (the old
    pattern only worked by a CPython implementation accident and breaks under
    the PEP 667 ``locals()`` semantics introduced in 3.13).
    """
    ns = {}
    with open(version_file, "r") as f:
        exec(compile(f.read(), version_file, "exec"), ns)
    return ns["__version__"]


# The full version, including alpha/beta/rc tags
release = get_version()

# -- General configuration ---------------------------------------------------

# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
extensions = [
    "sphinx.ext.autodoc",
    "sphinx.ext.napoleon",
    "sphinx.ext.viewcode",
    "recommonmark",
    "sphinx_markdown_tables",
]

# Heavy/optional dependencies are mocked so autodoc can import mmdet
# without a full runtime environment.
autodoc_mock_imports = [
    "matplotlib",
    "pycocotools",
    "terminaltables",
    "mmdet.version",
    "mmcv.ops",
]

# Add any paths that contain templates here, relative to this directory.
templates_path = ["_templates"]

# The suffix(es) of source filenames.
# You can specify multiple suffix as a list of string:
#
source_suffix = {
    ".rst": "restructuredtext",
    ".md": "markdown",
}

# The master toctree document.
master_doc = "index"

# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# This pattern also affects html_static_path and html_extra_path.
exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"]

# -- Options for HTML output -------------------------------------------------

# The theme to use for HTML and HTML Help pages.
See the documentation for 86 | # a list of builtin themes. 87 | # 88 | html_theme = "sphinx_rtd_theme" 89 | 90 | # Add any paths that contain custom static files (such as style sheets) here, 91 | # relative to this directory. They are copied after the builtin static files, 92 | # so a file named "default.css" will overwrite the builtin "default.css". 93 | html_static_path = ["_static"] 94 | 95 | 96 | def builder_inited_handler(app): 97 | subprocess.run(["./stat.py"]) 98 | 99 | 100 | def setup(app): 101 | app.connect("builder-inited", builder_inited_handler) 102 | -------------------------------------------------------------------------------- /docs/conventions.md: -------------------------------------------------------------------------------- 1 | # Conventions 2 | 3 | Please check the following conventions if you would like to modify MMDetection as your own project. 4 | 5 | ## Loss 6 | 7 | In MMDetection, a `dict` containing losses and metrics will be returned by `model(**data)`. 8 | 9 | For example, in bbox head, 10 | 11 | ```python 12 | class BBoxHead(nn.Module): 13 | ... 14 | def loss(self, ...): 15 | losses = dict() 16 | # classification loss 17 | losses['loss_cls'] = self.loss_cls(...) 18 | # classification accuracy 19 | losses['acc'] = accuracy(...) 20 | # bbox regression loss 21 | losses['loss_bbox'] = self.loss_bbox(...) 22 | return losses 23 | ``` 24 | 25 | `bbox_head.loss()` will be called during model forward. 26 | The returned dict contains `'loss_bbox'`, `'loss_cls'`, `'acc'` . 27 | Only `'loss_bbox'`, `'loss_cls'` will be used during back propagation, 28 | `'acc'` will only be used as a metric to monitor training process. 29 | 30 | By default, only values whose keys contain `'loss'` will be back propagated. 31 | This behavior could be changed by modifying `BaseDetector.train_step()`. 
32 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | Welcome to MMDetection's documentation! 2 | ======================================= 3 | 4 | .. toctree:: 5 | :maxdepth: 2 6 | :caption: Get Started 7 | 8 | get_started.md 9 | modelzoo_statistics.md 10 | model_zoo.md 11 | 12 | .. toctree:: 13 | :maxdepth: 2 14 | :caption: Quick Run 15 | 16 | 1_exist_data_model.md 17 | 2_new_data_model.md 18 | 19 | .. toctree:: 20 | :maxdepth: 2 21 | :caption: Tutorials 22 | 23 | tutorials/index.rst 24 | 25 | .. toctree:: 26 | :maxdepth: 2 27 | :caption: Useful Tools and Scripts 28 | 29 | useful_tools.md 30 | 31 | .. toctree:: 32 | :maxdepth: 2 33 | :caption: Notes 34 | 35 | conventions.md 36 | compatibility.md 37 | projects.md 38 | changelog.md 39 | faq.md 40 | 41 | .. toctree:: 42 | :caption: API Reference 43 | 44 | api.rst 45 | 46 | Indices and tables 47 | ================== 48 | 49 | * :ref:`genindex` 50 | * :ref:`search` 51 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=. 11 | set BUILDDIR=_build 12 | 13 | if "%1" == "" goto help 14 | 15 | %SPHINXBUILD% >NUL 2>NUL 16 | if errorlevel 9009 ( 17 | echo. 18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 19 | echo.installed, then set the SPHINXBUILD environment variable to point 20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 21 | echo.may add the Sphinx directory to PATH. 22 | echo. 
23 | echo.If you don't have Sphinx installed, grab it from 24 | echo.http://sphinx-doc.org/ 25 | exit /b 1 26 | ) 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /docs/stat.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree 6 | 7 | 8 | #!/usr/bin/env python 9 | import functools as func 10 | import glob 11 | import os.path as osp 12 | import re 13 | 14 | import numpy as np 15 | 16 | url_prefix = "https://github.com/open-mmlab/mmdetection/blob/master/" 17 | 18 | files = sorted(glob.glob("../configs/*/README.md")) 19 | 20 | stats = [] 21 | titles = [] 22 | num_ckpts = 0 23 | 24 | for f in files: 25 | url = osp.dirname(f.replace("../", url_prefix)) 26 | 27 | with open(f, "r") as content_file: 28 | content = content_file.read() 29 | 30 | title = content.split("\n")[0].replace("# ", "").strip() 31 | ckpts = set( 32 | x.lower().strip() for x in re.findall(r"\[model\]\((https?.*)\)", content) 33 | ) 34 | 35 | if len(ckpts) == 0: 36 | continue 37 | 38 | _papertype = [x for x in re.findall(r"\[([A-Z]+)\]", content)] 39 | assert len(_papertype) > 0 40 | papertype = _papertype[0] 41 | 42 | paper = set([(papertype, title)]) 43 | 44 | titles.append(title) 45 | num_ckpts += len(ckpts) 46 | 47 | statsmsg = f""" 48 | \t* [{papertype}] [{title}]({url}) ({len(ckpts)} ckpts) 49 | """ 50 | stats.append((paper, ckpts, statsmsg)) 51 | 52 | allpapers = func.reduce(lambda a, b: a.union(b), [p for p, _, _ in stats]) 53 | msglist = "\n".join(x for _, _, x in stats) 54 | 55 | papertypes, papercounts = np.unique([t for t, _ 
in allpapers], return_counts=True) 56 | countstr = "\n".join([f" - {t}: {c}" for t, c in zip(papertypes, papercounts)]) 57 | 58 | modelzoo = f""" 59 | # Model Zoo Statistics 60 | 61 | * Number of papers: {len(set(titles))} 62 | {countstr} 63 | 64 | * Number of checkpoints: {num_ckpts} 65 | 66 | {msglist} 67 | """ 68 | 69 | with open("modelzoo_statistics.md", "w") as f: 70 | f.write(modelzoo) 71 | -------------------------------------------------------------------------------- /docs/tutorials/index.rst: -------------------------------------------------------------------------------- 1 | .. toctree:: 2 | :maxdepth: 2 3 | 4 | config.md 5 | customize_dataset.md 6 | data_pipeline.md 7 | customize_models.md 8 | customize_runtime.md 9 | customize_losses.md 10 | finetune.md 11 | pytorch2onnx.md 12 | -------------------------------------------------------------------------------- /images/overview_updated.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/Generic-Grouping/c0f33d76b207f937b538bfecf15d99672e068158/images/overview_updated.png -------------------------------------------------------------------------------- /images/teaser_large.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/Generic-Grouping/c0f33d76b207f937b538bfecf15d99672e068158/images/teaser_large.png -------------------------------------------------------------------------------- /mmdet/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree


import mmcv

from .version import __version__, short_version


def digit_version(version_str):
    """Convert a dotted version string into a list of ints for comparison.

    An ``rc`` pre-release segment sorts below the corresponding final
    release: ``"1.3rc1"`` becomes ``[1, 2, 1]`` while ``"1.3"`` becomes
    ``[1, 3]``. Segments that are neither numeric nor contain ``rc`` are
    skipped.
    """
    parts = []
    for token in version_str.split("."):
        if token.isdigit():
            parts.append(int(token))
        elif "rc" in token:
            rc_split = token.split("rc")
            # X.YrcZ must order before X.Y, so store (Y - 1, Z).
            parts.append(int(rc_split[0]) - 1)
            parts.append(int(rc_split[1]))
    return parts


mmcv_minimum_version = "1.2.4"
# mmcv_maximum_version = '1.3'
mmcv_version = digit_version(mmcv.__version__)


assert mmcv_version >= digit_version(mmcv_minimum_version)
# and mmcv_version <= digit_version(mmcv_maximum_version)), \
#     f'MMCV=={mmcv.__version__} is used but incompatible. ' \
#     f'Please install mmcv>={mmcv_minimum_version}, <={mmcv_maximum_version}.'

__all__ = ["__version__", "short_version"]

# /mmdet/apis/__init__.py
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree 6 | 7 | 8 | from .inference import ( 9 | async_inference_detector, 10 | inference_detector, 11 | init_detector, 12 | show_result_pyplot, 13 | ) 14 | from .test import ( 15 | collect_results_cpu, 16 | collect_results_gpu, 17 | multi_gpu_test, 18 | single_gpu_test, 19 | ) 20 | from .train import get_root_logger, set_random_seed, train_detector 21 | 22 | __all__ = [ 23 | "get_root_logger", 24 | "set_random_seed", 25 | "train_detector", 26 | "init_detector", 27 | "async_inference_detector", 28 | "inference_detector", 29 | "show_result_pyplot", 30 | "multi_gpu_test", 31 | "single_gpu_test", 32 | "collect_results_cpu", 33 | "collect_results_gpu", 34 | ] 35 | -------------------------------------------------------------------------------- /mmdet/core/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree 6 | 7 | 8 | from .anchor import * # noqa: F401, F403 9 | from .bbox import * # noqa: F401, F403 10 | from .evaluation import * # noqa: F401, F403 11 | from .export import * # noqa: F401, F403 12 | from .fp16 import * # noqa: F401, F403 13 | from .mask import * # noqa: F401, F403 14 | from .post_processing import * # noqa: F401, F403 15 | from .utils import * # noqa: F401, F403 16 | -------------------------------------------------------------------------------- /mmdet/core/anchor/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 
from mmcv.utils import build_from_cfg, Registry

# Registry holding all anchor-generator classes (AnchorGenerator,
# LegacyAnchorGenerator, YOLOAnchorGenerator, ...).
ANCHOR_GENERATORS = Registry("Anchor generator")


def build_anchor_generator(cfg, default_args=None):
    """Build an anchor generator instance from a config dict.

    Args:
        cfg (dict): Config with a ``type`` key naming a registered class.
        default_args (dict, optional): Default kwargs merged into ``cfg``.

    Returns:
        The instantiated anchor generator.
    """
    return build_from_cfg(cfg, ANCHOR_GENERATORS, default_args)
import torch

from .builder import ANCHOR_GENERATORS


@ANCHOR_GENERATORS.register_module()
class PointGenerator:
    """Generate per-pixel point locations for a single feature map."""

    def _meshgrid(self, x, y, row_major=True):
        # Tile x along rows and y down columns to form a dense grid.
        grid_x = x.repeat(len(y))
        grid_y = y.view(-1, 1).repeat(1, len(x)).view(-1)
        return (grid_x, grid_y) if row_major else (grid_y, grid_x)

    def grid_points(self, featmap_size, stride=16, device="cuda"):
        """Return a ``(feat_h * feat_w, 3)`` tensor of ``(x, y, stride)`` rows."""
        feat_h, feat_w = featmap_size
        xs = torch.arange(0.0, feat_w, device=device) * stride
        ys = torch.arange(0.0, feat_h, device=device) * stride
        grid_x, grid_y = self._meshgrid(xs, ys)
        # Broadcast the scalar stride into a column matching the grid length.
        stride_col = xs.new_full((grid_x.shape[0],), stride)
        return torch.stack([grid_x, grid_y, stride_col], dim=-1).to(device)

    def valid_flags(self, featmap_size, valid_size, device="cuda"):
        """Return a bool mask marking grid points inside ``valid_size``."""
        feat_h, feat_w = featmap_size
        valid_h, valid_w = valid_size
        assert valid_h <= feat_h and valid_w <= feat_w
        flag_x = torch.zeros(feat_w, dtype=torch.bool, device=device)
        flag_y = torch.zeros(feat_h, dtype=torch.bool, device=device)
        flag_x[:valid_w] = 1
        flag_y[:valid_h] = 1
        grid_x, grid_y = self._meshgrid(flag_x, flag_y)
        return grid_x & grid_y
import torch


def images_to_levels(target, num_levels):
    """Convert per-image targets into per-level targets.

    [target_img0, target_img1] -> [target_level0, target_level1, ...]
    """
    stacked = torch.stack(target, 0)
    level_targets = []
    begin = 0
    for count in num_levels:
        level_targets.append(stacked[:, begin:begin + count])
        begin += count
    return level_targets


def anchor_inside_flags(flat_anchors, valid_flags, img_shape, allowed_border=0):
    """Check whether the anchors are inside the (optionally padded) border.

    Args:
        flat_anchors (torch.Tensor): Flatten anchors, shape (n, 4).
        valid_flags (torch.Tensor): An existing valid flags of anchors.
        img_shape (tuple(int)): Shape of current image.
        allowed_border (int, optional): The border to allow the valid anchor.
            A negative value disables the border check. Defaults to 0.

    Returns:
        torch.Tensor: Flags indicating whether the anchors are inside a
        valid range.
    """
    img_h, img_w = img_shape[:2]
    if allowed_border < 0:
        # Border checking disabled: keep the incoming flags as-is.
        return valid_flags
    return (
        valid_flags
        & (flat_anchors[:, 0] >= -allowed_border)
        & (flat_anchors[:, 1] >= -allowed_border)
        & (flat_anchors[:, 2] < img_w + allowed_border)
        & (flat_anchors[:, 3] < img_h + allowed_border)
    )


def calc_region(bbox, ratio, featmap_size=None):
    """Calculate a proportional bbox region.

    The bbox center is fixed; the new height/width are scaled by ``ratio``.

    Args:
        bbox (Tensor): Bboxes to calculate regions, shape (n, 4).
        ratio (float): Ratio of the output region.
        featmap_size (tuple): Feature map size used for clipping the boundary.

    Returns:
        tuple: x1, y1, x2, y2
    """
    near, far = 1 - ratio, ratio
    x1 = torch.round(near * bbox[0] + far * bbox[2]).long()
    y1 = torch.round(near * bbox[1] + far * bbox[3]).long()
    x2 = torch.round(far * bbox[0] + near * bbox[2]).long()
    y2 = torch.round(far * bbox[1] + near * bbox[3]).long()
    if featmap_size is not None:
        # Clip to the feature map; note (h, w) ordering of featmap_size.
        x1 = x1.clamp(min=0, max=featmap_size[1])
        y1 = y1.clamp(min=0, max=featmap_size[0])
        x2 = x2.clamp(min=0, max=featmap_size[1])
        y2 = y2.clamp(min=0, max=featmap_size[0])
    return (x1, y1, x2, y2)
"AssignResult", 49 | "BaseSampler", 50 | "PseudoSampler", 51 | "RandomSampler", 52 | "InstanceBalancedPosSampler", 53 | "IoUBalancedNegSampler", 54 | "CombinedSampler", 55 | "OHEMSampler", 56 | "SamplingResult", 57 | "ScoreHLRSampler", 58 | "build_assigner", 59 | "build_sampler", 60 | "bbox_flip", 61 | "bbox_mapping", 62 | "bbox_mapping_back", 63 | "bbox2roi", 64 | "roi2bbox", 65 | "bbox2result", 66 | "distance2bbox", 67 | "bbox2distance", 68 | "build_bbox_coder", 69 | "BaseBBoxCoder", 70 | "PseudoBBoxCoder", 71 | "DeltaXYWHBBoxCoder", 72 | "TBLRBBoxCoder", 73 | "CenterRegionAssigner", 74 | "bbox_rescale", 75 | "bbox_cxcywh_to_xyxy", 76 | "bbox_xyxy_to_cxcywh", 77 | "RegionAssigner", 78 | ] 79 | -------------------------------------------------------------------------------- /mmdet/core/bbox/assigners/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree 6 | 7 | 8 | from .approx_max_iou_assigner import ApproxMaxIoUAssigner 9 | from .assign_result import AssignResult 10 | from .atss_assigner import ATSSAssigner 11 | from .base_assigner import BaseAssigner 12 | from .center_region_assigner import CenterRegionAssigner 13 | from .grid_assigner import GridAssigner 14 | from .hungarian_assigner import HungarianAssigner 15 | from .max_iou_assigner import MaxIoUAssigner 16 | from .point_assigner import PointAssigner 17 | from .region_assigner import RegionAssigner 18 | 19 | __all__ = [ 20 | "BaseAssigner", 21 | "MaxIoUAssigner", 22 | "ApproxMaxIoUAssigner", 23 | "AssignResult", 24 | "PointAssigner", 25 | "ATSSAssigner", 26 | "CenterRegionAssigner", 27 | "GridAssigner", 28 | "HungarianAssigner", 29 | "RegionAssigner", 30 | ] 31 | -------------------------------------------------------------------------------- 
from abc import ABCMeta, abstractmethod


class BaseAssigner(metaclass=ABCMeta):
    """Abstract base class for assigners matching boxes to ground truth."""

    @abstractmethod
    def assign(self, bboxes, gt_bboxes, gt_bboxes_ignore=None, gt_labels=None):
        """Assign each box to a ground-truth box or mark it as negative."""
from abc import ABCMeta, abstractmethod


class BaseBBoxCoder(metaclass=ABCMeta):
    """Abstract base class for bounding-box coders."""

    def __init__(self, **kwargs):
        # Nothing to configure; subclasses accept arbitrary kwargs.
        pass

    @abstractmethod
    def encode(self, bboxes, gt_bboxes):
        """Encode the deltas between ``bboxes`` and ground-truth boxes."""

    @abstractmethod
    def decode(self, bboxes, bboxes_pred):
        """Decode predictions back into absolute boxes."""
from ..builder import BBOX_CODERS
from .base_bbox_coder import BaseBBoxCoder


@BBOX_CODERS.register_module()
class PseudoBBoxCoder(BaseBBoxCoder):
    """Pseudo bounding box coder that passes its inputs through untouched."""

    def __init__(self, **kwargs):
        # Bug fix: the original called ``super(BaseBBoxCoder, self)``, which
        # skips BaseBBoxCoder.__init__ entirely and forwards **kwargs to
        # object.__init__ — a TypeError whenever kwargs are supplied.
        super(PseudoBBoxCoder, self).__init__(**kwargs)

    def encode(self, bboxes, gt_bboxes):
        """torch.Tensor: return the given ``gt_bboxes`` unchanged."""
        return gt_bboxes

    def decode(self, bboxes, pred_bboxes):
        """torch.Tensor: return the given ``pred_bboxes`` unchanged."""
        return pred_bboxes
import numpy as np
import torch


def ensure_rng(rng=None):
    """Simple version of the ``kwarray.ensure_rng``

    Coerce ``rng`` into a ``numpy.random.RandomState``: ``None`` selects
    numpy's global generator, an int seeds a fresh generator, and an
    existing generator object is returned unchanged.

    References:
        https://gitlab.kitware.com/computer-vision/kwarray/blob/master/kwarray/util_random.py#L270
    """
    if rng is None:
        return np.random.mtrand._rand
    if isinstance(rng, int):
        return np.random.RandomState(rng)
    return rng


def random_boxes(num=1, scale=1, rng=None):
    """Simple version of ``kwimage.Boxes.random``

    Returns:
        Tensor: shape (n, 4) in x1, y1, x2, y2 format.

    References:
        https://gitlab.kitware.com/computer-vision/kwimage/blob/master/kwimage/structs/boxes.py#L1390

    Example:
        >>> num = 3
        >>> scale = 512
        >>> rng = 0
        >>> boxes = random_boxes(num, scale, rng)
        >>> print(boxes)
        tensor([[280.9925, 278.9802, 308.6148, 366.1769],
                [216.9113, 330.6978, 224.0446, 456.5878],
                [405.3632, 196.3221, 493.3953, 270.7942]])
    """
    generator = ensure_rng(rng)

    raw = generator.rand(num, 4).astype(np.float32)
    # Columns pair up as (x_a, y_a, x_b, y_b); sort each pair so the
    # top-left corner really is the smaller coordinate.
    top_left = np.minimum(raw[:, :2], raw[:, 2:])
    bottom_right = np.maximum(raw[:, :2], raw[:, 2:])
    tlbr = np.concatenate([top_left, bottom_right], axis=1)
    tlbr *= scale  # in-place keeps float32 regardless of scale's type
    return torch.from_numpy(tlbr)
from mmcv.utils import build_from_cfg, Registry

# Registry holding IoU calculator classes (e.g. BboxOverlaps2D).
IOU_CALCULATORS = Registry("IoU calculator")


def build_iou_calculator(cfg, default_args=None):
    """Builder of IoU calculator.

    Args:
        cfg (dict): Config with a ``type`` key naming a registered class.
        default_args (dict, optional): Default kwargs merged into ``cfg``.

    Returns:
        The instantiated IoU calculator.
    """
    return build_from_cfg(cfg, IOU_CALCULATORS, default_args)
from mmcv.utils import build_from_cfg, Registry

# Registry holding match-cost classes (BBoxL1Cost, IoUCost, ...).
MATCH_COST = Registry("Match Cost")


def build_match_cost(cfg, default_args=None):
    """Builder of matching cost used by set-based assigners.

    Note: the original docstring said "Builder of IoU calculator" — a
    copy-paste from the IoU-calculator builder; this builds a match cost.

    Args:
        cfg (dict): Config with a ``type`` key naming a registered class.
        default_args (dict, optional): Default kwargs merged into ``cfg``.

    Returns:
        The instantiated match cost object.
    """
    return build_from_cfg(cfg, MATCH_COST, default_args)
import numpy as np
import torch

from ..builder import BBOX_SAMPLERS
from .random_sampler import RandomSampler


@BBOX_SAMPLERS.register_module()
class InstanceBalancedPosSampler(RandomSampler):
    """Instance balanced sampler that samples equal number of positive samples
    for each instance."""

    def _sample_pos(self, assign_result, num_expected, **kwargs):
        """Sample positive boxes.

        Args:
            assign_result (:obj:`AssignResult`): The assigned results of boxes.
            num_expected (int): The number of expected positive samples

        Returns:
            Tensor or ndarray: sampled indices.
        """
        # Indices of all boxes assigned to some ground truth (gt_inds > 0).
        pos_inds = torch.nonzero(assign_result.gt_inds > 0, as_tuple=False)
        if pos_inds.numel() != 0:
            pos_inds = pos_inds.squeeze(1)
        if pos_inds.numel() <= num_expected:
            # Fewer positives than requested: keep them all, no balancing needed.
            return pos_inds
        else:
            unique_gt_inds = assign_result.gt_inds[pos_inds].unique()
            num_gts = len(unique_gt_inds)
            # Per-instance quota; the +1 rounds up so quotas cover num_expected.
            num_per_gt = int(round(num_expected / float(num_gts)) + 1)
            sampled_inds = []
            for i in unique_gt_inds:
                # Positives assigned to this particular ground-truth instance.
                inds = torch.nonzero(assign_result.gt_inds == i.item(), as_tuple=False)
                if inds.numel() != 0:
                    inds = inds.squeeze(1)
                else:
                    continue
                if len(inds) > num_per_gt:
                    # Randomly keep at most the per-instance quota.
                    inds = self.random_choice(inds, num_per_gt)
                sampled_inds.append(inds)
            sampled_inds = torch.cat(sampled_inds)
            if len(sampled_inds) < num_expected:
                # Undershot: top up with random positives not yet sampled.
                num_extra = num_expected - len(sampled_inds)
                extra_inds = np.array(
                    list(set(pos_inds.cpu()) - set(sampled_inds.cpu()))
                )
                if len(extra_inds) > num_extra:
                    extra_inds = self.random_choice(extra_inds, num_extra)
                extra_inds = (
                    torch.from_numpy(extra_inds).to(assign_result.gt_inds.device).long()
                )
                sampled_inds = torch.cat([sampled_inds, extra_inds])
            elif len(sampled_inds) > num_expected:
                # Overshot (quotas round up): randomly trim to num_expected.
                sampled_inds = self.random_choice(sampled_inds, num_expected)
            return sampled_inds
import torch

from ..builder import BBOX_SAMPLERS
from .base_sampler import BaseSampler
from .sampling_result import SamplingResult


@BBOX_SAMPLERS.register_module()
class PseudoSampler(BaseSampler):
    """A no-op sampler: every assigned box is kept without subsampling."""

    def __init__(self, **kwargs):
        # Intentionally skip BaseSampler.__init__; nothing is configured.
        pass

    def _sample_pos(self, **kwargs):
        """Sample positive samples."""
        raise NotImplementedError

    def _sample_neg(self, **kwargs):
        """Sample negative samples."""
        raise NotImplementedError

    def sample(self, assign_result, bboxes, gt_bboxes, **kwargs):
        """Directly return the positive and negative indices of samples.

        Args:
            assign_result (:obj:`AssignResult`): Assigned results
            bboxes (torch.Tensor): Bounding boxes
            gt_bboxes (torch.Tensor): Ground truth boxes

        Returns:
            :obj:`SamplingResult`: sampler results
        """
        gt_inds = assign_result.gt_inds
        positives = torch.nonzero(gt_inds > 0, as_tuple=False).squeeze(-1).unique()
        negatives = torch.nonzero(gt_inds == 0, as_tuple=False).squeeze(-1).unique()
        # No GT boxes are injected as proposals, so the flag vector is all zero.
        gt_flags = bboxes.new_zeros(bboxes.shape[0], dtype=torch.uint8)
        return SamplingResult(
            positives, negatives, bboxes, gt_bboxes, assign_result, gt_flags
        )
import torch

from ..builder import BBOX_SAMPLERS
from .base_sampler import BaseSampler


@BBOX_SAMPLERS.register_module()
class RandomSampler(BaseSampler):
    """Random sampler.

    Args:
        num (int): Number of samples
        pos_fraction (float): Fraction of positive samples
        neg_pos_ub (int, optional): Upper bound number of negative and
            positive samples. Defaults to -1.
        add_gt_as_proposals (bool, optional): Whether to add ground truth
            boxes as proposals. Defaults to True.
    """

    def __init__(
        self, num, pos_fraction, neg_pos_ub=-1, add_gt_as_proposals=True, **kwargs
    ):
        # Imported lazily to avoid a circular import at module load time.
        from mmdet.core.bbox import demodata

        super(RandomSampler, self).__init__(
            num, pos_fraction, neg_pos_ub, add_gt_as_proposals
        )
        self.rng = demodata.ensure_rng(kwargs.get("rng", None))

    def random_choice(self, gallery, num):
        """Randomly pick ``num`` elements from ``gallery``.

        A Tensor gallery yields Tensor indices; a list or ndarray gallery
        yields an ndarray.

        Args:
            gallery (Tensor | ndarray | list): indices pool.
            num (int): expected sample num.

        Returns:
            Tensor or ndarray: sampled indices.
        """
        assert len(gallery) >= num

        was_tensor = isinstance(gallery, torch.Tensor)
        if not was_tensor:
            # Place the working tensor on the current CUDA device if any.
            if torch.cuda.is_available():
                device = torch.cuda.current_device()
            else:
                device = "cpu"
            gallery = torch.tensor(gallery, dtype=torch.long, device=device)
        perm = torch.randperm(gallery.numel(), device=gallery.device)[:num]
        picked = gallery[perm]
        return picked if was_tensor else picked.cpu().numpy()

    def _sample_pos(self, assign_result, num_expected, **kwargs):
        """Randomly sample some positive samples."""
        candidates = torch.nonzero(assign_result.gt_inds > 0, as_tuple=False)
        if candidates.numel() != 0:
            candidates = candidates.squeeze(1)
        if candidates.numel() <= num_expected:
            return candidates
        return self.random_choice(candidates, num_expected)

    def _sample_neg(self, assign_result, num_expected, **kwargs):
        """Randomly sample some negative samples."""
        candidates = torch.nonzero(assign_result.gt_inds == 0, as_tuple=False)
        if candidates.numel() != 0:
            candidates = candidates.squeeze(1)
        if len(candidates) <= num_expected:
            return candidates
        return self.random_choice(candidates, num_expected)
import numpy as np


def bbox_overlaps(bboxes1, bboxes2, mode="iou", eps=1e-6):
    """Calculate the ious between each bbox of bboxes1 and bboxes2.

    Args:
        bboxes1(ndarray): shape (n, 4)
        bboxes2(ndarray): shape (k, 4)
        mode(str): iou (intersection over union) or iof (intersection
            over foreground)

    Returns:
        ious(ndarray): shape (n, k)
    """
    assert mode in ["iou", "iof"]

    bboxes1 = bboxes1.astype(np.float32)
    bboxes2 = bboxes2.astype(np.float32)
    n_rows, n_cols = bboxes1.shape[0], bboxes2.shape[0]
    if n_rows * n_cols == 0:
        return np.zeros((n_rows, n_cols), dtype=np.float32)

    # Loop over the smaller set of boxes; transpose the result back if swapped.
    swapped = n_rows > n_cols
    if swapped:
        bboxes1, bboxes2 = bboxes2, bboxes1
    result = np.zeros((bboxes1.shape[0], bboxes2.shape[0]), dtype=np.float32)
    area1 = (bboxes1[:, 2] - bboxes1[:, 0]) * (bboxes1[:, 3] - bboxes1[:, 1])
    area2 = (bboxes2[:, 2] - bboxes2[:, 0]) * (bboxes2[:, 3] - bboxes2[:, 1])
    for i in range(bboxes1.shape[0]):
        top_left = np.maximum(bboxes1[i, :2], bboxes2[:, :2])
        bottom_right = np.minimum(bboxes1[i, 2:], bboxes2[:, 2:])
        wh = np.maximum(bottom_right - top_left, 0)
        overlap = wh[:, 0] * wh[:, 1]
        if mode == "iou":
            union = area1[i] + area2 - overlap
        else:
            # "iof" normalizes by the foreground (first argument's) area;
            # after a swap the foreground boxes live in ``bboxes2``.
            union = area1[i] if not swapped else area2
        union = np.maximum(union, eps)
        result[i, :] = overlap / union
    return result.T if swapped else result
3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree 6 | 7 | 8 | from .pytorch2onnx import ( 9 | build_model_from_cfg, 10 | generate_inputs_and_wrap_model, 11 | preprocess_example_input, 12 | ) 13 | 14 | __all__ = [ 15 | "build_model_from_cfg", 16 | "generate_inputs_and_wrap_model", 17 | "preprocess_example_input", 18 | ] 19 | -------------------------------------------------------------------------------- /mmdet/core/fp16/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree 6 | 7 | 8 | from .deprecated_fp16_utils import ( 9 | deprecated_auto_fp16 as auto_fp16, 10 | deprecated_force_fp32 as force_fp32, 11 | deprecated_wrap_fp16_model as wrap_fp16_model, 12 | DeprecatedFp16OptimizerHook as Fp16OptimizerHook, 13 | ) 14 | 15 | __all__ = ["auto_fp16", "force_fp32", "Fp16OptimizerHook", "wrap_fp16_model"] 16 | -------------------------------------------------------------------------------- /mmdet/core/fp16/deprecated_fp16_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree 6 | 7 | 8 | import warnings 9 | 10 | from mmcv.runner import auto_fp16, force_fp32, Fp16OptimizerHook, wrap_fp16_model 11 | 12 | 13 | class DeprecatedFp16OptimizerHook(Fp16OptimizerHook): 14 | """A wrapper class for the FP16 optimizer hook. This class wraps 15 | :class:`Fp16OptimizerHook` in `mmcv.runner` and shows a warning that the 16 | :class:`Fp16OptimizerHook` from `mmdet.core` will be deprecated. 
17 | 18 | Refer to :class:`Fp16OptimizerHook` in `mmcv.runner` for more details. 19 | 20 | Args: 21 | loss_scale (float): Scale factor multiplied with loss. 22 | """ 23 | 24 | def __init__(*args, **kwargs): 25 | super().__init__(*args, **kwargs) 26 | warnings.warn( 27 | 'Importing Fp16OptimizerHook from "mmdet.core" will be ' 28 | 'deprecated in the future. Please import them from "mmcv.runner" ' 29 | "instead" 30 | ) 31 | 32 | 33 | def deprecated_auto_fp16(*args, **kwargs): 34 | warnings.warn( 35 | 'Importing auto_fp16 from "mmdet.core" will be ' 36 | 'deprecated in the future. Please import them from "mmcv.runner" ' 37 | "instead" 38 | ) 39 | return auto_fp16(*args, **kwargs) 40 | 41 | 42 | def deprecated_force_fp32(*args, **kwargs): 43 | warnings.warn( 44 | 'Importing force_fp32 from "mmdet.core" will be ' 45 | 'deprecated in the future. Please import them from "mmcv.runner" ' 46 | "instead" 47 | ) 48 | return force_fp32(*args, **kwargs) 49 | 50 | 51 | def deprecated_wrap_fp16_model(*args, **kwargs): 52 | warnings.warn( 53 | 'Importing wrap_fp16_model from "mmdet.core" will be ' 54 | 'deprecated in the future. Please import them from "mmcv.runner" ' 55 | "instead" 56 | ) 57 | wrap_fp16_model(*args, **kwargs) 58 | -------------------------------------------------------------------------------- /mmdet/core/mask/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 
3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree 6 | 7 | 8 | from .mask_target import mask_target 9 | from .structures import BaseInstanceMasks, BitmapMasks, PolygonMasks 10 | from .utils import encode_mask_results, split_combined_polys 11 | 12 | __all__ = [ 13 | "split_combined_polys", 14 | "mask_target", 15 | "BaseInstanceMasks", 16 | "BitmapMasks", 17 | "PolygonMasks", 18 | "encode_mask_results", 19 | ] 20 | -------------------------------------------------------------------------------- /mmdet/core/mask/mask_target.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree 6 | 7 | 8 | import numpy as np 9 | import torch 10 | from torch.nn.modules.utils import _pair 11 | 12 | 13 | def mask_target(pos_proposals_list, pos_assigned_gt_inds_list, gt_masks_list, cfg): 14 | """Compute mask target for positive proposals in multiple images. 15 | 16 | Args: 17 | pos_proposals_list (list[Tensor]): Positive proposals in multiple 18 | images. 19 | pos_assigned_gt_inds_list (list[Tensor]): Assigned GT indices for each 20 | positive proposals. 21 | gt_masks_list (list[:obj:`BaseInstanceMasks`]): Ground truth masks of 22 | each image. 23 | cfg (dict): Config dict that specifies the mask size. 24 | 25 | Returns: 26 | list[Tensor]: Mask target of each image. 
27 | """ 28 | cfg_list = [cfg for _ in range(len(pos_proposals_list))] 29 | mask_targets = map( 30 | mask_target_single, 31 | pos_proposals_list, 32 | pos_assigned_gt_inds_list, 33 | gt_masks_list, 34 | cfg_list, 35 | ) 36 | mask_targets = list(mask_targets) 37 | if len(mask_targets) > 0: 38 | mask_targets = torch.cat(mask_targets) 39 | return mask_targets 40 | 41 | 42 | def mask_target_single(pos_proposals, pos_assigned_gt_inds, gt_masks, cfg): 43 | """Compute mask target for each positive proposal in the image. 44 | 45 | Args: 46 | pos_proposals (Tensor): Positive proposals. 47 | pos_assigned_gt_inds (Tensor): Assigned GT inds of positive proposals. 48 | gt_masks (:obj:`BaseInstanceMasks`): GT masks in the format of Bitmap 49 | or Polygon. 50 | cfg (dict): Config dict that indicate the mask size. 51 | 52 | Returns: 53 | Tensor: Mask target of each positive proposals in the image. 54 | """ 55 | device = pos_proposals.device 56 | mask_size = _pair(cfg.mask_size) 57 | num_pos = pos_proposals.size(0) 58 | if num_pos > 0: 59 | proposals_np = pos_proposals.cpu().numpy() 60 | maxh, maxw = gt_masks.height, gt_masks.width 61 | proposals_np[:, [0, 2]] = np.clip(proposals_np[:, [0, 2]], 0, maxw) 62 | proposals_np[:, [1, 3]] = np.clip(proposals_np[:, [1, 3]], 0, maxh) 63 | pos_assigned_gt_inds = pos_assigned_gt_inds.cpu().numpy() 64 | 65 | mask_targets = gt_masks.crop_and_resize( 66 | proposals_np, mask_size, device=device, inds=pos_assigned_gt_inds 67 | ).to_ndarray() 68 | 69 | mask_targets = torch.from_numpy(mask_targets).float().to(device) 70 | else: 71 | mask_targets = pos_proposals.new_zeros((0,) + mask_size) 72 | 73 | return mask_targets 74 | -------------------------------------------------------------------------------- /mmdet/core/mask/utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 
3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree 6 | 7 | 8 | import mmcv 9 | import numpy as np 10 | import pycocotools.mask as mask_util 11 | 12 | 13 | def split_combined_polys(polys, poly_lens, polys_per_mask): 14 | """Split the combined 1-D polys into masks. 15 | 16 | A mask is represented as a list of polys, and a poly is represented as 17 | a 1-D array. In dataset, all masks are concatenated into a single 1-D 18 | tensor. Here we need to split the tensor into original representations. 19 | 20 | Args: 21 | polys (list): a list (length = image num) of 1-D tensors 22 | poly_lens (list): a list (length = image num) of poly length 23 | polys_per_mask (list): a list (length = image num) of poly number 24 | of each mask 25 | 26 | Returns: 27 | list: a list (length = image num) of list (length = mask num) of \ 28 | list (length = poly num) of numpy array. 29 | """ 30 | mask_polys_list = [] 31 | for img_id in range(len(polys)): 32 | polys_single = polys[img_id] 33 | polys_lens_single = poly_lens[img_id].tolist() 34 | polys_per_mask_single = polys_per_mask[img_id].tolist() 35 | 36 | split_polys = mmcv.slice_list(polys_single, polys_lens_single) 37 | mask_polys = mmcv.slice_list(split_polys, polys_per_mask_single) 38 | mask_polys_list.append(mask_polys) 39 | return mask_polys_list 40 | 41 | 42 | # TODO: move this function to more proper place 43 | def encode_mask_results(mask_results): 44 | """Encode bitmap mask to RLE code. 45 | 46 | Args: 47 | mask_results (list | tuple[list]): bitmap mask results. 48 | In mask scoring rcnn, mask_results is a tuple of (segm_results, 49 | segm_cls_score). 50 | 51 | Returns: 52 | list | tuple: RLE encoded mask. 
53 | """ 54 | if isinstance(mask_results, tuple): # mask scoring 55 | cls_segms, cls_mask_scores = mask_results 56 | else: 57 | cls_segms = mask_results 58 | num_classes = len(cls_segms) 59 | encoded_mask_results = [[] for _ in range(num_classes)] 60 | for i in range(len(cls_segms)): 61 | for cls_segm in cls_segms[i]: 62 | encoded_mask_results[i].append( 63 | mask_util.encode( 64 | np.array(cls_segm[:, :, np.newaxis], order="F", dtype="uint8") 65 | )[0] 66 | ) # encoded with RLE 67 | if isinstance(mask_results, tuple): 68 | return encoded_mask_results, cls_mask_scores 69 | else: 70 | return encoded_mask_results 71 | -------------------------------------------------------------------------------- /mmdet/core/post_processing/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree 6 | 7 | 8 | from .bbox_nms import fast_nms, multiclass_nms 9 | from .merge_augs import ( 10 | merge_aug_bboxes, 11 | merge_aug_masks, 12 | merge_aug_proposals, 13 | merge_aug_scores, 14 | ) 15 | 16 | __all__ = [ 17 | "multiclass_nms", 18 | "merge_aug_proposals", 19 | "merge_aug_bboxes", 20 | "merge_aug_scores", 21 | "merge_aug_masks", 22 | "fast_nms", 23 | ] 24 | -------------------------------------------------------------------------------- /mmdet/core/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 
3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree 6 | 7 | 8 | from .dist_utils import allreduce_grads, DistOptimizerHook, reduce_mean 9 | from .misc import mask2ndarray, multi_apply, unmap 10 | 11 | __all__ = [ 12 | "allreduce_grads", 13 | "DistOptimizerHook", 14 | "reduce_mean", 15 | "multi_apply", 16 | "unmap", 17 | "mask2ndarray", 18 | ] 19 | -------------------------------------------------------------------------------- /mmdet/core/utils/dist_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree 6 | 7 | 8 | import warnings 9 | from collections import OrderedDict 10 | 11 | import torch.distributed as dist 12 | from mmcv.runner import OptimizerHook 13 | from torch._utils import _flatten_dense_tensors, _take_tensors, _unflatten_dense_tensors 14 | 15 | 16 | def _allreduce_coalesced(tensors, world_size, bucket_size_mb=-1): 17 | if bucket_size_mb > 0: 18 | bucket_size_bytes = bucket_size_mb * 1024 * 1024 19 | buckets = _take_tensors(tensors, bucket_size_bytes) 20 | else: 21 | buckets = OrderedDict() 22 | for tensor in tensors: 23 | tp = tensor.type() 24 | if tp not in buckets: 25 | buckets[tp] = [] 26 | buckets[tp].append(tensor) 27 | buckets = buckets.values() 28 | 29 | for bucket in buckets: 30 | flat_tensors = _flatten_dense_tensors(bucket) 31 | dist.all_reduce(flat_tensors) 32 | flat_tensors.div_(world_size) 33 | for tensor, synced in zip( 34 | bucket, _unflatten_dense_tensors(flat_tensors, bucket) 35 | ): 36 | tensor.copy_(synced) 37 | 38 | 39 | def allreduce_grads(params, coalesce=True, bucket_size_mb=-1): 40 | """Allreduce gradients. 
41 | 42 | Args: 43 | params (list[torch.Parameters]): List of parameters of a model 44 | coalesce (bool, optional): Whether allreduce parameters as a whole. 45 | Defaults to True. 46 | bucket_size_mb (int, optional): Size of bucket, the unit is MB. 47 | Defaults to -1. 48 | """ 49 | grads = [ 50 | param.grad.data 51 | for param in params 52 | if param.requires_grad and param.grad is not None 53 | ] 54 | world_size = dist.get_world_size() 55 | if coalesce: 56 | _allreduce_coalesced(grads, world_size, bucket_size_mb) 57 | else: 58 | for tensor in grads: 59 | dist.all_reduce(tensor.div_(world_size)) 60 | 61 | 62 | class DistOptimizerHook(OptimizerHook): 63 | """Deprecated optimizer hook for distributed training.""" 64 | 65 | def __init__(self, *args, **kwargs): 66 | warnings.warn( 67 | '"DistOptimizerHook" is deprecated, please switch to' 68 | '"mmcv.runner.OptimizerHook".' 69 | ) 70 | super().__init__(*args, **kwargs) 71 | 72 | 73 | def reduce_mean(tensor): 74 | """Obtain the mean of tensor on different GPUs.""" 75 | if not (dist.is_available() and dist.is_initialized()): 76 | return tensor 77 | tensor = tensor.clone() 78 | dist.all_reduce(tensor.div_(dist.get_world_size()), op=dist.ReduceOp.SUM) 79 | return tensor 80 | -------------------------------------------------------------------------------- /mmdet/core/utils/misc.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree 6 | 7 | 8 | from functools import partial 9 | 10 | import numpy as np 11 | import torch 12 | from six.moves import map, zip 13 | 14 | from ..mask.structures import BitmapMasks, PolygonMasks 15 | 16 | 17 | def multi_apply(func, *args, **kwargs): 18 | """Apply function to a list of arguments.
19 | 20 | Note: 21 | This function applies the ``func`` to multiple inputs and 22 | map the multiple outputs of the ``func`` into different 23 | list. Each list contains the same type of outputs corresponding 24 | to different inputs. 25 | 26 | Args: 27 | func (Function): A function that will be applied to a list of 28 | arguments 29 | 30 | Returns: 31 | tuple(list): A tuple containing multiple list, each list contains \ 32 | a kind of returned results by the function 33 | """ 34 | pfunc = partial(func, **kwargs) if kwargs else func 35 | map_results = map(pfunc, *args) 36 | return tuple(map(list, zip(*map_results))) 37 | 38 | 39 | def unmap(data, count, inds, fill=0): 40 | """Unmap a subset of item (data) back to the original set of items (of size 41 | count)""" 42 | if data.dim() == 1: 43 | ret = data.new_full((count,), fill) 44 | ret[inds.type(torch.bool)] = data 45 | else: 46 | new_size = (count,) + data.size()[1:] 47 | ret = data.new_full(new_size, fill) 48 | ret[inds.type(torch.bool), :] = data 49 | return ret 50 | 51 | 52 | def mask2ndarray(mask): 53 | """Convert Mask to ndarray.. 54 | 55 | Args: 56 | mask (:obj:`BitmapMasks` or :obj:`PolygonMasks` or 57 | torch.Tensor or np.ndarray): The mask to be converted. 58 | 59 | Returns: 60 | np.ndarray: Ndarray mask of shape (n, h, w) that has been converted 61 | """ 62 | if isinstance(mask, (BitmapMasks, PolygonMasks)): 63 | mask = mask.to_ndarray() 64 | elif isinstance(mask, torch.Tensor): 65 | mask = mask.detach().cpu().numpy() 66 | elif not isinstance(mask, np.ndarray): 67 | raise TypeError(f"Unsupported {type(mask)} data type") 68 | return mask 69 | -------------------------------------------------------------------------------- /mmdet/core/visualization/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 
3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree 6 | 7 | 8 | from .image import color_val_matplotlib, imshow_det_bboxes, imshow_gt_det_bboxes 9 | 10 | __all__ = ["imshow_det_bboxes", "imshow_gt_det_bboxes", "color_val_matplotlib"] 11 | -------------------------------------------------------------------------------- /mmdet/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree 6 | 7 | 8 | from .builder import build_dataloader, build_dataset, DATASETS, PIPELINES 9 | from .coco import CocoDataset 10 | from .coco_split import CocoSplitDataset 11 | from .coco_split_online import CocoSplitOnlineDataset 12 | from .coco_split_pseudo_masks import CocoSplitPseudoMasksDataset 13 | from .dataset_wrappers import ClassBalancedDataset, ConcatDataset, RepeatDataset 14 | from .samplers import DistributedGroupSampler, DistributedSampler, GroupSampler 15 | from .utils import get_loading_pipeline, replace_ImageToTensor 16 | 17 | __all__ = [ 18 | "CustomDataset", 19 | "CocoDataset", 20 | "GroupSampler", 21 | "DistributedGroupSampler", 22 | "DistributedSampler", 23 | "build_dataloader", 24 | "ConcatDataset", 25 | "RepeatDataset", 26 | "ClassBalancedDataset", 27 | "DATASETS", 28 | "PIPELINES", 29 | "build_dataset", 30 | "replace_ImageToTensor", 31 | "get_loading_pipeline" "CocoSplitDataset", 32 | "CocoSplitPseudoMasksDataset", 33 | "CocoSplitOnlineDataset", 34 | ] 35 | -------------------------------------------------------------------------------- /mmdet/datasets/coco_split_online.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 
3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree 6 | 7 | 8 | import json 9 | import random 10 | 11 | from pycocotools.coco import COCO 12 | 13 | from .builder import DATASETS 14 | from .coco_split import CocoSplitDataset 15 | 16 | 17 | @DATASETS.register_module() 18 | class CocoSplitOnlineDataset(CocoSplitDataset): 19 | """ 20 | Different from other MMDet dataset, this one loads annotations 21 | online instead of from a whole json. This is more memory 22 | efficient albeit a little bit slower. 23 | This enables training on 3M+ masks, which would not be feasible 24 | with a single json to store. 25 | """ 26 | 27 | def __init__( 28 | self, 29 | ann_dir=None, 30 | iou_thresh=None, 31 | score_thresh=None, 32 | top_k=None, 33 | random_sample_masks=False, 34 | **kwargs, 35 | ): 36 | """ 37 | Args: 38 | ann_dir: directory to store the annotations, where annotations 39 | for each image is stored as "image_id.json" 40 | For other arguments please see coco_split_pseudo_masks.py 41 | """ 42 | self.ann_dir = ann_dir 43 | self.iou_thresh = iou_thresh 44 | self.score_thresh = score_thresh 45 | self.top_k = top_k 46 | self.random_sample_masks = random_sample_masks 47 | super(CocoSplitOnlineDataset, self).__init__(**kwargs) 48 | 49 | # Override to load pseudo masks online 50 | def get_ann_info(self, idx): 51 | img_id = self.data_infos[idx]["id"] 52 | ann_info = json.load(open(f"{self.ann_dir}{img_id}.json")) 53 | ann_info = self.sample_targets(ann_info) 54 | return self._parse_ann_info(self.data_infos[idx], ann_info) 55 | 56 | def sample_targets(self, annotations): 57 | new_anns = annotations 58 | if self.iou_thresh is not None: 59 | tmp_new_anns = [] 60 | for ann in new_anns: 61 | if ann["gt_iou"] < self.iou_thresh: 62 | tmp_new_anns.append(ann) 63 | new_anns = tmp_new_anns 64 | if self.score_thresh is not None: 65 | tmp_new_anns = [] 66 | for ann in new_anns: 67 | if ann["score"] >= 
self.score_thresh: 68 | tmp_new_anns.append(ann) 69 | new_anns = tmp_new_anns 70 | if self.random_sample_masks: 71 | random.shuffle(new_anns) 72 | if self.top_k is not None: 73 | new_anns = new_anns[: self.top_k] 74 | return new_anns 75 | -------------------------------------------------------------------------------- /mmdet/datasets/coco_split_pseudo_masks.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree 6 | 7 | 8 | import random 9 | 10 | from pycocotools.coco import COCO 11 | 12 | from .builder import DATASETS 13 | from .coco_split import CocoSplitDataset 14 | 15 | 16 | @DATASETS.register_module() 17 | class CocoSplitPseudoMasksDataset(CocoSplitDataset): 18 | """ 19 | Used to joint train on images with both pseudo-GT and GT. 20 | """ 21 | 22 | def __init__( 23 | self, 24 | additional_ann_file=None, 25 | iou_thresh=None, 26 | score_thresh=None, 27 | top_k=None, 28 | random_sample_masks=False, 29 | **kwargs, 30 | ): 31 | # Add additional annotation file (eg. 
from pseudo masks) 32 | self.additional_coco = None 33 | if additional_ann_file is not None: 34 | self.additional_coco = COCO(additional_ann_file) 35 | self.iou_thresh = iou_thresh 36 | self.score_thresh = score_thresh 37 | self.top_k = top_k 38 | self.random_sample_masks = random_sample_masks 39 | super(CocoSplitPseudoMasksDataset, self).__init__(**kwargs) 40 | 41 | # Override to load pseudo masks 42 | def get_ann_info(self, idx): 43 | img_id = self.data_infos[idx]["id"] 44 | ann_ids = self.coco.get_ann_ids(img_ids=[img_id]) 45 | ann_info = self.coco.load_anns(ann_ids) 46 | all_anns = [] 47 | all_anns.extend(ann_info) 48 | if self.additional_coco is not None: 49 | additional_ann_ids = self.additional_coco.get_ann_ids(img_ids=[img_id]) 50 | additional_ann_info = self.additional_coco.load_anns(additional_ann_ids) 51 | additional_ann_info = self.sample_targets(additional_ann_info) 52 | all_anns.extend(additional_ann_info) 53 | return self._parse_ann_info(self.data_infos[idx], all_anns) 54 | 55 | def sample_targets(self, annotations): 56 | new_anns = annotations 57 | if self.iou_thresh is not None: 58 | tmp_new_anns = [] 59 | for ann in new_anns: 60 | if ann["gt_iou"] < self.iou_thresh: 61 | tmp_new_anns.append(ann) 62 | new_anns = tmp_new_anns 63 | if self.score_thresh is not None: 64 | tmp_new_anns = [] 65 | for ann in new_anns: 66 | if ann["score"] >= self.score_thresh: 67 | tmp_new_anns.append(ann) 68 | new_anns = tmp_new_anns 69 | if self.random_sample_masks: 70 | random.shuffle(new_anns) 71 | if self.top_k is not None: 72 | new_anns = new_anns[: self.top_k] 73 | return new_anns 74 | -------------------------------------------------------------------------------- /mmdet/datasets/pipelines/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 
3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree 6 | 7 | 8 | from .auto_augment import ( 9 | AutoAugment, 10 | BrightnessTransform, 11 | ColorTransform, 12 | ContrastTransform, 13 | EqualizeTransform, 14 | Rotate, 15 | Shear, 16 | Translate, 17 | ) 18 | from .compose import Compose 19 | from .formating import ( 20 | Collect, 21 | DefaultFormatBundle, 22 | ImageToTensor, 23 | to_tensor, 24 | ToDataContainer, 25 | ToTensor, 26 | Transpose, 27 | ) 28 | from .instaboost import InstaBoost 29 | from .loading import ( 30 | LoadAnnotations, 31 | LoadImageFromFile, 32 | LoadImageFromWebcam, 33 | LoadMultiChannelImageFromFiles, 34 | LoadProposals, 35 | ) 36 | from .test_time_aug import MultiScaleFlipAug 37 | from .transforms import ( 38 | Albu, 39 | CutOut, 40 | Expand, 41 | MinIoURandomCrop, 42 | Normalize, 43 | Pad, 44 | PhotoMetricDistortion, 45 | RandomCenterCropPad, 46 | RandomCrop, 47 | RandomFlip, 48 | Resize, 49 | SegRescale, 50 | ) 51 | 52 | __all__ = [ 53 | "Compose", 54 | "to_tensor", 55 | "ToTensor", 56 | "ImageToTensor", 57 | "ToDataContainer", 58 | "Transpose", 59 | "Collect", 60 | "DefaultFormatBundle", 61 | "LoadAnnotations", 62 | "LoadImageFromFile", 63 | "LoadImageFromWebcam", 64 | "LoadMultiChannelImageFromFiles", 65 | "LoadProposals", 66 | "MultiScaleFlipAug", 67 | "Resize", 68 | "RandomFlip", 69 | "Pad", 70 | "RandomCrop", 71 | "Normalize", 72 | "SegRescale", 73 | "MinIoURandomCrop", 74 | "Expand", 75 | "PhotoMetricDistortion", 76 | "Albu", 77 | "InstaBoost", 78 | "RandomCenterCropPad", 79 | "AutoAugment", 80 | "CutOut", 81 | "Shear", 82 | "Rotate", 83 | "ColorTransform", 84 | "EqualizeTransform", 85 | "BrightnessTransform", 86 | "ContrastTransform", 87 | "Translate", 88 | ] 89 | -------------------------------------------------------------------------------- /mmdet/datasets/pipelines/compose.py: 
-------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree 6 | 7 | 8 | import collections 9 | 10 | from mmcv.utils import build_from_cfg 11 | 12 | from ..builder import PIPELINES 13 | 14 | 15 | @PIPELINES.register_module() 16 | class Compose: 17 | """Compose multiple transforms sequentially. 18 | 19 | Args: 20 | transforms (Sequence[dict | callable]): Sequence of transform object or 21 | config dict to be composed. 22 | """ 23 | 24 | def __init__(self, transforms): 25 | assert isinstance(transforms, collections.abc.Sequence) 26 | self.transforms = [] 27 | for transform in transforms: 28 | if isinstance(transform, dict): 29 | transform = build_from_cfg(transform, PIPELINES) 30 | self.transforms.append(transform) 31 | elif callable(transform): 32 | self.transforms.append(transform) 33 | else: 34 | raise TypeError("transform must be callable or a dict") 35 | 36 | def __call__(self, data): 37 | """Call function to apply transforms sequentially. 38 | 39 | Args: 40 | data (dict): A result dict contains the data to transform. 41 | 42 | Returns: 43 | dict: Transformed data. 44 | """ 45 | 46 | for t in self.transforms: 47 | data = t(data) 48 | if data is None: 49 | return None 50 | return data 51 | 52 | def __repr__(self): 53 | format_string = self.__class__.__name__ + "(" 54 | for t in self.transforms: 55 | format_string += "\n" 56 | format_string += f" {t}" 57 | format_string += "\n)" 58 | return format_string 59 | -------------------------------------------------------------------------------- /mmdet/datasets/samplers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 
3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree 6 | 7 | 8 | from .distributed_sampler import DistributedSampler 9 | from .group_sampler import DistributedGroupSampler, GroupSampler 10 | 11 | __all__ = ["DistributedSampler", "DistributedGroupSampler", "GroupSampler"] 12 | -------------------------------------------------------------------------------- /mmdet/datasets/samplers/distributed_sampler.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree 6 | 7 | 8 | import math 9 | 10 | import torch 11 | from torch.utils.data import DistributedSampler as _DistributedSampler 12 | 13 | 14 | class DistributedSampler(_DistributedSampler): 15 | def __init__(self, dataset, num_replicas=None, rank=None, shuffle=True): 16 | super().__init__(dataset, num_replicas=num_replicas, rank=rank) 17 | self.shuffle = shuffle 18 | 19 | def __iter__(self): 20 | # deterministically shuffle based on epoch 21 | if self.shuffle: 22 | g = torch.Generator() 23 | g.manual_seed(self.epoch) 24 | indices = torch.randperm(len(self.dataset), generator=g).tolist() 25 | else: 26 | indices = torch.arange(len(self.dataset)).tolist() 27 | 28 | # add extra samples to make it evenly divisible 29 | # in case that indices is shorter than half of total_size 30 | indices = (indices * math.ceil(self.total_size / len(indices)))[ 31 | : self.total_size 32 | ] 33 | assert len(indices) == self.total_size 34 | 35 | # subsample 36 | indices = indices[self.rank : self.total_size : self.num_replicas] 37 | assert len(indices) == self.num_samples 38 | 39 | return iter(indices) 40 | -------------------------------------------------------------------------------- /mmdet/models/__init__.py: 
-------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree 6 | 7 | 8 | from .backbones import * # noqa: F401,F403 9 | from .builder import ( 10 | BACKBONES, 11 | build_backbone, 12 | build_detector, 13 | build_head, 14 | build_loss, 15 | build_neck, 16 | build_roi_extractor, 17 | build_shared_head, 18 | DETECTORS, 19 | HEADS, 20 | LOSSES, 21 | NECKS, 22 | ROI_EXTRACTORS, 23 | SHARED_HEADS, 24 | ) 25 | from .dense_heads import * # noqa: F401,F403 26 | from .detectors import * # noqa: F401,F403 27 | from .losses import * # noqa: F401,F403 28 | from .necks import * # noqa: F401,F403 29 | from .roi_heads import * # noqa: F401,F403 30 | 31 | __all__ = [ 32 | "BACKBONES", 33 | "NECKS", 34 | "ROI_EXTRACTORS", 35 | "SHARED_HEADS", 36 | "HEADS", 37 | "LOSSES", 38 | "DETECTORS", 39 | "build_backbone", 40 | "build_neck", 41 | "build_roi_extractor", 42 | "build_shared_head", 43 | "build_head", 44 | "build_loss", 45 | "build_detector", 46 | ] 47 | -------------------------------------------------------------------------------- /mmdet/models/backbones/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 
3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree 6 | 7 | 8 | from .darknet import Darknet 9 | from .detectors_resnet import DetectoRS_ResNet 10 | from .detectors_resnext import DetectoRS_ResNeXt 11 | from .hourglass import HourglassNet 12 | from .hrnet import HRNet 13 | from .regnet import RegNet 14 | from .res2net import Res2Net 15 | from .resnest import ResNeSt 16 | from .resnet import ResNet, ResNetV1d 17 | from .resnext import ResNeXt 18 | from .ssd_vgg import SSDVGG 19 | from .trident_resnet import TridentResNet 20 | 21 | __all__ = [ 22 | "RegNet", 23 | "ResNet", 24 | "ResNetV1d", 25 | "ResNeXt", 26 | "SSDVGG", 27 | "HRNet", 28 | "Res2Net", 29 | "HourglassNet", 30 | "DetectoRS_ResNet", 31 | "DetectoRS_ResNeXt", 32 | "Darknet", 33 | "ResNeSt", 34 | "TridentResNet", 35 | ] 36 | -------------------------------------------------------------------------------- /mmdet/models/builder.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree 6 | 7 | 8 | import warnings 9 | 10 | from mmcv.utils import build_from_cfg, Registry 11 | from torch import nn 12 | 13 | BACKBONES = Registry("backbone") 14 | NECKS = Registry("neck") 15 | ROI_EXTRACTORS = Registry("roi_extractor") 16 | SHARED_HEADS = Registry("shared_head") 17 | HEADS = Registry("head") 18 | LOSSES = Registry("loss") 19 | DETECTORS = Registry("detector") 20 | 21 | 22 | def build(cfg, registry, default_args=None): 23 | """Build a module. 24 | 25 | Args: 26 | cfg (dict, list[dict]): The config of modules, is is either a dict 27 | or a list of configs. 28 | registry (:obj:`Registry`): A registry the module belongs to. 29 | default_args (dict, optional): Default arguments to build the module. 
30 | Defaults to None. 31 | 32 | Returns: 33 | nn.Module: A built nn module. 34 | """ 35 | if isinstance(cfg, list): 36 | modules = [build_from_cfg(cfg_, registry, default_args) for cfg_ in cfg] 37 | return nn.Sequential(*modules) 38 | else: 39 | return build_from_cfg(cfg, registry, default_args) 40 | 41 | 42 | def build_backbone(cfg): 43 | """Build backbone.""" 44 | return build(cfg, BACKBONES) 45 | 46 | 47 | def build_neck(cfg): 48 | """Build neck.""" 49 | return build(cfg, NECKS) 50 | 51 | 52 | def build_roi_extractor(cfg): 53 | """Build roi extractor.""" 54 | return build(cfg, ROI_EXTRACTORS) 55 | 56 | 57 | def build_shared_head(cfg): 58 | """Build shared head.""" 59 | return build(cfg, SHARED_HEADS) 60 | 61 | 62 | def build_head(cfg): 63 | """Build head.""" 64 | return build(cfg, HEADS) 65 | 66 | 67 | def build_loss(cfg): 68 | """Build loss.""" 69 | return build(cfg, LOSSES) 70 | 71 | 72 | def build_detector(cfg, train_cfg=None, test_cfg=None): 73 | """Build detector.""" 74 | if train_cfg is not None or test_cfg is not None: 75 | warnings.warn( 76 | "train_cfg and test_cfg is deprecated, " "please specify them in model", 77 | UserWarning, 78 | ) 79 | assert ( 80 | cfg.get("train_cfg") is None or train_cfg is None 81 | ), "train_cfg specified in both outer field and model field " 82 | assert ( 83 | cfg.get("test_cfg") is None or test_cfg is None 84 | ), "test_cfg specified in both outer field and model field " 85 | return build(cfg, DETECTORS, dict(train_cfg=train_cfg, test_cfg=test_cfg)) 86 | -------------------------------------------------------------------------------- /mmdet/models/dense_heads/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 
3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree 6 | 7 | 8 | from .anchor_head import AnchorHead 9 | from .oln_rpn_head import OlnRPNHead 10 | from .rpn_head import RPNHead 11 | 12 | 13 | __all__ = [ 14 | "AnchorHead", 15 | "RPNHead", 16 | "OlnRPNHead", 17 | ] 18 | -------------------------------------------------------------------------------- /mmdet/models/dense_heads/base_dense_head.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree 6 | 7 | 8 | from abc import ABCMeta, abstractmethod 9 | 10 | import torch.nn as nn 11 | 12 | 13 | class BaseDenseHead(nn.Module, metaclass=ABCMeta): 14 | """Base class for DenseHeads.""" 15 | 16 | def __init__(self): 17 | super(BaseDenseHead, self).__init__() 18 | 19 | @abstractmethod 20 | def loss(self, **kwargs): 21 | """Compute losses of the head.""" 22 | pass 23 | 24 | @abstractmethod 25 | def get_bboxes(self, **kwargs): 26 | """Transform network output for a batch into bbox predictions.""" 27 | pass 28 | 29 | def forward_train( 30 | self, 31 | x, 32 | img_metas, 33 | gt_bboxes, 34 | gt_labels=None, 35 | gt_bboxes_ignore=None, 36 | proposal_cfg=None, 37 | **kwargs, 38 | ): 39 | """ 40 | Args: 41 | x (list[Tensor]): Features from FPN. 42 | img_metas (list[dict]): Meta information of each image, e.g., 43 | image size, scaling factor, etc. 44 | gt_bboxes (Tensor): Ground truth bboxes of the image, 45 | shape (num_gts, 4). 46 | gt_labels (Tensor): Ground truth labels of each box, 47 | shape (num_gts,). 48 | gt_bboxes_ignore (Tensor): Ground truth bboxes to be 49 | ignored, shape (num_ignored_gts, 4). 
50 | proposal_cfg (mmcv.Config): Test / postprocessing configuration, 51 | if None, test_cfg would be used 52 | 53 | Returns: 54 | tuple: 55 | losses: (dict[str, Tensor]): A dictionary of loss components. 56 | proposal_list (list[Tensor]): Proposals of each image. 57 | """ 58 | outs = self(x) 59 | if gt_labels is None: 60 | loss_inputs = outs + (gt_bboxes, img_metas) 61 | else: 62 | loss_inputs = outs + (gt_bboxes, gt_labels, img_metas) 63 | losses = self.loss(*loss_inputs, gt_bboxes_ignore=gt_bboxes_ignore) 64 | if proposal_cfg is None: 65 | return losses 66 | else: 67 | proposal_list = self.get_bboxes(*outs, img_metas, cfg=proposal_cfg) 68 | return losses, proposal_list 69 | -------------------------------------------------------------------------------- /mmdet/models/dense_heads/rpn_test_mixin.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree 6 | 7 | 8 | import sys 9 | 10 | from mmdet.core import merge_aug_proposals 11 | 12 | if sys.version_info >= (3, 7): 13 | from mmdet.utils.contextmanagers import completed 14 | 15 | 16 | class RPNTestMixin: 17 | """Test methods of RPN.""" 18 | 19 | if sys.version_info >= (3, 7): 20 | 21 | async def async_simple_test_rpn(self, x, img_metas): 22 | sleep_interval = self.test_cfg.pop("async_sleep_interval", 0.025) 23 | async with completed( 24 | __name__, "rpn_head_forward", sleep_interval=sleep_interval 25 | ): 26 | rpn_outs = self(x) 27 | 28 | proposal_list = self.get_bboxes(*rpn_outs, img_metas) 29 | return proposal_list 30 | 31 | def simple_test_rpn(self, x, img_metas): 32 | """Test without augmentation. 33 | 34 | Args: 35 | x (tuple[Tensor]): Features from the upstream network, each is 36 | a 4D-tensor. 37 | img_metas (list[dict]): Meta info of each image. 
38 | 39 | Returns: 40 | list[Tensor]: Proposals of each image. 41 | """ 42 | rpn_outs = self(x) 43 | proposal_list = self.get_bboxes(*rpn_outs, img_metas) 44 | return proposal_list 45 | 46 | def aug_test_rpn(self, feats, img_metas): 47 | samples_per_gpu = len(img_metas[0]) 48 | aug_proposals = [[] for _ in range(samples_per_gpu)] 49 | for x, img_meta in zip(feats, img_metas): 50 | proposal_list = self.simple_test_rpn(x, img_meta) 51 | for i, proposals in enumerate(proposal_list): 52 | aug_proposals[i].append(proposals) 53 | # reorganize the order of 'img_metas' to match the dimensions 54 | # of 'aug_proposals' 55 | aug_img_metas = [] 56 | for i in range(samples_per_gpu): 57 | aug_img_meta = [] 58 | for j in range(len(img_metas)): 59 | aug_img_meta.append(img_metas[j][i]) 60 | aug_img_metas.append(aug_img_meta) 61 | # after merging, proposals will be rescaled to the original image size 62 | merged_proposals = [ 63 | merge_aug_proposals(proposals, aug_img_meta, self.test_cfg) 64 | for proposals, aug_img_meta in zip(aug_proposals, aug_img_metas) 65 | ] 66 | return merged_proposals 67 | -------------------------------------------------------------------------------- /mmdet/models/detectors/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 
3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree 6 | 7 | 8 | from .base import BaseDetector 9 | from .faster_rcnn import FasterRCNN 10 | from .mask_rcnn import MaskRCNN 11 | from .pa_predictor import ( 12 | PairwiseAffinityHead, 13 | PairwiseAffinityHeadUperNet, 14 | PairwiseAffinityPredictor, 15 | ) 16 | from .rpn import RPN 17 | 18 | # 19 | from .rpn_detector import RPNDetector 20 | from .two_stage import TwoStageDetector 21 | from .two_tower import TwoTowerDetector 22 | 23 | __all__ = [ 24 | "BaseDetector", 25 | "TwoStageDetector", 26 | "RPN", 27 | "FasterRCNN", 28 | "MaskRCNN", 29 | "RPNDetector", 30 | "PairwiseAffinityPredictor", 31 | "PairwiseAffinityHead", 32 | "PairwiseAffinityHeadUperNet", 33 | "TwoTowerDetector", 34 | ] 35 | -------------------------------------------------------------------------------- /mmdet/models/detectors/faster_rcnn.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 
3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree 6 | 7 | 8 | from ..builder import DETECTORS 9 | from .two_stage import TwoStageDetector 10 | 11 | 12 | @DETECTORS.register_module() 13 | class FasterRCNN(TwoStageDetector): 14 | """Implementation of `Faster R-CNN `_""" 15 | 16 | def __init__( 17 | self, 18 | backbone, 19 | rpn_head, 20 | roi_head, 21 | train_cfg, 22 | test_cfg, 23 | neck=None, 24 | pretrained=None, 25 | ): 26 | super(FasterRCNN, self).__init__( 27 | backbone=backbone, 28 | neck=neck, 29 | rpn_head=rpn_head, 30 | roi_head=roi_head, 31 | train_cfg=train_cfg, 32 | test_cfg=test_cfg, 33 | pretrained=pretrained, 34 | ) 35 | -------------------------------------------------------------------------------- /mmdet/models/detectors/mask_rcnn.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree 6 | 7 | 8 | from ..builder import DETECTORS 9 | from .two_stage import TwoStageDetector 10 | 11 | 12 | @DETECTORS.register_module() 13 | class MaskRCNN(TwoStageDetector): 14 | """Implementation of `Mask R-CNN `_""" 15 | 16 | def __init__( 17 | self, 18 | backbone, 19 | rpn_head, 20 | roi_head, 21 | train_cfg, 22 | test_cfg, 23 | neck=None, 24 | pretrained=None, 25 | ): 26 | super(MaskRCNN, self).__init__( 27 | backbone=backbone, 28 | neck=neck, 29 | rpn_head=rpn_head, 30 | roi_head=roi_head, 31 | train_cfg=train_cfg, 32 | test_cfg=test_cfg, 33 | pretrained=pretrained, 34 | ) 35 | -------------------------------------------------------------------------------- /mmdet/models/detectors/rpn_detector.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 
2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree 6 | 7 | 8 | import torch 9 | from mmdet.core import bbox2result 10 | 11 | from ..builder import DETECTORS 12 | from .rpn import RPN 13 | 14 | 15 | @DETECTORS.register_module() 16 | class RPNDetector(RPN): 17 | def simple_test(self, img, img_metas, rescale=False): 18 | """Test function without test time augmentation. 19 | 20 | Args: 21 | imgs (list[torch.Tensor]): List of multiple images 22 | img_metas (list[dict]): List of image information. 23 | rescale (bool, optional): Whether to rescale the results. 24 | Defaults to False. 25 | 26 | Returns: 27 | list[np.ndarray]: proposals 28 | """ 29 | x = self.extract_feat(img) 30 | proposal_list = self.rpn_head.simple_test_rpn(x, img_metas) 31 | if rescale: 32 | for proposals, meta in zip(proposal_list, img_metas): 33 | proposals[:, :4] /= proposals.new_tensor(meta["scale_factor"]) 34 | 35 | # Convert the rpn-proposals into bbox results format. < 36 | # proposal_list[0].shape = [200,5] 37 | bbox_results = [] 38 | for det_bboxes in proposal_list: 39 | det_labels = torch.zeros((det_bboxes.size(0))).to(det_bboxes.device) 40 | bbox_results.append(bbox2result(det_bboxes, det_labels, num_classes=1)) 41 | 42 | return bbox_results 43 | # > 44 | -------------------------------------------------------------------------------- /mmdet/models/losses/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 
3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree 6 | 7 | 8 | from .accuracy import Accuracy, accuracy 9 | from .ae_loss import AssociativeEmbeddingLoss 10 | from .balanced_l1_loss import balanced_l1_loss, BalancedL1Loss 11 | from .cross_entropy_loss import ( 12 | binary_cross_entropy, 13 | cross_entropy, 14 | CrossEntropyLoss, 15 | mask_cross_entropy, 16 | ) 17 | from .focal_loss import FocalLoss, sigmoid_focal_loss 18 | from .gaussian_focal_loss import GaussianFocalLoss 19 | from .gfocal_loss import DistributionFocalLoss, QualityFocalLoss 20 | from .ghm_loss import GHMC, GHMR 21 | from .iou_loss import ( 22 | bounded_iou_loss, 23 | BoundedIoULoss, 24 | CIoULoss, 25 | DIoULoss, 26 | GIoULoss, 27 | iou_loss, 28 | IoULoss, 29 | ) 30 | from .mse_loss import mse_loss, MSELoss 31 | from .pisa_loss import carl_loss, isr_p 32 | from .smooth_l1_loss import l1_loss, L1Loss, smooth_l1_loss, SmoothL1Loss 33 | from .utils import reduce_loss, weight_reduce_loss, weighted_loss 34 | from .varifocal_loss import VarifocalLoss 35 | 36 | __all__ = [ 37 | "accuracy", 38 | "Accuracy", 39 | "cross_entropy", 40 | "binary_cross_entropy", 41 | "mask_cross_entropy", 42 | "CrossEntropyLoss", 43 | "sigmoid_focal_loss", 44 | "FocalLoss", 45 | "smooth_l1_loss", 46 | "SmoothL1Loss", 47 | "balanced_l1_loss", 48 | "BalancedL1Loss", 49 | "mse_loss", 50 | "MSELoss", 51 | "iou_loss", 52 | "bounded_iou_loss", 53 | "IoULoss", 54 | "BoundedIoULoss", 55 | "GIoULoss", 56 | "DIoULoss", 57 | "CIoULoss", 58 | "GHMC", 59 | "GHMR", 60 | "reduce_loss", 61 | "weight_reduce_loss", 62 | "weighted_loss", 63 | "L1Loss", 64 | "l1_loss", 65 | "isr_p", 66 | "carl_loss", 67 | "AssociativeEmbeddingLoss", 68 | "GaussianFocalLoss", 69 | "QualityFocalLoss", 70 | "DistributionFocalLoss", 71 | "VarifocalLoss", 72 | ] 73 | -------------------------------------------------------------------------------- /mmdet/models/losses/accuracy.py: 
-------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree 6 | 7 | 8 | import torch.nn as nn 9 | 10 | 11 | def accuracy(pred, target, topk=1, thresh=None): 12 | """Calculate accuracy according to the prediction and target. 13 | 14 | Args: 15 | pred (torch.Tensor): The model prediction, shape (N, num_class) 16 | target (torch.Tensor): The target of each prediction, shape (N, ) 17 | topk (int | tuple[int], optional): If the predictions in ``topk`` 18 | matches the target, the predictions will be regarded as 19 | correct ones. Defaults to 1. 20 | thresh (float, optional): If not None, predictions with scores under 21 | this threshold are considered incorrect. Default to None. 22 | 23 | Returns: 24 | float | tuple[float]: If the input ``topk`` is a single integer, 25 | the function will return a single float as accuracy. If 26 | ``topk`` is a tuple containing multiple integers, the 27 | function will return a tuple containing accuracies of 28 | each ``topk`` number. 
29 | """ 30 | assert isinstance(topk, (int, tuple)) 31 | if isinstance(topk, int): 32 | topk = (topk,) 33 | return_single = True 34 | else: 35 | return_single = False 36 | 37 | maxk = max(topk) 38 | if pred.size(0) == 0: 39 | accu = [pred.new_tensor(0.0) for i in range(len(topk))] 40 | return accu[0] if return_single else accu 41 | assert pred.ndim == 2 and target.ndim == 1 42 | assert pred.size(0) == target.size(0) 43 | assert maxk <= pred.size(1), f"maxk {maxk} exceeds pred dimension {pred.size(1)}" 44 | pred_value, pred_label = pred.topk(maxk, dim=1) 45 | pred_label = pred_label.t() # transpose to shape (maxk, N) 46 | correct = pred_label.eq(target.view(1, -1).expand_as(pred_label)) 47 | if thresh is not None: 48 | # Only prediction values larger than thresh are counted as correct 49 | correct = correct & (pred_value > thresh).t() 50 | res = [] 51 | for k in topk: 52 | correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True) 53 | res.append(correct_k.mul_(100.0 / pred.size(0))) 54 | return res[0] if return_single else res 55 | 56 | 57 | class Accuracy(nn.Module): 58 | def __init__(self, topk=(1,), thresh=None): 59 | """Module to calculate the accuracy. 60 | 61 | Args: 62 | topk (tuple, optional): The criterion used to calculate the 63 | accuracy. Defaults to (1,). 64 | thresh (float, optional): If not None, predictions with scores 65 | under this threshold are considered incorrect. Default to None. 66 | """ 67 | super().__init__() 68 | self.topk = topk 69 | self.thresh = thresh 70 | 71 | def forward(self, pred, target): 72 | """Forward function to calculate accuracy. 73 | 74 | Args: 75 | pred (torch.Tensor): Prediction of models. 76 | target (torch.Tensor): Target for each prediction. 77 | 78 | Returns: 79 | tuple[float]: The accuracies under different topk criterions. 
80 | """ 81 | return accuracy(pred, target, self.topk, self.thresh) 82 | -------------------------------------------------------------------------------- /mmdet/models/losses/gaussian_focal_loss.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree 6 | 7 | 8 | import torch.nn as nn 9 | 10 | from ..builder import LOSSES 11 | from .utils import weighted_loss 12 | 13 | 14 | @weighted_loss 15 | def gaussian_focal_loss(pred, gaussian_target, alpha=2.0, gamma=4.0): 16 | """`Focal Loss `_ for targets in gaussian 17 | distribution. 18 | 19 | Args: 20 | pred (torch.Tensor): The prediction. 21 | gaussian_target (torch.Tensor): The learning target of the prediction 22 | in gaussian distribution. 23 | alpha (float, optional): A balanced form for Focal Loss. 24 | Defaults to 2.0. 25 | gamma (float, optional): The gamma for calculating the modulating 26 | factor. Defaults to 4.0. 27 | """ 28 | eps = 1e-12 29 | pos_weights = gaussian_target.eq(1) 30 | neg_weights = (1 - gaussian_target).pow(gamma) 31 | pos_loss = -(pred + eps).log() * (1 - pred).pow(alpha) * pos_weights 32 | neg_loss = -(1 - pred + eps).log() * pred.pow(alpha) * neg_weights 33 | return pos_loss + neg_loss 34 | 35 | 36 | @LOSSES.register_module() 37 | class GaussianFocalLoss(nn.Module): 38 | """GaussianFocalLoss is a variant of focal loss. 39 | 40 | More details can be found in the `paper 41 | `_ 42 | Code is modified from `kp_utils.py 43 | `_ # noqa: E501 44 | Please notice that the target in GaussianFocalLoss is a gaussian heatmap, 45 | not 0/1 binary target. 46 | 47 | Args: 48 | alpha (float): Power of prediction. 49 | gamma (float): Power of target for negtive samples. 50 | reduction (str): Options are "none", "mean" and "sum". 
51 | loss_weight (float): Loss weight of current loss. 52 | """ 53 | 54 | def __init__(self, alpha=2.0, gamma=4.0, reduction="mean", loss_weight=1.0): 55 | super(GaussianFocalLoss, self).__init__() 56 | self.alpha = alpha 57 | self.gamma = gamma 58 | self.reduction = reduction 59 | self.loss_weight = loss_weight 60 | 61 | def forward( 62 | self, pred, target, weight=None, avg_factor=None, reduction_override=None 63 | ): 64 | """Forward function. 65 | 66 | Args: 67 | pred (torch.Tensor): The prediction. 68 | target (torch.Tensor): The learning target of the prediction 69 | in gaussian distribution. 70 | weight (torch.Tensor, optional): The weight of loss for each 71 | prediction. Defaults to None. 72 | avg_factor (int, optional): Average factor that is used to average 73 | the loss. Defaults to None. 74 | reduction_override (str, optional): The reduction method used to 75 | override the original reduction method of the loss. 76 | Defaults to None. 77 | """ 78 | assert reduction_override in (None, "none", "mean", "sum") 79 | reduction = reduction_override if reduction_override else self.reduction 80 | loss_reg = self.loss_weight * gaussian_focal_loss( 81 | pred, 82 | target, 83 | weight, 84 | alpha=self.alpha, 85 | gamma=self.gamma, 86 | reduction=reduction, 87 | avg_factor=avg_factor, 88 | ) 89 | return loss_reg 90 | -------------------------------------------------------------------------------- /mmdet/models/losses/mse_loss.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 
3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree 6 | 7 | 8 | import torch.nn as nn 9 | import torch.nn.functional as F 10 | 11 | from ..builder import LOSSES 12 | from .utils import weighted_loss 13 | 14 | 15 | @weighted_loss 16 | def mse_loss(pred, target): 17 | """Warpper of mse loss.""" 18 | return F.mse_loss(pred, target, reduction="none") 19 | 20 | 21 | @LOSSES.register_module() 22 | class MSELoss(nn.Module): 23 | """MSELoss. 24 | 25 | Args: 26 | reduction (str, optional): The method that reduces the loss to a 27 | scalar. Options are "none", "mean" and "sum". 28 | loss_weight (float, optional): The weight of the loss. Defaults to 1.0 29 | """ 30 | 31 | def __init__(self, reduction="mean", loss_weight=1.0): 32 | super().__init__() 33 | self.reduction = reduction 34 | self.loss_weight = loss_weight 35 | 36 | def forward(self, pred, target, weight=None, avg_factor=None): 37 | """Forward function of loss. 38 | 39 | Args: 40 | pred (torch.Tensor): The prediction. 41 | target (torch.Tensor): The learning target of the prediction. 42 | weight (torch.Tensor, optional): Weight of the loss for each 43 | prediction. Defaults to None. 44 | avg_factor (int, optional): Average factor that is used to average 45 | the loss. Defaults to None. 46 | 47 | Returns: 48 | torch.Tensor: The calculated loss 49 | """ 50 | loss = self.loss_weight * mse_loss( 51 | pred, target, weight, reduction=self.reduction, avg_factor=avg_factor 52 | ) 53 | return loss 54 | -------------------------------------------------------------------------------- /mmdet/models/losses/utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 
3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree 6 | 7 | 8 | import functools 9 | 10 | import torch.nn.functional as F 11 | 12 | 13 | def reduce_loss(loss, reduction): 14 | """Reduce loss as specified. 15 | 16 | Args: 17 | loss (Tensor): Elementwise loss tensor. 18 | reduction (str): Options are "none", "mean" and "sum". 19 | 20 | Return: 21 | Tensor: Reduced loss tensor. 22 | """ 23 | reduction_enum = F._Reduction.get_enum(reduction) 24 | # none: 0, elementwise_mean:1, sum: 2 25 | if reduction_enum == 0: 26 | return loss 27 | elif reduction_enum == 1: 28 | return loss.mean() 29 | elif reduction_enum == 2: 30 | return loss.sum() 31 | 32 | 33 | def weight_reduce_loss(loss, weight=None, reduction="mean", avg_factor=None): 34 | """Apply element-wise weight and reduce loss. 35 | 36 | Args: 37 | loss (Tensor): Element-wise loss. 38 | weight (Tensor): Element-wise weights. 39 | reduction (str): Same as built-in losses of PyTorch. 40 | avg_factor (float): Avarage factor when computing the mean of losses. 41 | 42 | Returns: 43 | Tensor: Processed loss values. 44 | """ 45 | # if weight is specified, apply element-wise weight 46 | if weight is not None: 47 | loss = loss * weight 48 | 49 | # if avg_factor is not specified, just reduce the loss 50 | if avg_factor is None: 51 | loss = reduce_loss(loss, reduction) 52 | else: 53 | # if reduction is mean, then average the loss by avg_factor 54 | if reduction == "mean": 55 | loss = loss.sum() / avg_factor 56 | # if reduction is 'none', then do nothing, otherwise raise an error 57 | elif reduction != "none": 58 | raise ValueError('avg_factor can not be used with reduction="sum"') 59 | return loss 60 | 61 | 62 | def weighted_loss(loss_func): 63 | """Create a weighted version of a given loss function. 64 | 65 | To use this decorator, the loss function must have the signature like 66 | `loss_func(pred, target, **kwargs)`. 
The function only needs to compute 67 | element-wise loss without any reduction. This decorator will add weight 68 | and reduction arguments to the function. The decorated function will have 69 | the signature like `loss_func(pred, target, weight=None, reduction='mean', 70 | avg_factor=None, **kwargs)`. 71 | 72 | :Example: 73 | 74 | >>> import torch 75 | >>> @weighted_loss 76 | >>> def l1_loss(pred, target): 77 | >>> return (pred - target).abs() 78 | 79 | >>> pred = torch.Tensor([0, 2, 3]) 80 | >>> target = torch.Tensor([1, 1, 1]) 81 | >>> weight = torch.Tensor([1, 0, 1]) 82 | 83 | >>> l1_loss(pred, target) 84 | tensor(1.3333) 85 | >>> l1_loss(pred, target, weight) 86 | tensor(1.) 87 | >>> l1_loss(pred, target, reduction='none') 88 | tensor([1., 1., 2.]) 89 | >>> l1_loss(pred, target, weight, avg_factor=2) 90 | tensor(1.5000) 91 | """ 92 | 93 | @functools.wraps(loss_func) 94 | def wrapper(pred, target, weight=None, reduction="mean", avg_factor=None, **kwargs): 95 | # get element-wise loss 96 | loss = loss_func(pred, target, **kwargs) 97 | loss = weight_reduce_loss(loss, weight, reduction, avg_factor) 98 | return loss 99 | 100 | return wrapper 101 | -------------------------------------------------------------------------------- /mmdet/models/necks/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree 6 | 7 | 8 | from .fpn import FPN 9 | 10 | __all__ = [ 11 | "FPN", 12 | ] 13 | -------------------------------------------------------------------------------- /mmdet/models/roi_heads/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 
3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree 6 | 7 | 8 | from .base_roi_head import BaseRoIHead 9 | from .bbox_heads import ( 10 | BBoxHead, 11 | ConvFCBBoxHead, 12 | Shared2FCBBoxHead, 13 | Shared4Conv1FCBBoxHead, 14 | ) 15 | from .mask_heads import ( 16 | CoarseMaskHead, 17 | FCNMaskHead, 18 | FusedSemanticHead, 19 | GridHead, 20 | HTCMaskHead, 21 | MaskIoUHead, 22 | MaskPointHead, 23 | ) 24 | from .oln_roi_head import OlnRoIHead 25 | from .rec_roi_head import RecRoIHead 26 | from .roi_extractors import SingleRoIExtractor 27 | from .shared_heads import ResLayer 28 | from .standard_roi_head import StandardRoIHead 29 | 30 | __all__ = [ 31 | "BaseRoIHead", 32 | "ResLayer", 33 | "BBoxHead", 34 | "ConvFCBBoxHead", 35 | "Shared2FCBBoxHead", 36 | "StandardRoIHead", 37 | "Shared4Conv1FCBBoxHead", 38 | "FCNMaskHead", 39 | "SingleRoIExtractor", 40 | "OlnRoIHead", 41 | "RecRoIHead", 42 | ] 43 | -------------------------------------------------------------------------------- /mmdet/models/roi_heads/base_roi_head.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 
3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree 6 | 7 | 8 | from abc import ABCMeta, abstractmethod 9 | 10 | import torch.nn as nn 11 | 12 | from ..builder import build_shared_head 13 | 14 | 15 | class BaseRoIHead(nn.Module, metaclass=ABCMeta): 16 | """Base class for RoIHeads.""" 17 | 18 | def __init__( 19 | self, 20 | bbox_roi_extractor=None, 21 | bbox_head=None, 22 | mask_roi_extractor=None, 23 | mask_head=None, 24 | shared_head=None, 25 | train_cfg=None, 26 | test_cfg=None, 27 | ): 28 | super(BaseRoIHead, self).__init__() 29 | self.train_cfg = train_cfg 30 | self.test_cfg = test_cfg 31 | if shared_head is not None: 32 | self.shared_head = build_shared_head(shared_head) 33 | 34 | if bbox_head is not None: 35 | self.init_bbox_head(bbox_roi_extractor, bbox_head) 36 | 37 | if mask_head is not None: 38 | self.init_mask_head(mask_roi_extractor, mask_head) 39 | 40 | self.init_assigner_sampler() 41 | 42 | @property 43 | def with_bbox(self): 44 | """bool: whether the RoI head contains a `bbox_head`""" 45 | return hasattr(self, "bbox_head") and self.bbox_head is not None 46 | 47 | @property 48 | def with_mask(self): 49 | """bool: whether the RoI head contains a `mask_head`""" 50 | return hasattr(self, "mask_head") and self.mask_head is not None 51 | 52 | @property 53 | def with_shared_head(self): 54 | """bool: whether the RoI head contains a `shared_head`""" 55 | return hasattr(self, "shared_head") and self.shared_head is not None 56 | 57 | @abstractmethod 58 | def init_weights(self, pretrained): 59 | """Initialize the weights in head. 60 | 61 | Args: 62 | pretrained (str, optional): Path to pre-trained weights. 63 | Defaults to None. 
64 | """ 65 | pass 66 | 67 | @abstractmethod 68 | def init_bbox_head(self): 69 | """Initialize ``bbox_head``""" 70 | pass 71 | 72 | @abstractmethod 73 | def init_mask_head(self): 74 | """Initialize ``mask_head``""" 75 | pass 76 | 77 | @abstractmethod 78 | def init_assigner_sampler(self): 79 | """Initialize assigner and sampler.""" 80 | pass 81 | 82 | @abstractmethod 83 | def forward_train( 84 | self, 85 | x, 86 | img_meta, 87 | proposal_list, 88 | gt_bboxes, 89 | gt_labels, 90 | gt_bboxes_ignore=None, 91 | gt_masks=None, 92 | **kwargs, 93 | ): 94 | """Forward function during training.""" 95 | pass 96 | 97 | async def async_simple_test(self, x, img_meta, **kwargs): 98 | """Asynchronized test function.""" 99 | raise NotImplementedError 100 | 101 | def simple_test( 102 | self, x, proposal_list, img_meta, proposals=None, rescale=False, **kwargs 103 | ): 104 | """Test without augmentation.""" 105 | pass 106 | 107 | def aug_test(self, x, proposal_list, img_metas, rescale=False, **kwargs): 108 | """Test with augmentations. 109 | 110 | If rescale is False, then returned bboxes and masks will fit the scale 111 | of imgs[0]. 112 | """ 113 | pass 114 | -------------------------------------------------------------------------------- /mmdet/models/roi_heads/bbox_heads/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 
3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree 6 | 7 | 8 | from .bbox_head import BBoxHead 9 | from .convfc_bbox_head import ConvFCBBoxHead, Shared2FCBBoxHead, Shared4Conv1FCBBoxHead 10 | from .convfc_bbox_score_head import ConvFCBBoxScoreHead, Shared2FCBBoxScoreHead 11 | 12 | __all__ = [ 13 | "BBoxHead", 14 | "ConvFCBBoxHead", 15 | "Shared2FCBBoxHead", 16 | "Shared4Conv1FCBBoxHead", 17 | "ConvFCBBoxScoreHead", 18 | "Shared2FCBBoxScoreHead", 19 | ] 20 | -------------------------------------------------------------------------------- /mmdet/models/roi_heads/mask_heads/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree 6 | 7 | 8 | from .coarse_mask_head import CoarseMaskHead 9 | from .fcn_mask_head import FCNMaskHead 10 | from .fused_semantic_head import FusedSemanticHead 11 | from .grid_head import GridHead 12 | from .htc_mask_head import HTCMaskHead 13 | from .mask_point_head import MaskPointHead 14 | from .maskiou_head import MaskIoUHead 15 | 16 | __all__ = [ 17 | "FCNMaskHead", 18 | "HTCMaskHead", 19 | "FusedSemanticHead", 20 | "GridHead", 21 | "MaskIoUHead", 22 | "CoarseMaskHead", 23 | "MaskPointHead", 24 | ] 25 | -------------------------------------------------------------------------------- /mmdet/models/roi_heads/mask_heads/htc_mask_head.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 
@HEADS.register_module()
class HTCMaskHead(FCNMaskHead):
    """FCN mask head variant used by Hybrid Task Cascade.

    Adds an optional 1x1 residual conv (``conv_res``) so that the mask
    feature produced by the previous cascade stage can be fused into the
    current stage's input before the usual conv stack runs.
    """

    def __init__(self, with_conv_res=True, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.with_conv_res = with_conv_res
        if self.with_conv_res:
            # 1x1 projection applied to the previous stage's mask feature
            # before it is added to the current input.
            self.conv_res = ConvModule(
                self.conv_out_channels,
                self.conv_out_channels,
                1,
                conv_cfg=self.conv_cfg,
                norm_cfg=self.norm_cfg,
            )

    def init_weights(self):
        """Initialize inherited weights plus the residual conv, if present."""
        super().init_weights()
        if self.with_conv_res:
            self.conv_res.init_weights()

    def forward(self, x, res_feat=None, return_logits=True, return_feat=True):
        """Run the conv stack, optionally fusing ``res_feat`` first.

        Args:
            x: Input RoI feature map.
            res_feat: Optional feature from the previous cascade stage;
                requires ``with_conv_res=True``.
            return_logits (bool): Include mask logits in the output.
            return_feat (bool): Include the pre-upsample feature map.

        Returns:
            A ``[logits, feat]`` list when both flags are set, otherwise
            the single requested value.
        """
        if res_feat is not None:
            assert self.with_conv_res
            x = x + self.conv_res(res_feat)
        for conv_layer in self.convs:
            x = conv_layer(x)
        res_feat = x
        outs = []
        if return_logits:
            upsampled = self.upsample(x)
            if self.upsample_method == "deconv":
                upsampled = self.relu(upsampled)
            outs.append(self.conv_logits(upsampled))
        if return_feat:
            outs.append(res_feat)
        return outs if len(outs) > 1 else outs[0]
class BaseRoIExtractor(nn.Module, metaclass=ABCMeta):
    """Base class for RoI extractors.

    Args:
        roi_layer (dict): Config of the RoI op, with a ``type`` key naming a
            module under ``mmcv/ops`` (e.g. ``RoIAlign``) plus its kwargs.
        out_channels (int): Output channels of RoI layers.
        featmap_strides (List[int]): Strides of the input feature maps.
    """

    def __init__(self, roi_layer, out_channels, featmap_strides):
        super().__init__()
        self.roi_layers = self.build_roi_layers(roi_layer, featmap_strides)
        self.out_channels = out_channels
        self.featmap_strides = featmap_strides
        self.fp16_enabled = False

    @property
    def num_inputs(self):
        """int: Number of input feature maps."""
        return len(self.featmap_strides)

    def init_weights(self):
        """RoI ops carry no learnable weights; nothing to initialize."""
        pass

    def build_roi_layers(self, layer_cfg, featmap_strides):
        """Build one RoI op per feature level.

        Args:
            layer_cfg (dict): Config of the RoI layer (see class docstring).
            featmap_strides (List[int]): Stride of each feature map w.r.t.
                the original image; its inverse becomes the op's
                ``spatial_scale``.

        Returns:
            nn.ModuleList: One RoI op per level, in level order.
        """
        cfg = layer_cfg.copy()  # keep the caller's config untouched
        layer_type = cfg.pop("type")
        assert hasattr(ops, layer_type)
        layer_cls = getattr(ops, layer_type)
        return nn.ModuleList(
            layer_cls(spatial_scale=1 / stride, **cfg) for stride in featmap_strides
        )

    def roi_rescale(self, rois, scale_factor):
        """Rescale each RoI about its own center by ``scale_factor``.

        Args:
            rois (torch.Tensor): RoIs of shape (n, 5) laid out as
                ``(batch_ind, x1, y1, x2, y2)`` as expected by mmcv RoI ops.
            scale_factor (float): Multiplier applied to each box's size.

        Returns:
            torch.Tensor: Rescaled RoIs, same shape; column 0 is preserved.
        """
        cx = (rois[:, 1] + rois[:, 3]) * 0.5
        cy = (rois[:, 2] + rois[:, 4]) * 0.5
        half_w = (rois[:, 3] - rois[:, 1]) * scale_factor * 0.5
        half_h = (rois[:, 4] - rois[:, 2]) * scale_factor * 0.5
        return torch.stack(
            (rois[:, 0], cx - half_w, cy - half_h, cx + half_w, cy + half_h),
            dim=-1,
        )

    @abstractmethod
    def forward(self, feats, rois, roi_scale_factor=None):
        """Extract RoI features; implemented by subclasses."""
        pass
@ROI_EXTRACTORS.register_module()
class GenericRoIExtractor(BaseRoIExtractor):
    """RoI extractor that pools from every feature level and aggregates.

    Implementation of "A novel Region of Interest Extraction Layer for
    Instance Segmentation": each RoI is pooled from all levels and the
    per-level features are combined by summation or channel concatenation.

    Args:
        aggregation (str): How to combine per-level features, either
            ``'sum'`` or ``'concat'``. Default: ``'sum'``.
        pre_cfg (dict | None): Plugin applied to each per-level RoI feature
            before aggregation. Default: None.
        post_cfg (dict | None): Plugin applied to the aggregated feature
            before returning. Default: None.
        kwargs (keyword arguments): Remaining arguments of
            :class:`BaseRoIExtractor`.
    """

    def __init__(self, aggregation="sum", pre_cfg=None, post_cfg=None, **kwargs):
        super().__init__(**kwargs)

        assert aggregation in ["sum", "concat"]

        self.aggregation = aggregation
        self.with_post = post_cfg is not None
        self.with_pre = pre_cfg is not None
        # Optional plugin modules applied before/after aggregation.
        if self.with_post:
            self.post_module = build_plugin_layer(post_cfg, "_post_module")[1]
        if self.with_pre:
            self.pre_module = build_plugin_layer(pre_cfg, "_pre_module")[1]

    @force_fp32(apply_to=("feats",), out_fp16=True)
    def forward(self, feats, rois, roi_scale_factor=None):
        """Pool ``rois`` from every level of ``feats`` and aggregate."""
        # Single level: nothing to aggregate.
        if len(feats) == 1:
            return self.roi_layers[0](feats[0], rois)

        out_size = self.roi_layers[0].output_size
        roi_feats = feats[0].new_zeros(rois.size(0), self.out_channels, *out_size)

        # Sometimes rois is an empty tensor; return the zero buffer directly.
        if roi_feats.shape[0] == 0:
            return roi_feats

        if roi_scale_factor is not None:
            rois = self.roi_rescale(rois, roi_scale_factor)

        # Running channel offset; only advanced in 'concat' mode.
        channel_start = 0
        for level, feat in enumerate(feats):
            level_feats = self.roi_layers[level](feat, rois)
            channel_end = channel_start + level_feats.size(1)
            if self.with_pre:
                # Pre-process the RoI feature extracted from this level.
                level_feats = self.pre_module(level_feats)
            if self.aggregation == "sum":
                roi_feats += level_feats
            else:
                # Write this level's channels into its slice.
                roi_feats[:, channel_start:channel_end] = level_feats
                channel_start = channel_end
        if self.aggregation == "concat":
            # All level channels together must fill out_channels exactly.
            assert channel_start == self.out_channels

        if self.with_post:
            # Post-process the aggregated feature before returning.
            roi_feats = self.post_module(roi_feats)
        return roi_feats
@SHARED_HEADS.register_module()
class ResLayer(nn.Module):
    """Wraps one ResNet stage as a shared RoI head (e.g. for C4 models).

    Args:
        depth (int): ResNet depth, a key of ``ResNet.arch_settings``.
        stage (int): Zero-based index of the stage to instantiate.
            Default: 3.
        stride (int): Stride passed to the stage's first block. Default: 2.
        dilation (int): Dilation of the stage's convolutions. Default: 1.
        style (str): Block style, ``'pytorch'`` or ``'caffe'``.
        norm_cfg (dict): Normalization layer config.
        norm_eval (bool): Keep BatchNorm layers in eval mode while training.
        with_cp (bool): Use gradient checkpointing in the stage.
        dcn (dict | None): Deformable conv config, if any.
    """

    def __init__(
        self,
        depth,
        stage=3,
        stride=2,
        dilation=1,
        style="pytorch",
        norm_cfg=dict(type="BN", requires_grad=True),
        norm_eval=True,
        with_cp=False,
        dcn=None,
    ):
        super().__init__()
        self.norm_eval = norm_eval
        self.norm_cfg = norm_cfg
        self.stage = stage
        self.fp16_enabled = False
        block, stage_blocks = ResNet.arch_settings[depth]
        num_blocks = stage_blocks[stage]
        # Channel widths follow the standard ResNet doubling schedule.
        out_planes = 64 * 2**stage
        in_planes = 64 * 2 ** (stage - 1) * block.expansion

        stage_module = _ResLayer(
            block,
            in_planes,
            out_planes,
            num_blocks,
            stride=stride,
            dilation=dilation,
            style=style,
            with_cp=with_cp,
            norm_cfg=self.norm_cfg,
            dcn=dcn,
        )
        self.add_module(f"layer{stage + 1}", stage_module)

    def init_weights(self, pretrained=None):
        """Initialize the weights in the module.

        Args:
            pretrained (str, optional): Path to pre-trained weights.
                Defaults to None.
        """
        if isinstance(pretrained, str):
            logger = get_root_logger()
            load_checkpoint(self, pretrained, strict=False, logger=logger)
        elif pretrained is None:
            for module in self.modules():
                if isinstance(module, nn.Conv2d):
                    kaiming_init(module)
                elif isinstance(module, nn.BatchNorm2d):
                    constant_init(module, 1)
        else:
            raise TypeError("pretrained must be a str or None")

    @auto_fp16()
    def forward(self, x):
        """Run the wrapped ResNet stage on ``x``."""
        stage_module = getattr(self, f"layer{self.stage + 1}")
        return stage_module(x)

    def train(self, mode=True):
        """Switch train/eval mode, keeping BN frozen when ``norm_eval``."""
        super().train(mode)
        if not self.norm_eval:
            return
        for module in self.modules():
            if isinstance(module, nn.BatchNorm2d):
                module.eval()
3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree 6 | 7 | 8 | from .builder import build_positional_encoding, build_transformer 9 | from .gaussian_target import gaussian_radius, gen_gaussian_target 10 | from .positional_encoding import LearnedPositionalEncoding, SinePositionalEncoding 11 | from .res_layer import ResLayer 12 | from .transformer import ( 13 | FFN, 14 | MultiheadAttention, 15 | Transformer, 16 | TransformerDecoder, 17 | TransformerDecoderLayer, 18 | TransformerEncoder, 19 | TransformerEncoderLayer, 20 | ) 21 | 22 | __all__ = [ 23 | "ResLayer", 24 | "gaussian_radius", 25 | "gen_gaussian_target", 26 | "MultiheadAttention", 27 | "FFN", 28 | "TransformerEncoderLayer", 29 | "TransformerEncoder", 30 | "TransformerDecoderLayer", 31 | "TransformerDecoder", 32 | "Transformer", 33 | "build_transformer", 34 | "build_positional_encoding", 35 | "SinePositionalEncoding", 36 | "LearnedPositionalEncoding", 37 | ] 38 | -------------------------------------------------------------------------------- /mmdet/models/utils/builder.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 
from mmcv.utils import build_from_cfg, Registry

# Registries holding transformer and positional-encoding implementations;
# modules register themselves and are later built from config dicts.
TRANSFORMER = Registry("Transformer")
POSITIONAL_ENCODING = Registry("Position encoding")


def build_transformer(cfg, default_args=None):
    """Builder for Transformer.

    Args:
        cfg (dict): Config whose ``type`` key names a class registered in
            ``TRANSFORMER``; remaining keys are constructor kwargs.
        default_args (dict, optional): Default kwargs merged into ``cfg``.
            Defaults to None.

    Returns:
        The constructed transformer module.
    """
    return build_from_cfg(cfg, TRANSFORMER, default_args)


def build_positional_encoding(cfg, default_args=None):
    """Builder for Position Encoding.

    Args:
        cfg (dict): Config whose ``type`` key names a class registered in
            ``POSITIONAL_ENCODING``; remaining keys are constructor kwargs.
        default_args (dict, optional): Default kwargs merged into ``cfg``.
            Defaults to None.

    Returns:
        The constructed positional-encoding module.
    """
    return build_from_cfg(cfg, POSITIONAL_ENCODING, default_args)
def collect_env():
    """Collect the information of the running environments.

    Returns:
        dict: Environment info from ``mmcv.utils.collect_env`` plus an
        ``"MMDetection"`` key holding ``<version>+<short git hash>``.
    """
    env_info = collect_base_env()
    # Tag the report with the installed mmdet version and current commit.
    env_info["MMDetection"] = mmdet.__version__ + "+" + get_git_hash()[:7]
    return env_info


# Allow running this module directly to print the environment report.
if __name__ == "__main__":
    for name, val in collect_env().items():
        print(f"{name}: {val}")
if sys.version_info >= (3, 7):

    @contextlib.contextmanager
    def profile_time(trace_name, name, enabled=True, stream=None, end_stream=None):
        """Print time spent by CPU and GPU.

        Useful as a temporary context manager to find sweet spots of code
        suitable for async implementation. When disabled, or when CUDA is
        unavailable, the body runs with no timing overhead.
        """
        if not enabled or not torch.cuda.is_available():
            yield
            return
        # Default to the current CUDA stream for both start and end events.
        stream = stream or torch.cuda.current_stream()
        end_stream = end_stream or stream
        start_event = torch.cuda.Event(enable_timing=True)
        end_event = torch.cuda.Event(enable_timing=True)
        stream.record_event(start_event)
        try:
            cpu_start = time.monotonic()
            yield
        finally:
            # Report even if the body raised.
            cpu_end = time.monotonic()
            end_stream.record_event(end_event)
            end_event.synchronize()
            cpu_time = (cpu_end - cpu_start) * 1000
            gpu_time = start_event.elapsed_time(end_event)
            msg = f"{trace_name} {name} cpu_time {cpu_time:.2f} ms "
            msg += f"gpu_time {gpu_time:.2f} ms stream {stream}"
            print(msg, end_stream)
9 | 10 | __version__ = "2.8.0" 11 | short_version = __version__ 12 | 13 | 14 | def parse_version_info(version_str): 15 | version_info = [] 16 | for x in version_str.split("."): 17 | if x.isdigit(): 18 | version_info.append(int(x)) 19 | elif x.find("rc") != -1: 20 | patch_version = x.split("rc") 21 | version_info.append(int(patch_version[0])) 22 | version_info.append(f"rc{patch_version[1]}") 23 | return tuple(version_info) 24 | 25 | 26 | version_info = parse_version_info(__version__) 27 | -------------------------------------------------------------------------------- /pa_lib/cython_lib/graph_helper.pyx: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree 6 | 7 | 8 | # cython: language_level=3 9 | 10 | cimport cython 11 | cimport numpy as np 12 | from libc.stdint cimport int32_t, int64_t 13 | 14 | import numpy as np 15 | 16 | cdef int64_t loc2idx( 17 | int32_t i, 18 | int32_t j, 19 | int32_t width, 20 | ): 21 | cdef int64_t index = i * width + j 22 | return index 23 | 24 | @cython.boundscheck(False) 25 | @cython.wraparound(False) 26 | @cython.nonecheck(False) 27 | cdef list generate_nodes_edges_labels_c( 28 | np.ndarray[np.float32_t, ndim=3] potentials, 29 | ): 30 | cdef int32_t height = potentials.shape[1] 31 | cdef int32_t width = potentials.shape[2] 32 | cdef np.ndarray[np.uint32_t, ndim=2] new_label 33 | 34 | cdef int64_t num_nodes = int(potentials.size / 4) 35 | 36 | cdef list out_nodes = [] 37 | cdef dict property 38 | cdef tuple node 39 | for i in range(num_nodes): 40 | property = {"labels": [i]} 41 | node = (i, property) 42 | out_nodes.append(node) 43 | 44 | cdef np.ndarray[np.float32_t, ndim=1] potential 45 | cdef int64_t curr_idx, neighbor 46 | cdef list out_edges = [] 47 | cdef tuple edge 48 | 49 | new_label = 
def generate_nodes_edges_labels(
    np.ndarray potentials,
) -> list:
    """Build graph nodes, edges and an initial label map from affinities.

    Thin Python-visible wrapper around the C-level implementation.

    Args:
        potentials: float32 array of shape (4, H, W); channel c holds the
            pairwise affinity between pixel (i, j) and one neighbor —
            as read by the C code: 0 -> (i-1, j), 1 -> (i, j-1),
            2 -> (i-1, j-1), 3 -> (i+1, j-1). Edge weights are
            ``1 - affinity``.

    Returns:
        list: ``[nodes, edges, labels]`` where nodes are ``(index, props)``
        tuples, edges are weighted pixel-pair tuples, and labels is a
        (H, W) uint32 map of per-pixel node indices.
    """
    return generate_nodes_edges_labels_c(potentials)
"""Build script for the ``cython_lib.graph_helper`` extension.

``distutils`` was deprecated by PEP 632 and removed in Python 3.12, so the
drop-in ``setuptools`` equivalents are used instead; the build behavior is
unchanged.
"""

from setuptools import setup
from setuptools.extension import Extension

import numpy
from Cython.Build import cythonize

# Single extension: the graph helper used by the pairwise-affinity code.
ext_modules = [
    Extension("cython_lib.graph_helper", ["graph_helper.pyx"]),
]

setup(
    name="cython_lib",
    ext_modules=cythonize(ext_modules),
    # graph_helper.pyx cimports numpy, so its C headers must be findable.
    include_dirs=[numpy.get_include()],
)
-------------------------------------------------------------------------------- 1 | albumentations>=0.3.2 2 | cityscapesscripts 3 | imagecorruptions 4 | mmlvis 5 | scipy 6 | scikit-learn 7 | -------------------------------------------------------------------------------- /requirements/readthedocs.txt: -------------------------------------------------------------------------------- 1 | mmcv 2 | torch 3 | torchvision 4 | -------------------------------------------------------------------------------- /requirements/runtime.txt: -------------------------------------------------------------------------------- 1 | matplotlib 2 | mmpycocotools 3 | numpy 4 | six 5 | terminaltables 6 | tensorboard 7 | -------------------------------------------------------------------------------- /requirements/tests.txt: -------------------------------------------------------------------------------- 1 | asynctest 2 | codecov 3 | flake8 4 | interrogate 5 | isort==4.3.21 6 | # Note: used for kwarray.group_items, this may be ported to mmcv in the future.
7 | kwarray 8 | pytest 9 | ubelt 10 | xdoctest>=0.10.0 11 | yapf 12 | -------------------------------------------------------------------------------- /resources/coco_test_12510.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/Generic-Grouping/c0f33d76b207f937b538bfecf15d99672e068158/resources/coco_test_12510.jpg -------------------------------------------------------------------------------- /resources/corruptions_sev_3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/Generic-Grouping/c0f33d76b207f937b538bfecf15d99672e068158/resources/corruptions_sev_3.png -------------------------------------------------------------------------------- /resources/data_pipeline.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/Generic-Grouping/c0f33d76b207f937b538bfecf15d99672e068158/resources/data_pipeline.png -------------------------------------------------------------------------------- /resources/loss_curve.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/Generic-Grouping/c0f33d76b207f937b538bfecf15d99672e068158/resources/loss_curve.png -------------------------------------------------------------------------------- /resources/mmdet-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/Generic-Grouping/c0f33d76b207f937b538bfecf15d99672e068158/resources/mmdet-logo.png -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [isort] 2 | line_length = 79 3 | multi_line_output = 0 4 | known_standard_library = setuptools 5 | 
"""Tests for async interface."""


class AsyncTestCase(asynctest.TestCase):
    """Base case that drives coroutine test methods with a timeout."""

    use_default_loop = False
    forbid_get_event_loop = True

    # Upper bound (seconds) for a single async test; overridable via env var.
    TEST_TIMEOUT = int(os.getenv("ASYNCIO_TEST_TIMEOUT", "30"))

    def _run_test_method(self, method):
        result = method()
        if asyncio.iscoroutine(result):
            # Coroutine tests run to completion on the case's event loop.
            self.loop.run_until_complete(
                asyncio.wait_for(result, timeout=self.TEST_TIMEOUT)
            )


class MaskRCNNDetector:
    """Async detector wrapper multiplexing inference over CUDA streams."""

    def __init__(
        self, model_config, checkpoint=None, streamqueue_size=3, device="cuda:0"
    ):
        self.streamqueue_size = streamqueue_size
        self.device = device
        # build the model and load checkpoint
        # BUGFIX: the ``checkpoint`` argument was previously ignored
        # (``checkpoint=None`` was hard-coded); forward it so callers can
        # actually load weights. The default (None) behaves as before.
        self.model = init_detector(
            model_config, checkpoint=checkpoint, device=self.device
        )
        self.streamqueue = None

    async def init(self):
        """Fill the stream queue with CUDA streams used for inference."""
        self.streamqueue = asyncio.Queue()
        for _ in range(self.streamqueue_size):
            stream = torch.cuda.Stream(device=self.device)
            self.streamqueue.put_nowait(stream)

    if sys.version_info >= (3, 7):

        async def apredict(self, img):
            """Run async inference on one image (path or loaded array)."""
            if isinstance(img, str):
                img = mmcv.imread(img)
            async with concurrent(self.streamqueue):
                result = await async_inference_detector(self.model, img)
            return result


class AsyncInferenceTestCase(AsyncTestCase):
    if sys.version_info >= (3, 7):

        async def test_simple_inference(self):
            if not torch.cuda.is_available():
                import pytest

                pytest.skip("test requires GPU and torch+cuda")

            ori_grad_enabled = torch.is_grad_enabled()
            # BUGFIX: ``__name__`` is a module name, not a filesystem path,
            # so dirname() on it never yielded the repo root; use
            # ``__file__`` (this file lives in ``tests/``).
            root_dir = os.path.dirname(os.path.dirname(__file__))
            model_config = os.path.join(
                root_dir, "configs/mask_rcnn/mask_rcnn_r50_fpn_1x_coco.py"
            )
            detector = MaskRCNNDetector(model_config)
            await detector.init()
            img_path = os.path.join(root_dir, "demo/demo.jpg")
            bboxes, _ = await detector.apredict(img_path)
            self.assertTrue(bboxes)
            # asy inference detector will hack grad_enabled,
            # so restore here to avoid it to influence other tests
            torch.set_grad_enabled(ori_grad_enabled)
def test_yolo_bbox_coder():
    """YOLOBBoxCoder.decode must reproduce pre-computed reference boxes.

    The expected values are reference outputs recorded for this exact
    anchor/prediction/grid-size combination; the test pins the decode
    math against regressions.
    """
    coder = YOLOBBoxCoder()
    # Anchor boxes in (x1, y1, x2, y2) format, one per grid cell.
    bboxes = torch.Tensor(
        [
            [-42.0, -29.0, 74.0, 61.0],
            [-10.0, -29.0, 106.0, 61.0],
            [22.0, -29.0, 138.0, 61.0],
            [54.0, -29.0, 170.0, 61.0],
        ]
    )
    # Raw per-anchor network outputs — presumably (tx, ty, tw, th);
    # TODO confirm against YOLOBBoxCoder.decode.
    pred_bboxes = torch.Tensor(
        [
            [0.4709, 0.6152, 0.1690, -0.4056],
            [0.5399, 0.6653, 0.1162, -0.4162],
            [0.4654, 0.6618, 0.1548, -0.4301],
            [0.4786, 0.6197, 0.1896, -0.4479],
        ]
    )
    grid_size = 32
    expected_decode_bboxes = torch.Tensor(
        [
            [-53.6102, -10.3096, 83.7478, 49.6824],
            [-15.8700, -8.3901, 114.4236, 50.9693],
            [11.1822, -8.0924, 146.6034, 50.4476],
            [41.2068, -8.9232, 181.4236, 48.5840],
        ]
    )
    assert expected_decode_bboxes.allclose(coder.decode(bboxes, pred_bboxes, grid_size))
def test_default_format_bundle():
    """DefaultFormatBundle should add pad_shape/scale_factor/img_norm_cfg."""
    results = dict(
        img_prefix=osp.join(osp.dirname(__file__), "../data"),
        img_info=dict(filename="color.jpg"),
    )
    loader = build_from_cfg(dict(type="LoadImageFromFile"), PIPELINES)
    formatter = build_from_cfg(dict(type="DefaultFormatBundle"), PIPELINES)
    results = loader(results)
    bundle_keys = ("pad_shape", "scale_factor", "img_norm_cfg")
    # Loading alone must not introduce the bundle-added keys...
    for key in bundle_keys:
        assert key not in results
    results = formatter(results)
    # ...but the format bundle must add all of them.
    for key in bundle_keys:
        assert key in results
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree


import pytest
from mmdet.datasets import replace_ImageToTensor


def test_replace_ImageToTensor():
    """replace_ImageToTensor must swap ImageToTensor for DefaultFormatBundle,
    both inside a MultiScaleFlipAug wrapper and at the top level, and warn."""
    # with MultiScaleFlipAug
    pipelines = [
        dict(type="LoadImageFromFile"),
        dict(
            type="MultiScaleFlipAug",
            img_scale=(1333, 800),
            flip=False,
            transforms=[
                dict(type="Resize", keep_ratio=True),
                dict(type="RandomFlip"),
                dict(type="Normalize"),
                dict(type="Pad", size_divisor=32),
                dict(type="ImageToTensor", keys=["img"]),
                dict(type="Collect", keys=["img"]),
            ],
        ),
    ]
    expected_pipelines = [
        dict(type="LoadImageFromFile"),
        dict(
            type="MultiScaleFlipAug",
            img_scale=(1333, 800),
            flip=False,
            transforms=[
                dict(type="Resize", keep_ratio=True),
                dict(type="RandomFlip"),
                dict(type="Normalize"),
                dict(type="Pad", size_divisor=32),
                # Only this entry differs from the input pipeline.
                dict(type="DefaultFormatBundle"),
                dict(type="Collect", keys=["img"]),
            ],
        ),
    ]
    # The replacement is expected to emit a deprecation-style UserWarning.
    with pytest.warns(UserWarning):
        assert expected_pipelines == replace_ImageToTensor(pipelines)

    # without MultiScaleFlipAug
    pipelines = [
        dict(type="LoadImageFromFile"),
        dict(type="Resize", keep_ratio=True),
        dict(type="RandomFlip"),
        dict(type="Normalize"),
        dict(type="Pad", size_divisor=32),
        dict(type="ImageToTensor", keys=["img"]),
        dict(type="Collect", keys=["img"]),
    ]
    expected_pipelines = [
        dict(type="LoadImageFromFile"),
        dict(type="Resize", keep_ratio=True),
        dict(type="RandomFlip"),
        dict(type="Normalize"),
        dict(type="Pad", size_divisor=32),
        dict(type="DefaultFormatBundle"),
        dict(type="Collect", keys=["img"]),
    ]
    with pytest.warns(UserWarning):
        assert expected_pipelines == replace_ImageToTensor(pipelines)
-------------------------------------------------------------------------------- /tests/test_misc.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree 6 | 7 | 8 | import numpy as np 9 | import pytest 10 | import torch 11 | from mmdet.core.mask.structures import BitmapMasks, PolygonMasks 12 | from mmdet.core.utils import mask2ndarray 13 | 14 | 15 | def dummy_raw_polygon_masks(size): 16 | """ 17 | Args: 18 | size (tuple): expected shape of dummy masks, (N, H, W) 19 | 20 | Return: 21 | list[list[ndarray]]: dummy mask 22 | """ 23 | num_obj, heigt, width = size 24 | polygons = [] 25 | for _ in range(num_obj): 26 | num_points = np.random.randint(5) * 2 + 6 27 | polygons.append([np.random.uniform(0, min(heigt, width), num_points)]) 28 | return polygons 29 | 30 | 31 | def test_mask2ndarray(): 32 | raw_masks = np.ones((3, 28, 28)) 33 | bitmap_mask = BitmapMasks(raw_masks, 28, 28) 34 | output_mask = mask2ndarray(bitmap_mask) 35 | assert np.allclose(raw_masks, output_mask) 36 | 37 | raw_masks = dummy_raw_polygon_masks((3, 28, 28)) 38 | polygon_masks = PolygonMasks(raw_masks, 28, 28) 39 | output_mask = mask2ndarray(polygon_masks) 40 | assert output_mask.shape == (3, 28, 28) 41 | 42 | raw_masks = np.ones((3, 28, 28)) 43 | output_mask = mask2ndarray(raw_masks) 44 | assert np.allclose(raw_masks, output_mask) 45 | 46 | raw_masks = torch.ones((3, 28, 28)) 47 | output_mask = mask2ndarray(raw_masks) 48 | assert np.allclose(raw_masks, output_mask) 49 | 50 | # test unsupported type 51 | raw_masks = [] 52 | with pytest.raises(TypeError): 53 | output_mask = mask2ndarray(raw_masks) 54 | -------------------------------------------------------------------------------- /tests/test_models/test_position_encoding.py: 
--------------------------------------------------------------------------------
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.

# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree


import pytest
import torch
from mmdet.models.utils import LearnedPositionalEncoding, SinePositionalEncoding


def test_sine_positional_encoding(num_feats=16, batch_size=2):
    """SinePositionalEncoding: scale validation, output shape, normalize flag."""
    # test invalid type of scale
    with pytest.raises(AssertionError):
        module = SinePositionalEncoding(num_feats, scale=(3.0,), normalize=True)

    module = SinePositionalEncoding(num_feats)
    h, w = 10, 6
    # Random boolean padding mask of shape (batch, h, w).
    mask = torch.rand(batch_size, h, w) > 0.5
    assert not module.normalize
    out = module(mask)
    # Output interleaves sine/cosine halves -> 2 * num_feats channels.
    assert out.shape == (batch_size, num_feats * 2, h, w)

    # set normalize
    module = SinePositionalEncoding(num_feats, normalize=True)
    assert module.normalize
    out = module(mask)
    assert out.shape == (batch_size, num_feats * 2, h, w)


def test_learned_positional_encoding(
    num_feats=16, row_num_embed=10, col_num_embed=10, batch_size=2
):
    """LearnedPositionalEncoding: embedding-table shapes and output shape."""
    module = LearnedPositionalEncoding(num_feats, row_num_embed, col_num_embed)
    assert module.row_embed.weight.shape == (row_num_embed, num_feats)
    assert module.col_embed.weight.shape == (col_num_embed, num_feats)
    h, w = 10, 6
    mask = torch.rand(batch_size, h, w) > 0.5
    out = module(mask)
    assert out.shape == (batch_size, num_feats * 2, h, w)
--------------------------------------------------------------------------------
/tests/test_version.py:
--------------------------------------------------------------------------------
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree


from mmdet import digit_version


def test_version_check():
    """digit_version must order releases numerically and rank rcN pre-releases
    strictly below the corresponding final release."""
    assert digit_version("1.0.5") > digit_version("1.0.5rc0")
    assert digit_version("1.0.5") > digit_version("1.0.4rc0")
    assert digit_version("1.0.5") > digit_version("1.0rc0")
    assert digit_version("1.0.0") > digit_version("0.6.2")
    assert digit_version("1.0.0") > digit_version("0.2.16")
    assert digit_version("1.0.5rc0") > digit_version("1.0.0rc0")
    assert digit_version("1.0.0rc1") > digit_version("1.0.0rc0")
    assert digit_version("1.0.0rc2") > digit_version("1.0.0rc0")
    assert digit_version("1.0.0rc2") > digit_version("1.0.0rc1")
    assert digit_version("1.0.1rc1") > digit_version("1.0.0rc1")
    assert digit_version("1.0.0") > digit_version("1.0.0rc1")
--------------------------------------------------------------------------------
/tools/browse_dataset.py:
--------------------------------------------------------------------------------
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree


import argparse
import os
from pathlib import Path

import mmcv
from mmcv import Config
from mmdet.core.utils import mask2ndarray
from mmdet.core.visualization import imshow_det_bboxes
from mmdet.datasets.builder import build_dataset


def parse_args():
    # CLI for visually browsing the ground-truth annotations of a dataset.
    parser = argparse.ArgumentParser(description="Browse a dataset")
    parser.add_argument("config", help="train config file path")
    parser.add_argument(
        "--skip-type",
        type=str,
        nargs="+",
        default=["DefaultFormatBundle", "Normalize", "Collect"],
        help="skip some useless pipeline",
    )
    parser.add_argument(
        "--output-dir",
        default=None,
        type=str,
        help="If there is no display interface, you can save it",
    )
    parser.add_argument("--not-show", default=False, action="store_true")
    parser.add_argument(
        "--show-interval", type=float, default=2, help="the interval of show (s)"
    )
    args = parser.parse_args()
    return args


def retrieve_data_cfg(config_path, skip_type):
    # Load the config and drop pipeline steps (e.g. Normalize) that would make
    # images unsuitable for direct display.  Mutates cfg.data.train in place.
    cfg = Config.fromfile(config_path)
    train_data_cfg = cfg.data.train
    train_data_cfg["pipeline"] = [
        x for x in train_data_cfg.pipeline if x["type"] not in skip_type
    ]

    return cfg


def main():
    # Iterate the train split and show/save each image with its GT overlays.
    args = parse_args()
    cfg = retrieve_data_cfg(args.config, args.skip_type)

    dataset = build_dataset(cfg.data.train)

    progress_bar = mmcv.ProgressBar(len(dataset))

    for item in dataset:
        filename = (
            os.path.join(args.output_dir, Path(item["filename"]).name)
            if args.output_dir is not None
            else None
        )

        # Masks are optional (detection-only datasets have none).
        gt_masks = item.get("gt_masks", None)
        if gt_masks is not None:
            gt_masks = mask2ndarray(gt_masks)

        imshow_det_bboxes(
            item["img"],
            item["gt_bboxes"],
            item["gt_labels"],
            gt_masks,
            class_names=dataset.CLASSES,
            show=not args.not_show,
            wait_time=args.show_interval,
            out_file=filename,
            bbox_color=(255, 102, 61),
            text_color=(255, 102, 61),
        )

        progress_bar.update()


if __name__ == "__main__":
    main()
--------------------------------------------------------------------------------
/tools/dist_test_bbox.sh:
--------------------------------------------------------------------------------
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.

# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree


#!/usr/bin/env bash

# Distributed bbox evaluation.  Usage: dist_test_bbox.sh CONFIG CHECKPOINT
# NUM_GPUS [extra test.py args]; PORT overridable via the environment.
CONFIG=$1
CHECKPOINT=$2
GPUS=$3
PORT=${PORT:-29500}

PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \
python -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \
    $(dirname "$0")/test.py $CONFIG $CHECKPOINT --launcher pytorch "${@:4}" \
    --eval bbox
--------------------------------------------------------------------------------
/tools/dist_train.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash

# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.

# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree


# Distributed training.  Usage: dist_train.sh CONFIG NUM_GPUS [extra args].
# Note: validation during training is disabled (--no-validate).
CONFIG=$1
GPUS=$2
PORT=${PORT:-29500}

PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \
python3 -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \
    $(dirname "$0")/train.py $CONFIG --launcher pytorch "${@:3}" --no-validate
--------------------------------------------------------------------------------
/tools/dist_train_and_test_bbox.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash

# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.

# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree


# Train, then evaluate bboxes with the same extra args.
# Usage: dist_train_and_test_bbox.sh CONFIG CHECKPOINT NUM_GPUS [extra args]
CONFIG=$1
CHECKPOINT=$2
GPUS=$3
PORT=${PORT:-29500}


PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \
python -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \
    $(dirname "$0")/train.py $CONFIG --launcher pytorch "${@:4}" --no-validate


PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \
python -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \
    $(dirname "$0")/test.py $CONFIG $CHECKPOINT --launcher pytorch "${@:4}" \
    --eval bbox
--------------------------------------------------------------------------------
/tools/eval_metric.py:
--------------------------------------------------------------------------------
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree 6 | 7 | 8 | import argparse 9 | 10 | import mmcv 11 | from mmcv import Config, DictAction 12 | from mmdet.datasets import build_dataset 13 | 14 | 15 | def parse_args(): 16 | parser = argparse.ArgumentParser( 17 | description="Evaluate metric of the " "results saved in pkl format" 18 | ) 19 | parser.add_argument("config", help="Config of the model") 20 | parser.add_argument("pkl_results", help="Results in pickle format") 21 | parser.add_argument( 22 | "--format-only", 23 | action="store_true", 24 | help="Format the output results without perform evaluation. It is" 25 | "useful when you want to format the result to a specific format and " 26 | "submit it to the test server", 27 | ) 28 | parser.add_argument( 29 | "--eval", 30 | type=str, 31 | nargs="+", 32 | help='Evaluation metrics, which depends on the dataset, e.g., "bbox",' 33 | ' "segm", "proposal" for COCO, and "mAP", "recall" for PASCAL VOC', 34 | ) 35 | parser.add_argument( 36 | "--cfg-options", 37 | nargs="+", 38 | action=DictAction, 39 | help="override some settings in the used config, the key-value pair " 40 | "in xxx=yyy format will be merged into config file. If the value to " 41 | 'be overwritten is a list, it should be like key="[a,b]" or key=a,b ' 42 | 'It also allows nested list/tuple values, e.g. 
key="[(a,b),(c,d)]" ' 43 | "Note that the quotation marks are necessary and that no white space " 44 | "is allowed.", 45 | ) 46 | parser.add_argument( 47 | "--eval-options", 48 | nargs="+", 49 | action=DictAction, 50 | help="custom options for evaluation, the key-value pair in xxx=yyy " 51 | "format will be kwargs for dataset.evaluate() function", 52 | ) 53 | args = parser.parse_args() 54 | return args 55 | 56 | 57 | def main(): 58 | args = parse_args() 59 | 60 | cfg = Config.fromfile(args.config) 61 | assert args.eval or args.format_only, ( 62 | "Please specify at least one operation (eval/format the results) with " 63 | 'the argument "--eval", "--format-only"' 64 | ) 65 | if args.eval and args.format_only: 66 | raise ValueError("--eval and --format_only cannot be both specified") 67 | 68 | if args.cfg_options is not None: 69 | cfg.merge_from_dict(args.cfg_options) 70 | cfg.data.test.test_mode = True 71 | 72 | dataset = build_dataset(cfg.data.test) 73 | outputs = mmcv.load(args.pkl_results) 74 | 75 | kwargs = {} if args.eval_options is None else args.eval_options 76 | if args.format_only: 77 | dataset.format_results(outputs, **kwargs) 78 | if args.eval: 79 | eval_kwargs = cfg.get("evaluation", {}).copy() 80 | # hard-code way to remove EvalHook args 81 | for key in ["interval", "tmpdir", "start", "gpu_collect", "save_best", "rule"]: 82 | eval_kwargs.pop(key, None) 83 | eval_kwargs.update(dict(metric=args.eval, **kwargs)) 84 | print(dataset.evaluate(outputs, **eval_kwargs)) 85 | 86 | 87 | if __name__ == "__main__": 88 | main() 89 | -------------------------------------------------------------------------------- /tools/get_flops.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 
3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree 6 | 7 | 8 | import argparse 9 | 10 | import torch 11 | from mmcv import Config 12 | from mmdet.models import build_detector 13 | 14 | try: 15 | from mmcv.cnn import get_model_complexity_info 16 | except ImportError: 17 | raise ImportError("Please upgrade mmcv to >0.6.2") 18 | 19 | 20 | def parse_args(): 21 | parser = argparse.ArgumentParser(description="Train a detector") 22 | parser.add_argument("config", help="train config file path") 23 | parser.add_argument( 24 | "--shape", type=int, nargs="+", default=[1280, 800], help="input image size" 25 | ) 26 | args = parser.parse_args() 27 | return args 28 | 29 | 30 | def main(): 31 | args = parse_args() 32 | 33 | if len(args.shape) == 1: 34 | input_shape = (3, args.shape[0], args.shape[0]) 35 | elif len(args.shape) == 2: 36 | input_shape = (3,) + tuple(args.shape) 37 | else: 38 | raise ValueError("invalid input shape") 39 | 40 | cfg = Config.fromfile(args.config) 41 | # import modules from string list. 42 | if cfg.get("custom_imports", None): 43 | from mmcv.utils import import_modules_from_strings 44 | 45 | import_modules_from_strings(**cfg["custom_imports"]) 46 | 47 | model = build_detector( 48 | cfg.model, train_cfg=cfg.get("train_cfg"), test_cfg=cfg.get("test_cfg") 49 | ) 50 | if torch.cuda.is_available(): 51 | model.cuda() 52 | model.eval() 53 | 54 | if hasattr(model, "forward_dummy"): 55 | model.forward = model.forward_dummy 56 | else: 57 | raise NotImplementedError( 58 | "FLOPs counter is currently not currently supported with {}".format( 59 | model.__class__.__name__ 60 | ) 61 | ) 62 | 63 | flops, params = get_model_complexity_info(model, input_shape) 64 | split_line = "=" * 30 65 | print( 66 | f"{split_line}\nInput shape: {input_shape}\n" 67 | f"Flops: {flops}\nParams: {params}\n{split_line}" 68 | ) 69 | print( 70 | "!!!Please be cautious if you use the results in papers. 
" 71 | "You may need to check if all ops are supported and verify that the " 72 | "flops computation is correct." 73 | ) 74 | 75 | 76 | if __name__ == "__main__": 77 | main() 78 | -------------------------------------------------------------------------------- /tools/interpolate_extracted_masks.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree 6 | 7 | 8 | import json 9 | import multiprocessing as mp 10 | import time 11 | 12 | import cv2 13 | import numpy as np 14 | from pycocotools import mask as maskUtils 15 | 16 | 17 | MASKS_DIR = "" 18 | NUM_SPLITS = 1 19 | 20 | 21 | def resize_mask(image_ann): 22 | new_anns = [] 23 | for ann in image_ann: 24 | segm = ann["segmentation"] 25 | mask = maskUtils.decode(segm) 26 | orig_shape = ann["ori_shape"][:2] 27 | resized_mask = cv2.resize( 28 | mask, (orig_shape[1], orig_shape[0]), interpolation=cv2.INTER_NEAREST 29 | ) 30 | new_rle = maskUtils.encode(np.asfortranarray(resized_mask)) 31 | if type(new_rle["counts"]) == bytes: 32 | new_rle["counts"] = new_rle["counts"].decode("ascii") 33 | area = maskUtils.area(new_rle) 34 | bbox = maskUtils.toBbox(new_rle) 35 | ann["segmentation"] = new_rle 36 | ann["area"] = int(area) 37 | ann["bbox"] = [int(coord) for coord in bbox] 38 | new_anns.append(ann) 39 | return new_anns 40 | 41 | 42 | SPLITS = range(0, NUM_SPLITS) 43 | for split in SPLITS: 44 | json_path = f"{MASKS_DIR}/masks_{split}.json" 45 | output_path = f"{MASKS_DIR}/masks_interpolated_{split}.json" 46 | 47 | ann_json = json.load(open(json_path, "rb")) 48 | 49 | start = time.perf_counter() 50 | 51 | mp_pool = mp.Pool(processes=60) 52 | resized_masks = mp_pool.map(resize_mask, ann_json) 53 | 54 | print(f"finished {split}") 55 | print(time.perf_counter() - start, "seconds") 56 | 57 | 
json.dump(resized_masks, open(output_path, "w")) 58 | -------------------------------------------------------------------------------- /tools/merge_annotations.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree 6 | 7 | 8 | import json 9 | 10 | splits = 4 11 | 12 | # COCO 13 | pseudo_mask_path = "WORK_DIR/masks_{}.json" 14 | ref_json_path = "DATA_DIR/instances.json" 15 | 16 | output_path = pseudo_mask_path.format("all") 17 | 18 | # JOIN into processed 19 | ref_json = json.load(open(ref_json_path, "rb")) 20 | max_id = 0 21 | for ann in ref_json["annotations"]: 22 | max_id = max(ann["id"], max_id) 23 | 24 | pseudo_masks = [] 25 | for shard in range(splits): 26 | ann_json = json.load(open(pseudo_mask_path.format(shard), "rb")) 27 | for img_ann in ann_json: 28 | for ann in img_ann: 29 | max_id += 1 30 | ann["id"] = max_id 31 | pseudo_masks.append(ann) 32 | 33 | ref_json["annotations"] = pseudo_masks 34 | 35 | print(f"generated {len(pseudo_masks)} pseudo masks") 36 | json.dump(ref_json, open(output_path, "w")) 37 | -------------------------------------------------------------------------------- /tools/model_test.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree


import cv2
import mmcv
import numpy as np
import torch
from mmcv import Config, DictAction
from mmcv.runner import get_dist_info, init_dist, load_checkpoint, wrap_fp16_model
from mmdet.core import bbox2roi
from mmdet.models import build_detector

# NOTE(review): ad-hoc smoke-test script with hardcoded, user-specific paths;
# everything below runs at import time.  Presumably not meant for CI — confirm.
cfg_file = "./configs/oln_mask/two_tower_example.py"
checkpoint = "/checkpoint/weiyaowang/pairwise_potential/coco_to_lvis/maskrcnn_baseline/latest.pth"
img_path = "/checkpoint/trandu/oln/data/coco/train2017/000000391895.jpg"

model_config = Config.fromfile(cfg_file).model
# iou_threshold=1.0 effectively disables NMS so every proposal survives.
model_config.test_cfg.rcnn.nms = dict(type="nms", iou_threshold=1.0)
two_tower = build_detector(model_config)
# load_checkpoint(mask_rcnn, checkpoint, map_location='cpu')
two_tower.cpu()
two_tower.eval()
input_img = cv2.imread(img_path)
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True
)
input_img = mmcv.imnormalize(
    input_img,
    np.array(img_norm_cfg["mean"]),
    np.array(img_norm_cfg["std"]),
)
# Minimal metadata dict expected by simple_test.
img_metas = [
    {
        "img_shape": input_img.shape,
        "scale_factor": 1.0,
        "ori_shape": input_img.shape,
        "pad_shape": input_img.shape,
    }
]

# HWC -> CHW, then add a leading batch dimension.
input_img = np.transpose(input_img, (2, 0, 1))
input_img = torch.from_numpy(input_img)

input_img = input_img.unsqueeze(0)

out = two_tower.simple_test(input_img, img_metas)

# features = mask_rcnn.extract_feat(input_img)

# # (tl_x, tl_y, br_x, br_y)
# proposal_list = mask_rcnn.rpn_head.simple_test_rpn(features, img_metas)
# det_bboxes, det_labels = mask_rcnn.roi_head.simple_test_bboxes(
#     features, img_metas, proposal_list, mask_rcnn.roi_head.test_cfg, rescale=False
# )
# segm_results = mask_rcnn.roi_head.simple_test_mask(
#     features, img_metas, det_bboxes, det_labels, rescale=False
# )
# print(proposal_list[0].shape)
# roi_out = mask_rcnn.roi_head.forward_dummy(features, proposal_list[0])
# print(roi_out[0].shape)
# print(roi_out[1].shape)
# print(roi_out[2].shape)
# print(roi_out[3].shape)

# print(f"det boxes: {det_bboxes[0].shape}")

# print(proposal_list[0][::200, :4])
# print(det_bboxes[0][::200, :4])
# print(segm_results[0].shape)

# print(roi_out[1][::200, :4])
# print(roi_out[2][0, 0, :, :])
# print(roi_out[1][:5, :4])
# print(roi_out[3][:5, :4])
--------------------------------------------------------------------------------
/tools/print_config.py:
--------------------------------------------------------------------------------
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.

# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree


import argparse

from mmcv import Config, DictAction


def parse_args():
    # Config path plus optional key=value overrides merged into the config.
    parser = argparse.ArgumentParser(description="Print the whole config")
    parser.add_argument("config", help="config file path")
    parser.add_argument(
        "--options", nargs="+", action=DictAction, help="arguments in dict"
    )
    args = parser.parse_args()

    return args


def main():
    # Resolve the config (with overrides) and dump its pretty-printed text.
    args = parse_args()

    cfg = Config.fromfile(args.config)
    if args.options is not None:
        cfg.merge_from_dict(args.options)
    print(f"Config:\n{cfg.pretty_text}")


if __name__ == "__main__":
    main()
--------------------------------------------------------------------------------
/tools/publish_model.py:
--------------------------------------------------------------------------------
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.

# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree


import argparse
import subprocess

import torch


def parse_args():
    """Parse input/output checkpoint paths from the command line."""
    parser = argparse.ArgumentParser(description="Process a checkpoint to be published")
    parser.add_argument("in_file", help="input checkpoint filename")
    parser.add_argument("out_file", help="output checkpoint filename")
    args = parser.parse_args()
    return args


def process_checkpoint(in_file, out_file):
    """Strip training-only state and publish with a hash-stamped filename.

    Drops the optimizer state for a smaller file, saves to ``out_file``, then
    renames the result to ``<out_file stem>-<first 8 hex of sha256>.pth``.
    """
    checkpoint = torch.load(in_file, map_location="cpu")
    # remove optimizer for smaller file size
    if "optimizer" in checkpoint:
        del checkpoint["optimizer"]
    # if it is necessary to remove some sensitive data in checkpoint['meta'],
    # add the code here.
    torch.save(checkpoint, out_file)
    # sha256sum output starts with the hex digest, so sha[:8] is its prefix.
    sha = subprocess.check_output(["sha256sum", out_file]).decode()
    if out_file.endswith(".pth"):
        out_file_name = out_file[:-4]
    else:
        out_file_name = out_file
    final_file = out_file_name + f"-{sha[:8]}.pth"
    # Fix: Popen returned before the rename finished and ignored failures;
    # run(check=True) waits and raises so a bad publish cannot pass silently.
    subprocess.run(["mv", out_file, final_file], check=True)


def main():
    args = parse_args()
    process_checkpoint(args.in_file, args.out_file)


if __name__ == "__main__":
    main()
# --------------------------------------------------------------------------------
# /tools/regnet2mmdet.py:
# --------------------------------------------------------------------------------
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree


import argparse
from collections import OrderedDict

import torch


def convert_stem(model_key, model_weight, state_dict, converted_names):
    """Map a pycls stem key (stem.conv / stem.bn) to mmdet's conv1 / bn1."""
    new_key = model_key.replace("stem.conv", "conv1")
    new_key = new_key.replace("stem.bn", "bn1")
    state_dict[new_key] = model_weight
    converted_names.add(model_key)
    print(f"Convert {model_key} to {new_key}")


def convert_head(model_key, model_weight, state_dict, converted_names):
    """Map a pycls classification-head key (head.fc) to mmdet's fc."""
    new_key = model_key.replace("head.fc", "fc")
    state_dict[new_key] = model_weight
    converted_names.add(model_key)
    print(f"Convert {model_key} to {new_key}")


def convert_reslayer(model_key, model_weight, state_dict, converted_names):
    """Map a pycls residual-stage key (s<i>.b<j>.<module>...) to mmdet style.

    Raises:
        ValueError: if the key does not match any known pycls layout.
    """
    split_keys = model_key.split(".")
    layer, block, module = split_keys[:3]
    block_id = int(block[1:])
    layer_name = f"layer{int(layer[1:])}"
    # pycls blocks are 1-indexed; mmdet blocks are 0-indexed.
    block_name = f"{block_id - 1}"

    if block_id == 1 and module == "bn":
        new_key = f"{layer_name}.{block_name}.downsample.1.{split_keys[-1]}"
    elif block_id == 1 and module == "proj":
        new_key = f"{layer_name}.{block_name}.downsample.0.{split_keys[-1]}"
    elif module == "f":
        if split_keys[3] == "a_bn":
            module_name = "bn1"
        elif split_keys[3] == "b_bn":
            module_name = "bn2"
        elif split_keys[3] == "c_bn":
            module_name = "bn3"
        elif split_keys[3] == "a":
            module_name = "conv1"
        elif split_keys[3] == "b":
            module_name = "conv2"
        elif split_keys[3] == "c":
            module_name = "conv3"
        else:
            # Fix: an unknown sub-module previously fell through and raised an
            # opaque NameError on `module_name` below.
            raise ValueError(f"Unsupported conversion of key {model_key}")
        new_key = f"{layer_name}.{block_name}.{module_name}.{split_keys[-1]}"
    else:
        raise ValueError(f"Unsupported conversion of key {model_key}")
    print(f"Convert {model_key} to {new_key}")
    state_dict[new_key] = model_weight
    converted_names.add(model_key)


def convert(src, dst):
    """Convert keys in pycls pretrained RegNet models to mmdet style."""
    # load caffe model
    regnet_model = torch.load(src)
    blobs = regnet_model["model_state"]
    # convert to pytorch style
    state_dict = OrderedDict()
    converted_names = set()
    for key, weight in blobs.items():
        if "stem" in key:
            convert_stem(key, weight, state_dict, converted_names)
        elif "head" in key:
            convert_head(key, weight, state_dict, converted_names)
        elif key.startswith("s"):
            convert_reslayer(key, weight, state_dict, converted_names)

    # check if all layers are converted
    for key in blobs:
        if key not in converted_names:
            print(f"not converted: {key}")
    # save checkpoint
    checkpoint = dict()
    checkpoint["state_dict"] = state_dict
    torch.save(checkpoint, dst)


def main():
    parser = argparse.ArgumentParser(description="Convert model keys")
    parser.add_argument("src", help="src detectron model path")
    parser.add_argument("dst", help="save path")
    args = parser.parse_args()
    convert(args.src, args.dst)


if __name__ == "__main__":
    main()
# --------------------------------------------------------------------------------
# /tools/slurm_test.sh:
# --------------------------------------------------------------------------------
#!/usr/bin/env bash

# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree


set -x

# Usage: slurm_test.sh PARTITION JOB_NAME CONFIG CHECKPOINT [test.py args...]
# GPUS / GPUS_PER_NODE / CPUS_PER_TASK / SRUN_ARGS overridable via environment.
PARTITION=$1
JOB_NAME=$2
CONFIG=$3
CHECKPOINT=$4
GPUS=${GPUS:-8}
GPUS_PER_NODE=${GPUS_PER_NODE:-8}
CPUS_PER_TASK=${CPUS_PER_TASK:-5}
PY_ARGS=${@:5}
SRUN_ARGS=${SRUN_ARGS:-""}

PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \
srun -p ${PARTITION} \
    --job-name=${JOB_NAME} \
    --gres=gpu:${GPUS_PER_NODE} \
    --ntasks=${GPUS} \
    --ntasks-per-node=${GPUS_PER_NODE} \
    --cpus-per-task=${CPUS_PER_TASK} \
    --kill-on-bad-exit=1 \
    ${SRUN_ARGS} \
    python -u tools/test.py ${CONFIG} ${CHECKPOINT} --launcher="slurm" ${PY_ARGS}
--------------------------------------------------------------------------------
/tools/slurm_train.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash


# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.

# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree


set -x

# Usage: slurm_train.sh PARTITION JOB_NAME CONFIG WORK_DIR [train.py args...]
PARTITION=$1
JOB_NAME=$2
CONFIG=$3
WORK_DIR=$4
GPUS=${GPUS:-8}
GPUS_PER_NODE=${GPUS_PER_NODE:-8}
CPUS_PER_TASK=${CPUS_PER_TASK:-5}
SRUN_ARGS=${SRUN_ARGS:-""}
PY_ARGS=${@:5}

PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \
srun -p ${PARTITION} \
    --job-name=${JOB_NAME} \
    --gres=gpu:${GPUS_PER_NODE} \
    --ntasks=${GPUS} \
    --ntasks-per-node=${GPUS_PER_NODE} \
    --cpus-per-task=${CPUS_PER_TASK} \
    --kill-on-bad-exit=1 \
    ${SRUN_ARGS} \
    python -u tools/train.py ${CONFIG} --work-dir=${WORK_DIR} --launcher="slurm" ${PY_ARGS}
--------------------------------------------------------------------------------