├── .github ├── CODE_OF_CONDUCT.md └── CONTRIBUTING.md ├── .gitignore ├── LICENSE ├── README.md ├── configs ├── _base_ │ ├── datasets │ │ ├── cityscapes_detection.py │ │ ├── cityscapes_instance.py │ │ ├── coco_detection.py │ │ ├── coco_instance.py │ │ ├── coco_instance_semantic.py │ │ ├── deepfashion.py │ │ ├── lvis_v0.5_detection.py │ │ ├── lvis_v0.5_detection_shot.py │ │ ├── lvis_v0.5_instance.py │ │ ├── lvis_v1_instance.py │ │ ├── voc0712.py │ │ └── wider_face.py │ ├── default_runtime.py │ ├── models │ │ ├── cascade_mask_rcnn_r50_fpn.py │ │ ├── cascade_rcnn_r50_fpn.py │ │ ├── fast_rcnn_r50_fpn.py │ │ ├── faster_rcnn_r50_caffe_c4.py │ │ ├── faster_rcnn_r50_caffe_dc5.py │ │ ├── faster_rcnn_r50_fpn.py │ │ ├── mask_rcnn_r50_caffe_c4.py │ │ ├── mask_rcnn_r50_fpn.py │ │ ├── retinanet_r50_fpn.py │ │ ├── rpn_r50_caffe_c4.py │ │ ├── rpn_r50_fpn.py │ │ └── ssd300.py │ └── schedules │ │ ├── schedule_1x.py │ │ ├── schedule_20e.py │ │ └── schedule_2x.py ├── mask_rcnn │ ├── class_agn_mask_rcnn.py │ ├── class_agn_mask_rcnn_gn.py │ ├── class_agn_mask_rcnn_gn_online.py │ ├── class_agn_mask_rcnn_pa.py │ └── mask_rcnn.py ├── oln_box │ ├── class_agn_faster_rcnn.py │ └── oln_box.py └── pairwise_affinity │ ├── pa_extract.py │ └── pa_train.py ├── docker └── Dockerfile ├── docs ├── 1_exist_data_model.md ├── 2_new_data_model.md ├── 3_exist_data_new_model.md ├── Makefile ├── api.rst ├── changelog.md ├── compatibility.md ├── conf.py ├── conventions.md ├── faq.md ├── get_started.md ├── index.rst ├── make.bat ├── model_zoo.md ├── projects.md ├── robustness_benchmarking.md ├── stat.py ├── tutorials │ ├── config.md │ ├── customize_dataset.md │ ├── customize_losses.md │ ├── customize_models.md │ ├── customize_runtime.md │ ├── data_pipeline.md │ ├── finetune.md │ ├── index.rst │ └── pytorch2onnx.md └── useful_tools.md ├── images ├── overview_updated.png └── teaser_large.png ├── mmdet ├── __init__.py ├── apis │ ├── __init__.py │ ├── inference.py │ ├── test.py │ └── train.py ├── core │ ├── 
__init__.py │ ├── anchor │ │ ├── __init__.py │ │ ├── anchor_generator.py │ │ ├── builder.py │ │ ├── point_generator.py │ │ └── utils.py │ ├── bbox │ │ ├── __init__.py │ │ ├── assigners │ │ │ ├── __init__.py │ │ │ ├── approx_max_iou_assigner.py │ │ │ ├── assign_result.py │ │ │ ├── atss_assigner.py │ │ │ ├── base_assigner.py │ │ │ ├── center_region_assigner.py │ │ │ ├── grid_assigner.py │ │ │ ├── hungarian_assigner.py │ │ │ ├── max_iou_assigner.py │ │ │ ├── point_assigner.py │ │ │ └── region_assigner.py │ │ ├── builder.py │ │ ├── coder │ │ │ ├── __init__.py │ │ │ ├── base_bbox_coder.py │ │ │ ├── bucketing_bbox_coder.py │ │ │ ├── delta_xywh_bbox_coder.py │ │ │ ├── legacy_delta_xywh_bbox_coder.py │ │ │ ├── pseudo_bbox_coder.py │ │ │ ├── tblr_bbox_coder.py │ │ │ └── yolo_bbox_coder.py │ │ ├── demodata.py │ │ ├── iou_calculators │ │ │ ├── __init__.py │ │ │ ├── builder.py │ │ │ └── iou2d_calculator.py │ │ ├── match_costs │ │ │ ├── __init__.py │ │ │ ├── builder.py │ │ │ └── match_cost.py │ │ ├── samplers │ │ │ ├── __init__.py │ │ │ ├── base_sampler.py │ │ │ ├── combined_sampler.py │ │ │ ├── instance_balanced_pos_sampler.py │ │ │ ├── iou_balanced_neg_sampler.py │ │ │ ├── ohem_sampler.py │ │ │ ├── pseudo_sampler.py │ │ │ ├── random_sampler.py │ │ │ ├── sampling_result.py │ │ │ └── score_hlr_sampler.py │ │ └── transforms.py │ ├── evaluation │ │ ├── __init__.py │ │ ├── bbox_overlaps.py │ │ ├── class_names.py │ │ ├── eval_hooks.py │ │ ├── mean_ap.py │ │ └── recall.py │ ├── export │ │ ├── __init__.py │ │ └── pytorch2onnx.py │ ├── fp16 │ │ ├── __init__.py │ │ └── deprecated_fp16_utils.py │ ├── mask │ │ ├── __init__.py │ │ ├── mask_target.py │ │ ├── structures.py │ │ └── utils.py │ ├── post_processing │ │ ├── __init__.py │ │ ├── bbox_nms.py │ │ └── merge_augs.py │ ├── utils │ │ ├── __init__.py │ │ ├── dist_utils.py │ │ └── misc.py │ └── visualization │ │ ├── __init__.py │ │ └── image.py ├── datasets │ ├── __init__.py │ ├── builder.py │ ├── coco.py │ ├── coco_split.py │ ├── 
coco_split_online.py │ ├── coco_split_pseudo_masks.py │ ├── cocoeval_wrappers.py │ ├── custom.py │ ├── dataset_wrappers.py │ ├── pipelines │ │ ├── __init__.py │ │ ├── auto_augment.py │ │ ├── compose.py │ │ ├── formating.py │ │ ├── instaboost.py │ │ ├── loading.py │ │ ├── test_time_aug.py │ │ └── transforms.py │ ├── samplers │ │ ├── __init__.py │ │ ├── distributed_sampler.py │ │ └── group_sampler.py │ └── utils.py ├── models │ ├── __init__.py │ ├── backbones │ │ ├── __init__.py │ │ ├── darknet.py │ │ ├── detectors_resnet.py │ │ ├── detectors_resnext.py │ │ ├── hourglass.py │ │ ├── hrnet.py │ │ ├── regnet.py │ │ ├── res2net.py │ │ ├── resnest.py │ │ ├── resnet.py │ │ ├── resnext.py │ │ ├── ssd_vgg.py │ │ └── trident_resnet.py │ ├── builder.py │ ├── dense_heads │ │ ├── __init__.py │ │ ├── anchor_head.py │ │ ├── base_dense_head.py │ │ ├── dense_test_mixins.py │ │ ├── oln_rpn_head.py │ │ ├── rpn_head.py │ │ └── rpn_test_mixin.py │ ├── detectors │ │ ├── __init__.py │ │ ├── base.py │ │ ├── faster_rcnn.py │ │ ├── mask_rcnn.py │ │ ├── pa_predictor.py │ │ ├── rpn.py │ │ ├── rpn_detector.py │ │ ├── two_stage.py │ │ └── two_tower.py │ ├── losses │ │ ├── __init__.py │ │ ├── accuracy.py │ │ ├── ae_loss.py │ │ ├── balanced_l1_loss.py │ │ ├── cross_entropy_loss.py │ │ ├── focal_loss.py │ │ ├── gaussian_focal_loss.py │ │ ├── gfocal_loss.py │ │ ├── ghm_loss.py │ │ ├── iou_loss.py │ │ ├── mse_loss.py │ │ ├── pisa_loss.py │ │ ├── smooth_l1_loss.py │ │ ├── utils.py │ │ └── varifocal_loss.py │ ├── necks │ │ ├── __init__.py │ │ └── fpn.py │ ├── roi_heads │ │ ├── __init__.py │ │ ├── base_roi_head.py │ │ ├── bbox_heads │ │ │ ├── __init__.py │ │ │ ├── bbox_head.py │ │ │ ├── convfc_bbox_head.py │ │ │ └── convfc_bbox_score_head.py │ │ ├── mask_heads │ │ │ ├── __init__.py │ │ │ ├── coarse_mask_head.py │ │ │ ├── fcn_mask_head.py │ │ │ ├── fused_semantic_head.py │ │ │ ├── grid_head.py │ │ │ ├── htc_mask_head.py │ │ │ ├── mask_point_head.py │ │ │ └── maskiou_head.py │ │ ├── oln_roi_head.py │ │ 
├── rec_roi_head.py │ │ ├── roi_extractors │ │ │ ├── __init__.py │ │ │ ├── base_roi_extractor.py │ │ │ ├── generic_roi_extractor.py │ │ │ └── single_level_roi_extractor.py │ │ ├── shared_heads │ │ │ ├── __init__.py │ │ │ └── res_layer.py │ │ ├── standard_roi_head.py │ │ └── test_mixins.py │ └── utils │ │ ├── __init__.py │ │ ├── builder.py │ │ ├── gaussian_target.py │ │ ├── positional_encoding.py │ │ ├── res_layer.py │ │ └── transformer.py ├── utils │ ├── __init__.py │ ├── collect_env.py │ ├── contextmanagers.py │ ├── logger.py │ ├── profiling.py │ └── util_mixins.py └── version.py ├── pa_lib ├── affinity2mask.py ├── cython_lib │ ├── graph_helper.pyx │ └── setup.py ├── evaluate_helper.py ├── oln_ranker.py └── rag.py ├── pytest.ini ├── requirements.txt ├── requirements ├── build.txt ├── docs.txt ├── optional.txt ├── readthedocs.txt ├── runtime.txt └── tests.txt ├── resources ├── coco_test_12510.jpg ├── corruptions_sev_3.png ├── data_pipeline.png ├── loss_curve.png └── mmdet-logo.png ├── setup.cfg ├── setup.py ├── tests ├── async_benchmark.py ├── test_anchor.py ├── test_assigner.py ├── test_async.py ├── test_coder.py ├── test_config.py ├── test_data │ ├── test_dataset.py │ ├── test_formatting.py │ ├── test_img_augment.py │ ├── test_loading.py │ ├── test_models_aug_test.py │ ├── test_rotate.py │ ├── test_sampler.py │ ├── test_shear.py │ ├── test_transform.py │ ├── test_translate.py │ └── test_utils.py ├── test_eval_hook.py ├── test_fp16.py ├── test_iou2d_calculator.py ├── test_masks.py ├── test_misc.py ├── test_models │ ├── test_backbones.py │ ├── test_forward.py │ ├── test_heads.py │ ├── test_losses.py │ ├── test_necks.py │ ├── test_pisa_heads.py │ ├── test_position_encoding.py │ ├── test_roi_extractor.py │ └── test_transformer.py ├── test_version.py └── test_visualization.py └── tools ├── analyze_logs.py ├── analyze_results.py ├── benchmark.py ├── browse_dataset.py ├── coco_error_analysis.py ├── convert_datasets ├── cityscapes.py └── pascal_voc.py ├── 
detectron2pytorch.py ├── dist_test_bbox.sh ├── dist_train.sh ├── dist_train_and_test_bbox.sh ├── eval_metric.py ├── extract_pa_masks.py ├── get_flops.py ├── interpolate_extracted_masks.py ├── merge_annotations.py ├── model_test.py ├── print_config.py ├── publish_model.py ├── pytorch2onnx.py ├── regnet2mmdet.py ├── robustness_eval.py ├── slurm_test.sh ├── slurm_train.sh ├── test.py ├── test_pa.py ├── test_robustness.py ├── train.py └── upgrade_model_version.py /.github/CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Code of Conduct 2 | 3 | Facebook has adopted a Code of Conduct that we expect project participants to adhere to. 4 | Please read the [full text](https://code.fb.com/codeofconduct/) 5 | so that you can understand what actions will and will not be tolerated. 6 | -------------------------------------------------------------------------------- /.github/CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to Generic-Grouping 2 | We want to make contributing to this project as easy and transparent as 3 | possible. 4 | 5 | ## Pull Requests 6 | We actively welcome your pull requests. 7 | 8 | 1. Fork the repo and create your branch from `main`. 9 | 2. If you've added code that should be tested, add tests. 10 | 3. If you've changed APIs, update the documentation. 11 | 4. Ensure the test suite passes. 12 | 5. Make sure your code lints. 13 | 6. If you haven't already, complete the Contributor License Agreement ("CLA"). 14 | 15 | ## Contributor License Agreement ("CLA") 16 | In order to accept your pull request, we need you to submit a CLA. You only need 17 | to do this once to work on any of Facebook's open source projects. 18 | 19 | Complete your CLA here: 20 | 21 | ## Issues 22 | We use GitHub issues to track public bugs. Please ensure your description is 23 | clear and has sufficient instructions to be able to reproduce the issue. 
24 | 25 | Facebook has a [bounty program](https://www.facebook.com/whitehat/) for the safe 26 | disclosure of security bugs. In those cases, please go through the process 27 | outlined on that page and do not file a public issue. 28 | 29 | ## Coding Style 30 | * 4 spaces for indentation rather than tabs 31 | 32 | ## License 33 | By contributing to Generic-Grouping, you agree that your contributions will be licensed 34 | under the LICENSE file in the root directory of this source tree. 35 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | db.sqlite3 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # SageMath parsed files 82 | *.sage.py 83 | 84 | # Environments 85 | .env 86 | .venv 87 | env/ 88 | venv/ 89 | ENV/ 90 | env.bak/ 91 | venv.bak/ 92 | 93 | # Spyder project settings 94 | .spyderproject 95 | .spyproject 96 | 97 | # Rope project settings 98 | .ropeproject 99 | 100 | # mkdocs documentation 101 | /site 102 | 103 | # mypy 104 | .mypy_cache/ 105 | 106 | data/ 107 | data 108 | .vscode 109 | .idea 110 | .DS_Store 111 | 112 | # custom 113 | *.pkl 114 | *.pkl.json 115 | *.log.json 116 | work_dirs/ 117 | run_scripts/ 118 | 119 | # Pytorch 120 | *.pth 121 | *.py~ 122 | *.sh~ 123 | 124 | # Token 125 | token.txt 126 | -------------------------------------------------------------------------------- /configs/_base_/datasets/cityscapes_detection.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 
3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree 6 | 7 | 8 | dataset_type = "CityscapesDataset" 9 | data_root = "data/cityscapes/" 10 | img_norm_cfg = dict( 11 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True 12 | ) 13 | train_pipeline = [ 14 | dict(type="LoadImageFromFile"), 15 | dict(type="LoadAnnotations", with_bbox=True), 16 | dict(type="Resize", img_scale=[(2048, 800), (2048, 1024)], keep_ratio=True), 17 | dict(type="RandomFlip", flip_ratio=0.5), 18 | dict(type="Normalize", **img_norm_cfg), 19 | dict(type="Pad", size_divisor=32), 20 | dict(type="DefaultFormatBundle"), 21 | dict(type="Collect", keys=["img", "gt_bboxes", "gt_labels"]), 22 | ] 23 | test_pipeline = [ 24 | dict(type="LoadImageFromFile"), 25 | dict( 26 | type="MultiScaleFlipAug", 27 | img_scale=(2048, 1024), 28 | flip=False, 29 | transforms=[ 30 | dict(type="Resize", keep_ratio=True), 31 | dict(type="RandomFlip"), 32 | dict(type="Normalize", **img_norm_cfg), 33 | dict(type="Pad", size_divisor=32), 34 | dict(type="ImageToTensor", keys=["img"]), 35 | dict(type="Collect", keys=["img"]), 36 | ], 37 | ), 38 | ] 39 | data = dict( 40 | samples_per_gpu=1, 41 | workers_per_gpu=2, 42 | train=dict( 43 | type="RepeatDataset", 44 | times=8, 45 | dataset=dict( 46 | type=dataset_type, 47 | ann_file=data_root + "annotations/instancesonly_filtered_gtFine_train.json", 48 | img_prefix=data_root + "leftImg8bit/train/", 49 | pipeline=train_pipeline, 50 | ), 51 | ), 52 | val=dict( 53 | type=dataset_type, 54 | ann_file=data_root + "annotations/instancesonly_filtered_gtFine_val.json", 55 | img_prefix=data_root + "leftImg8bit/val/", 56 | pipeline=test_pipeline, 57 | ), 58 | test=dict( 59 | type=dataset_type, 60 | ann_file=data_root + "annotations/instancesonly_filtered_gtFine_test.json", 61 | img_prefix=data_root + "leftImg8bit/test/", 62 | pipeline=test_pipeline, 63 | ), 64 | ) 65 | evaluation = dict(interval=1, 
metric="bbox") 66 | -------------------------------------------------------------------------------- /configs/_base_/datasets/cityscapes_instance.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree 6 | 7 | 8 | dataset_type = "CityscapesDataset" 9 | data_root = "data/cityscapes/" 10 | img_norm_cfg = dict( 11 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True 12 | ) 13 | train_pipeline = [ 14 | dict(type="LoadImageFromFile"), 15 | dict(type="LoadAnnotations", with_bbox=True, with_mask=True), 16 | dict(type="Resize", img_scale=[(2048, 800), (2048, 1024)], keep_ratio=True), 17 | dict(type="RandomFlip", flip_ratio=0.5), 18 | dict(type="Normalize", **img_norm_cfg), 19 | dict(type="Pad", size_divisor=32), 20 | dict(type="DefaultFormatBundle"), 21 | dict(type="Collect", keys=["img", "gt_bboxes", "gt_labels", "gt_masks"]), 22 | ] 23 | test_pipeline = [ 24 | dict(type="LoadImageFromFile"), 25 | dict( 26 | type="MultiScaleFlipAug", 27 | img_scale=(2048, 1024), 28 | flip=False, 29 | transforms=[ 30 | dict(type="Resize", keep_ratio=True), 31 | dict(type="RandomFlip"), 32 | dict(type="Normalize", **img_norm_cfg), 33 | dict(type="Pad", size_divisor=32), 34 | dict(type="ImageToTensor", keys=["img"]), 35 | dict(type="Collect", keys=["img"]), 36 | ], 37 | ), 38 | ] 39 | data = dict( 40 | samples_per_gpu=1, 41 | workers_per_gpu=2, 42 | train=dict( 43 | type="RepeatDataset", 44 | times=8, 45 | dataset=dict( 46 | type=dataset_type, 47 | ann_file=data_root + "annotations/instancesonly_filtered_gtFine_train.json", 48 | img_prefix=data_root + "leftImg8bit/train/", 49 | pipeline=train_pipeline, 50 | ), 51 | ), 52 | val=dict( 53 | type=dataset_type, 54 | ann_file=data_root + 
"annotations/instancesonly_filtered_gtFine_val.json", 55 | img_prefix=data_root + "leftImg8bit/val/", 56 | pipeline=test_pipeline, 57 | ), 58 | test=dict( 59 | type=dataset_type, 60 | ann_file=data_root + "annotations/instancesonly_filtered_gtFine_test.json", 61 | img_prefix=data_root + "leftImg8bit/test/", 62 | pipeline=test_pipeline, 63 | ), 64 | ) 65 | evaluation = dict(metric=["bbox", "segm"]) 66 | -------------------------------------------------------------------------------- /configs/_base_/datasets/coco_detection.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree 6 | 7 | 8 | dataset_type = "CocoDataset" 9 | data_root = "data/coco/" 10 | img_norm_cfg = dict( 11 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True 12 | ) 13 | train_pipeline = [ 14 | dict(type="LoadImageFromFile"), 15 | dict(type="LoadAnnotations", with_bbox=True), 16 | dict(type="Resize", img_scale=(1333, 800), keep_ratio=True), 17 | dict(type="RandomFlip", flip_ratio=0.5), 18 | dict(type="Normalize", **img_norm_cfg), 19 | dict(type="Pad", size_divisor=32), 20 | dict(type="DefaultFormatBundle"), 21 | dict(type="Collect", keys=["img", "gt_bboxes", "gt_labels"]), 22 | ] 23 | test_pipeline = [ 24 | dict(type="LoadImageFromFile"), 25 | dict( 26 | type="MultiScaleFlipAug", 27 | img_scale=(1333, 800), 28 | flip=False, 29 | transforms=[ 30 | dict(type="Resize", keep_ratio=True), 31 | dict(type="RandomFlip"), 32 | dict(type="Normalize", **img_norm_cfg), 33 | dict(type="Pad", size_divisor=32), 34 | dict(type="ImageToTensor", keys=["img"]), 35 | dict(type="Collect", keys=["img"]), 36 | ], 37 | ), 38 | ] 39 | data = dict( 40 | samples_per_gpu=2, 41 | workers_per_gpu=2, 42 | train=dict( 43 | type=dataset_type, 44 | ann_file=data_root + 
"annotations/instances_train2017.json", 45 | img_prefix=data_root + "train2017/", 46 | pipeline=train_pipeline, 47 | ), 48 | val=dict( 49 | type=dataset_type, 50 | ann_file=data_root + "annotations/instances_val2017.json", 51 | img_prefix=data_root + "val2017/", 52 | pipeline=test_pipeline, 53 | ), 54 | test=dict( 55 | type=dataset_type, 56 | ann_file=data_root + "annotations/instances_val2017.json", 57 | img_prefix=data_root + "val2017/", 58 | pipeline=test_pipeline, 59 | ), 60 | ) 61 | evaluation = dict(interval=1, metric="bbox") 62 | -------------------------------------------------------------------------------- /configs/_base_/datasets/coco_instance.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree 6 | 7 | 8 | dataset_type = "CocoDataset" 9 | data_root = "data/coco/" 10 | img_norm_cfg = dict( 11 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True 12 | ) 13 | train_pipeline = [ 14 | dict(type="LoadImageFromFile"), 15 | dict(type="LoadAnnotations", with_bbox=True, with_mask=True), 16 | dict(type="Resize", img_scale=(1333, 800), keep_ratio=True), 17 | dict(type="RandomFlip", flip_ratio=0.5), 18 | dict(type="Normalize", **img_norm_cfg), 19 | dict(type="Pad", size_divisor=32), 20 | dict(type="DefaultFormatBundle"), 21 | dict(type="Collect", keys=["img", "gt_bboxes", "gt_labels", "gt_masks"]), 22 | ] 23 | test_pipeline = [ 24 | dict(type="LoadImageFromFile"), 25 | dict( 26 | type="MultiScaleFlipAug", 27 | img_scale=(1333, 800), 28 | flip=False, 29 | transforms=[ 30 | dict(type="Resize", keep_ratio=True), 31 | dict(type="RandomFlip"), 32 | dict(type="Normalize", **img_norm_cfg), 33 | dict(type="Pad", size_divisor=32), 34 | dict(type="ImageToTensor", keys=["img"]), 35 | dict(type="Collect", 
keys=["img"]), 36 | ], 37 | ), 38 | ] 39 | data = dict( 40 | samples_per_gpu=2, 41 | workers_per_gpu=2, 42 | train=dict( 43 | type=dataset_type, 44 | ann_file=data_root + "annotations/instances_train2017.json", 45 | img_prefix=data_root + "train2017/", 46 | pipeline=train_pipeline, 47 | ), 48 | val=dict( 49 | type=dataset_type, 50 | ann_file=data_root + "annotations/instances_val2017.json", 51 | img_prefix=data_root + "val2017/", 52 | pipeline=test_pipeline, 53 | ), 54 | test=dict( 55 | type=dataset_type, 56 | ann_file=data_root + "annotations/instances_val2017.json", 57 | img_prefix=data_root + "val2017/", 58 | pipeline=test_pipeline, 59 | ), 60 | ) 61 | evaluation = dict(metric=["bbox", "segm"]) 62 | -------------------------------------------------------------------------------- /configs/_base_/datasets/coco_instance_semantic.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 
3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree 6 | 7 | 8 | dataset_type = "CocoDataset" 9 | data_root = "data/coco/" 10 | img_norm_cfg = dict( 11 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True 12 | ) 13 | train_pipeline = [ 14 | dict(type="LoadImageFromFile"), 15 | dict(type="LoadAnnotations", with_bbox=True, with_mask=True, with_seg=True), 16 | dict(type="Resize", img_scale=(1333, 800), keep_ratio=True), 17 | dict(type="RandomFlip", flip_ratio=0.5), 18 | dict(type="Normalize", **img_norm_cfg), 19 | dict(type="Pad", size_divisor=32), 20 | dict(type="SegRescale", scale_factor=1 / 8), 21 | dict(type="DefaultFormatBundle"), 22 | dict( 23 | type="Collect", 24 | keys=["img", "gt_bboxes", "gt_labels", "gt_masks", "gt_semantic_seg"], 25 | ), 26 | ] 27 | test_pipeline = [ 28 | dict(type="LoadImageFromFile"), 29 | dict( 30 | type="MultiScaleFlipAug", 31 | img_scale=(1333, 800), 32 | flip=False, 33 | transforms=[ 34 | dict(type="Resize", keep_ratio=True), 35 | dict(type="RandomFlip", flip_ratio=0.5), 36 | dict(type="Normalize", **img_norm_cfg), 37 | dict(type="Pad", size_divisor=32), 38 | dict(type="ImageToTensor", keys=["img"]), 39 | dict(type="Collect", keys=["img"]), 40 | ], 41 | ), 42 | ] 43 | data = dict( 44 | samples_per_gpu=2, 45 | workers_per_gpu=2, 46 | train=dict( 47 | type=dataset_type, 48 | ann_file=data_root + "annotations/instances_train2017.json", 49 | img_prefix=data_root + "train2017/", 50 | seg_prefix=data_root + "stuffthingmaps/train2017/", 51 | pipeline=train_pipeline, 52 | ), 53 | val=dict( 54 | type=dataset_type, 55 | ann_file=data_root + "annotations/instances_val2017.json", 56 | img_prefix=data_root + "val2017/", 57 | pipeline=test_pipeline, 58 | ), 59 | test=dict( 60 | type=dataset_type, 61 | ann_file=data_root + "annotations/instances_val2017.json", 62 | img_prefix=data_root + "val2017/", 63 | pipeline=test_pipeline, 64 | ), 65 | ) 66 | 
evaluation = dict(metric=["bbox", "segm"]) 67 | -------------------------------------------------------------------------------- /configs/_base_/datasets/deepfashion.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree 6 | 7 | 8 | # dataset settings 9 | dataset_type = "DeepFashionDataset" 10 | data_root = "data/DeepFashion/In-shop/" 11 | img_norm_cfg = dict( 12 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True 13 | ) 14 | train_pipeline = [ 15 | dict(type="LoadImageFromFile"), 16 | dict(type="LoadAnnotations", with_bbox=True, with_mask=True), 17 | dict(type="Resize", img_scale=(750, 1101), keep_ratio=True), 18 | dict(type="RandomFlip", flip_ratio=0.5), 19 | dict(type="Normalize", **img_norm_cfg), 20 | dict(type="Pad", size_divisor=32), 21 | dict(type="DefaultFormatBundle"), 22 | dict(type="Collect", keys=["img", "gt_bboxes", "gt_labels", "gt_masks"]), 23 | ] 24 | test_pipeline = [ 25 | dict(type="LoadImageFromFile"), 26 | dict( 27 | type="MultiScaleFlipAug", 28 | img_scale=(750, 1101), 29 | flip=False, 30 | transforms=[ 31 | dict(type="Resize", keep_ratio=True), 32 | dict(type="RandomFlip"), 33 | dict(type="Normalize", **img_norm_cfg), 34 | dict(type="Pad", size_divisor=32), 35 | dict(type="ImageToTensor", keys=["img"]), 36 | dict(type="Collect", keys=["img"]), 37 | ], 38 | ), 39 | ] 40 | data = dict( 41 | imgs_per_gpu=2, 42 | workers_per_gpu=1, 43 | train=dict( 44 | type=dataset_type, 45 | ann_file=data_root + "annotations/DeepFashion_segmentation_query.json", 46 | img_prefix=data_root + "Img/", 47 | pipeline=train_pipeline, 48 | data_root=data_root, 49 | ), 50 | val=dict( 51 | type=dataset_type, 52 | ann_file=data_root + "annotations/DeepFashion_segmentation_query.json", 53 | img_prefix=data_root 
+ "Img/", 54 | pipeline=test_pipeline, 55 | data_root=data_root, 56 | ), 57 | test=dict( 58 | type=dataset_type, 59 | ann_file=data_root + "annotations/DeepFashion_segmentation_gallery.json", 60 | img_prefix=data_root + "Img/", 61 | pipeline=test_pipeline, 62 | data_root=data_root, 63 | ), 64 | ) 65 | evaluation = dict(interval=5, metric=["bbox", "segm"]) 66 | -------------------------------------------------------------------------------- /configs/_base_/datasets/lvis_v0.5_detection.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree 6 | 7 | 8 | _base_ = "coco_detection.py" 9 | dataset_type = "LVISV05Dataset" 10 | data_root = "data/LVIS/" 11 | data = dict( 12 | samples_per_gpu=2, 13 | workers_per_gpu=2, 14 | train=dict( 15 | _delete_=True, 16 | type="ClassBalancedDataset", 17 | oversample_thr=1e-3, 18 | dataset=dict( 19 | type=dataset_type, 20 | ann_file=data_root + "annotations/lvis_v0.5_train.json", 21 | # ann_file=data_root + 'annotations/lvis_v0.5_train_10.json', 22 | img_prefix=data_root + "train2017/", 23 | ), 24 | ), 25 | val=dict( 26 | type=dataset_type, 27 | ann_file=data_root + "annotations/lvis_v0.5_val.json", 28 | img_prefix=data_root + "val2017/", 29 | ), 30 | test=dict( 31 | type=dataset_type, 32 | ann_file=data_root + "annotations/lvis_v0.5_val.json", 33 | img_prefix=data_root + "val2017/", 34 | ), 35 | ) 36 | evaluation = dict(metric=["bbox", "segm"]) 37 | -------------------------------------------------------------------------------- /configs/_base_/datasets/lvis_v0.5_detection_shot.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 
3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree 6 | 7 | 8 | _base_ = "coco_detection.py" 9 | dataset_type = "LVISV05Dataset" 10 | data_root = "data/LVIS/" 11 | data = dict( 12 | samples_per_gpu=2, 13 | workers_per_gpu=2, 14 | train=dict( 15 | _delete_=True, 16 | type="ClassBalancedDataset", 17 | oversample_thr=1e-3, 18 | dataset=dict( 19 | type=dataset_type, 20 | # ann_file=data_root + 'annotations/lvis_v0.5_train.json', 21 | ann_file=data_root + "annotations/lvis_v0.5_train_10.json", 22 | img_prefix=data_root + "train2017/", 23 | ), 24 | ), 25 | val=dict( 26 | type=dataset_type, 27 | ann_file=data_root + "annotations/lvis_v0.5_val.json", 28 | img_prefix=data_root + "val2017/", 29 | ), 30 | test=dict( 31 | type=dataset_type, 32 | ann_file=data_root + "annotations/lvis_v0.5_val.json", 33 | img_prefix=data_root + "val2017/", 34 | ), 35 | ) 36 | evaluation = dict(metric=["bbox", "segm"]) 37 | -------------------------------------------------------------------------------- /configs/_base_/datasets/lvis_v0.5_instance.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 
3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree 6 | 7 | 8 | _base_ = "coco_instance.py" 9 | dataset_type = "LVISV05Dataset" 10 | # data_root = 'data/lvis_v0.5/' 11 | data_root = "/data2/LVIS/" 12 | data = dict( 13 | samples_per_gpu=2, 14 | workers_per_gpu=2, 15 | train=dict( 16 | _delete_=True, 17 | type="ClassBalancedDataset", 18 | oversample_thr=1e-3, 19 | dataset=dict( 20 | type=dataset_type, 21 | ann_file=data_root + "annotations/lvis_v0.5_train.json", 22 | img_prefix=data_root + "train2017/", 23 | ), 24 | ), 25 | val=dict( 26 | type=dataset_type, 27 | ann_file=data_root + "annotations/lvis_v0.5_val.json", 28 | img_prefix=data_root + "val2017/", 29 | ), 30 | test=dict( 31 | type=dataset_type, 32 | ann_file=data_root + "annotations/lvis_v0.5_val.json", 33 | img_prefix=data_root + "val2017/", 34 | ), 35 | ) 36 | evaluation = dict(metric=["bbox", "segm"]) 37 | -------------------------------------------------------------------------------- /configs/_base_/datasets/lvis_v1_instance.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 
3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree 6 | 7 | 8 | _base_ = "coco_instance.py" 9 | dataset_type = "LVISV1Dataset" 10 | data_root = "data/lvis_v1/" 11 | data = dict( 12 | samples_per_gpu=2, 13 | workers_per_gpu=2, 14 | train=dict( 15 | _delete_=True, 16 | type="ClassBalancedDataset", 17 | oversample_thr=1e-3, 18 | dataset=dict( 19 | type=dataset_type, 20 | ann_file=data_root + "annotations/lvis_v1_train.json", 21 | img_prefix=data_root, 22 | ), 23 | ), 24 | val=dict( 25 | type=dataset_type, 26 | ann_file=data_root + "annotations/lvis_v1_val.json", 27 | img_prefix=data_root, 28 | ), 29 | test=dict( 30 | type=dataset_type, 31 | ann_file=data_root + "annotations/lvis_v1_val.json", 32 | img_prefix=data_root, 33 | ), 34 | ) 35 | evaluation = dict(metric=["bbox", "segm"]) 36 | -------------------------------------------------------------------------------- /configs/_base_/datasets/voc0712.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 
3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree 6 | 7 | 8 | # dataset settings 9 | dataset_type = "VOCDataset" 10 | data_root = "data/VOCdevkit/" 11 | img_norm_cfg = dict( 12 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True 13 | ) 14 | train_pipeline = [ 15 | dict(type="LoadImageFromFile"), 16 | dict(type="LoadAnnotations", with_bbox=True), 17 | dict(type="Resize", img_scale=(1000, 600), keep_ratio=True), 18 | dict(type="RandomFlip", flip_ratio=0.5), 19 | dict(type="Normalize", **img_norm_cfg), 20 | dict(type="Pad", size_divisor=32), 21 | dict(type="DefaultFormatBundle"), 22 | dict(type="Collect", keys=["img", "gt_bboxes", "gt_labels"]), 23 | ] 24 | test_pipeline = [ 25 | dict(type="LoadImageFromFile"), 26 | dict( 27 | type="MultiScaleFlipAug", 28 | img_scale=(1000, 600), 29 | flip=False, 30 | transforms=[ 31 | dict(type="Resize", keep_ratio=True), 32 | dict(type="RandomFlip"), 33 | dict(type="Normalize", **img_norm_cfg), 34 | dict(type="Pad", size_divisor=32), 35 | dict(type="ImageToTensor", keys=["img"]), 36 | dict(type="Collect", keys=["img"]), 37 | ], 38 | ), 39 | ] 40 | data = dict( 41 | samples_per_gpu=2, 42 | workers_per_gpu=2, 43 | train=dict( 44 | type="RepeatDataset", 45 | times=3, 46 | dataset=dict( 47 | type=dataset_type, 48 | ann_file=[ 49 | data_root + "VOC2007/ImageSets/Main/trainval.txt", 50 | data_root + "VOC2012/ImageSets/Main/trainval.txt", 51 | ], 52 | img_prefix=[data_root + "VOC2007/", data_root + "VOC2012/"], 53 | pipeline=train_pipeline, 54 | ), 55 | ), 56 | val=dict( 57 | type=dataset_type, 58 | ann_file=data_root + "VOC2007/ImageSets/Main/test.txt", 59 | img_prefix=data_root + "VOC2007/", 60 | pipeline=test_pipeline, 61 | ), 62 | test=dict( 63 | type=dataset_type, 64 | ann_file=data_root + "VOC2007/ImageSets/Main/test.txt", 65 | img_prefix=data_root + "VOC2007/", 66 | pipeline=test_pipeline, 67 | ), 68 | ) 69 | evaluation = 
dict(interval=1, metric="mAP") 70 | -------------------------------------------------------------------------------- /configs/_base_/datasets/wider_face.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree 6 | 7 | 8 | # dataset settings 9 | dataset_type = "WIDERFaceDataset" 10 | data_root = "data/WIDERFace/" 11 | img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], std=[1, 1, 1], to_rgb=True) 12 | train_pipeline = [ 13 | dict(type="LoadImageFromFile", to_float32=True), 14 | dict(type="LoadAnnotations", with_bbox=True), 15 | dict( 16 | type="PhotoMetricDistortion", 17 | brightness_delta=32, 18 | contrast_range=(0.5, 1.5), 19 | saturation_range=(0.5, 1.5), 20 | hue_delta=18, 21 | ), 22 | dict( 23 | type="Expand", 24 | mean=img_norm_cfg["mean"], 25 | to_rgb=img_norm_cfg["to_rgb"], 26 | ratio_range=(1, 4), 27 | ), 28 | dict( 29 | type="MinIoURandomCrop", min_ious=(0.1, 0.3, 0.5, 0.7, 0.9), min_crop_size=0.3 30 | ), 31 | dict(type="Resize", img_scale=(300, 300), keep_ratio=False), 32 | dict(type="Normalize", **img_norm_cfg), 33 | dict(type="RandomFlip", flip_ratio=0.5), 34 | dict(type="DefaultFormatBundle"), 35 | dict(type="Collect", keys=["img", "gt_bboxes", "gt_labels"]), 36 | ] 37 | test_pipeline = [ 38 | dict(type="LoadImageFromFile"), 39 | dict( 40 | type="MultiScaleFlipAug", 41 | img_scale=(300, 300), 42 | flip=False, 43 | transforms=[ 44 | dict(type="Resize", keep_ratio=False), 45 | dict(type="Normalize", **img_norm_cfg), 46 | dict(type="ImageToTensor", keys=["img"]), 47 | dict(type="Collect", keys=["img"]), 48 | ], 49 | ), 50 | ] 51 | data = dict( 52 | samples_per_gpu=60, 53 | workers_per_gpu=2, 54 | train=dict( 55 | type="RepeatDataset", 56 | times=2, 57 | dataset=dict( 58 | type=dataset_type, 59 | ann_file=data_root + 
"train.txt", 60 | img_prefix=data_root + "WIDER_train/", 61 | min_size=17, 62 | pipeline=train_pipeline, 63 | ), 64 | ), 65 | val=dict( 66 | type=dataset_type, 67 | ann_file=data_root + "val.txt", 68 | img_prefix=data_root + "WIDER_val/", 69 | pipeline=test_pipeline, 70 | ), 71 | test=dict( 72 | type=dataset_type, 73 | ann_file=data_root + "val.txt", 74 | img_prefix=data_root + "WIDER_val/", 75 | pipeline=test_pipeline, 76 | ), 77 | ) 78 | -------------------------------------------------------------------------------- /configs/_base_/default_runtime.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree 6 | 7 | 8 | checkpoint_config = dict(interval=1) 9 | # yapf:disable 10 | log_config = dict( 11 | interval=50, 12 | hooks=[ 13 | dict(type="TextLoggerHook"), 14 | # dict(type='TensorboardLoggerHook') 15 | ], 16 | ) 17 | # yapf:enable 18 | dist_params = dict(backend="nccl") 19 | log_level = "INFO" 20 | load_from = None 21 | resume_from = None 22 | workflow = [("train", 1)] 23 | -------------------------------------------------------------------------------- /configs/_base_/models/fast_rcnn_r50_fpn.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree


# Model settings for a Fast R-CNN detector (R-50 + FPN). Fast R-CNN consumes
# precomputed proposals, so there is no RPN section here — only the RoI head.
model = dict(
    type="FastRCNN",
    pretrained="torchvision://resnet50",
    backbone=dict(
        type="ResNet",
        depth=50,
        num_stages=4,
        out_indices=(0, 1, 2, 3),  # expose C2-C5 to the neck
        frozen_stages=1,  # freeze stem + first residual stage
        norm_cfg=dict(type="BN", requires_grad=True),
        norm_eval=True,  # keep BN running stats fixed while training
        style="pytorch",
    ),
    neck=dict(
        type="FPN", in_channels=[256, 512, 1024, 2048], out_channels=256, num_outs=5
    ),
    roi_head=dict(
        type="StandardRoIHead",
        bbox_roi_extractor=dict(
            type="SingleRoIExtractor",
            roi_layer=dict(type="RoIAlign", output_size=7, sampling_ratio=0),
            out_channels=256,
            featmap_strides=[4, 8, 16, 32],
        ),
        bbox_head=dict(
            type="Shared2FCBBoxHead",
            in_channels=256,
            fc_out_channels=1024,
            roi_feat_size=7,
            num_classes=80,  # COCO
            bbox_coder=dict(
                type="DeltaXYWHBBoxCoder",
                target_means=[0.0, 0.0, 0.0, 0.0],
                target_stds=[0.1, 0.1, 0.2, 0.2],
            ),
            reg_class_agnostic=False,
            loss_cls=dict(type="CrossEntropyLoss", use_sigmoid=False, loss_weight=1.0),
            loss_bbox=dict(type="L1Loss", loss_weight=1.0),
        ),
    ),
    # model training and testing settings
    train_cfg=dict(
        rcnn=dict(
            assigner=dict(
                type="MaxIoUAssigner",
                pos_iou_thr=0.5,
                neg_iou_thr=0.5,
                min_pos_iou=0.5,
                match_low_quality=False,
                ignore_iof_thr=-1,
            ),
            sampler=dict(
                type="RandomSampler",
                num=512,
                pos_fraction=0.25,
                neg_pos_ub=-1,
                add_gt_as_proposals=True,
            ),
            pos_weight=-1,
            debug=False,
        )
    ),
    test_cfg=dict(
        rcnn=dict(
            score_thr=0.05, nms=dict(type="nms", iou_threshold=0.5), max_per_img=100
        )
    ),
)
# /configs/_base_/models/retinanet_r50_fpn.py
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.

# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree


# Model settings for RetinaNet (single-stage, focal loss) on R-50 + FPN.
model = dict(
    type="RetinaNet",
    pretrained="torchvision://resnet50",
    backbone=dict(
        type="ResNet",
        depth=50,
        num_stages=4,
        out_indices=(0, 1, 2, 3),  # expose C2-C5 to the neck
        frozen_stages=1,  # freeze stem + first residual stage
        norm_cfg=dict(type="BN", requires_grad=True),
        norm_eval=True,  # keep BN running stats fixed while training
        style="pytorch",
    ),
    neck=dict(
        type="FPN",
        in_channels=[256, 512, 1024, 2048],
        out_channels=256,
        start_level=1,  # skip C2; RetinaNet predicts on P3-P7
        add_extra_convs="on_input",
        num_outs=5,
    ),
    bbox_head=dict(
        type="RetinaHead",
        num_classes=80,  # COCO
        in_channels=256,
        stacked_convs=4,
        feat_channels=256,
        anchor_generator=dict(
            type="AnchorGenerator",
            octave_base_scale=4,
            scales_per_octave=3,  # 3 anchor scales per pyramid level
            ratios=[0.5, 1.0, 2.0],
            strides=[8, 16, 32, 64, 128],
        ),
        bbox_coder=dict(
            type="DeltaXYWHBBoxCoder",
            target_means=[0.0, 0.0, 0.0, 0.0],
            target_stds=[1.0, 1.0, 1.0, 1.0],
        ),
        loss_cls=dict(
            type="FocalLoss", use_sigmoid=True, gamma=2.0, alpha=0.25, loss_weight=1.0
        ),
        loss_bbox=dict(type="L1Loss", loss_weight=1.0),
    ),
    # training and testing settings
    train_cfg=dict(
        assigner=dict(
            type="MaxIoUAssigner",
            pos_iou_thr=0.5,
            neg_iou_thr=0.4,
            min_pos_iou=0,
            ignore_iof_thr=-1,
        ),
        allowed_border=-1,
        pos_weight=-1,
        debug=False,
    ),
    test_cfg=dict(
        nms_pre=1000,
        min_bbox_size=0,
        score_thr=0.05,
        nms=dict(type="nms", iou_threshold=0.5),
        max_per_img=100,
    ),
)
# /configs/_base_/models/rpn_r50_caffe_c4.py
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.

# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree


# Model settings for a standalone RPN on a Caffe-style R-50 C4 backbone
# (single-scale feature map at stride 16, no FPN).
model = dict(
    type="RPN",
    pretrained="open-mmlab://detectron2/resnet50_caffe",
    backbone=dict(
        type="ResNet",
        depth=50,
        num_stages=3,  # stop after C4; C5 is not built
        strides=(1, 2, 2),
        dilations=(1, 1, 1),
        out_indices=(2,),  # only the C4 feature map is used
        frozen_stages=1,
        norm_cfg=dict(type="BN", requires_grad=False),  # caffe weights: frozen BN
        norm_eval=True,
        style="caffe",
    ),
    neck=None,
    rpn_head=dict(
        type="RPNHead",
        in_channels=1024,
        feat_channels=1024,
        anchor_generator=dict(
            type="AnchorGenerator",
            scales=[2, 4, 8, 16, 32],  # multi-scale anchors compensate for no FPN
            ratios=[0.5, 1.0, 2.0],
            strides=[16],
        ),
        bbox_coder=dict(
            type="DeltaXYWHBBoxCoder",
            target_means=[0.0, 0.0, 0.0, 0.0],
            target_stds=[1.0, 1.0, 1.0, 1.0],
        ),
        loss_cls=dict(type="CrossEntropyLoss", use_sigmoid=True, loss_weight=1.0),
        loss_bbox=dict(type="L1Loss", loss_weight=1.0),
    ),
    # model training and testing settings
    train_cfg=dict(
        rpn=dict(
            assigner=dict(
                type="MaxIoUAssigner",
                pos_iou_thr=0.7,
                neg_iou_thr=0.3,
                min_pos_iou=0.3,
                ignore_iof_thr=-1,
            ),
            sampler=dict(
                type="RandomSampler",
                num=256,
                pos_fraction=0.5,
                neg_pos_ub=-1,
                add_gt_as_proposals=False,
            ),
            allowed_border=0,
            pos_weight=-1,
            debug=False,
        )
    ),
    test_cfg=dict(
        rpn=dict(
            nms_across_levels=False,
            nms_pre=12000,
            nms_post=2000,
            max_num=2000,
            nms_thr=0.7,
            min_bbox_size=0,
        )
    ),
)
# /configs/_base_/models/rpn_r50_fpn.py
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.

# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree


# Model settings for a standalone RPN on R-50 + FPN (multi-level proposals).
model = dict(
    type="RPN",
    pretrained="torchvision://resnet50",
    backbone=dict(
        type="ResNet",
        depth=50,
        num_stages=4,
        out_indices=(0, 1, 2, 3),  # expose C2-C5 to the neck
        frozen_stages=1,
        norm_cfg=dict(type="BN", requires_grad=True),
        norm_eval=True,
        style="pytorch",
    ),
    neck=dict(
        type="FPN", in_channels=[256, 512, 1024, 2048], out_channels=256, num_outs=5
    ),
    rpn_head=dict(
        type="RPNHead",
        in_channels=256,
        feat_channels=256,
        anchor_generator=dict(
            type="AnchorGenerator",
            scales=[8],  # one scale per level; FPN provides the scale pyramid
            ratios=[0.5, 1.0, 2.0],
            strides=[4, 8, 16, 32, 64],
        ),
        bbox_coder=dict(
            type="DeltaXYWHBBoxCoder",
            target_means=[0.0, 0.0, 0.0, 0.0],
            target_stds=[1.0, 1.0, 1.0, 1.0],
        ),
        loss_cls=dict(type="CrossEntropyLoss", use_sigmoid=True, loss_weight=1.0),
        loss_bbox=dict(type="L1Loss", loss_weight=1.0),
    ),
    # model training and testing settings
    train_cfg=dict(
        rpn=dict(
            assigner=dict(
                type="MaxIoUAssigner",
                pos_iou_thr=0.7,
                neg_iou_thr=0.3,
                min_pos_iou=0.3,
                ignore_iof_thr=-1,
            ),
            sampler=dict(
                type="RandomSampler",
                num=256,
                pos_fraction=0.5,
                neg_pos_ub=-1,
                add_gt_as_proposals=False,
            ),
            allowed_border=0,
            pos_weight=-1,
            debug=False,
        )
    ),
    test_cfg=dict(
        rpn=dict(
            nms_across_levels=False,
            nms_pre=2000,
            nms_post=1000,
            max_num=1000,
            nms_thr=0.7,
            min_bbox_size=0,
        )
    ),
)
# /configs/_base_/models/ssd300.py
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.

# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree


# Model settings for SSD-300 (VGG-16 backbone, 300x300 inputs).
input_size = 300
model = dict(
    type="SingleStageDetector",
    pretrained="open-mmlab://vgg16_caffe",
    backbone=dict(
        type="SSDVGG",
        input_size=input_size,
        depth=16,
        with_last_pool=False,
        ceil_mode=True,
        out_indices=(3, 4),
        out_feature_indices=(22, 34),
        l2_norm_scale=20,  # L2Norm scaling on the conv4_3 feature map
    ),
    neck=None,
    bbox_head=dict(
        type="SSDHead",
        in_channels=(512, 1024, 512, 256, 256, 256),  # one entry per SSD level
        num_classes=80,  # COCO
        anchor_generator=dict(
            type="SSDAnchorGenerator",
            scale_major=False,
            input_size=input_size,
            basesize_ratio_range=(0.15, 0.9),
            strides=[8, 16, 32, 64, 100, 300],
            ratios=[[2], [2, 3], [2, 3], [2, 3], [2], [2]],
        ),
        bbox_coder=dict(
            type="DeltaXYWHBBoxCoder",
            target_means=[0.0, 0.0, 0.0, 0.0],
            target_stds=[0.1, 0.1, 0.2, 0.2],
        ),
    ),
    train_cfg=dict(
        assigner=dict(
            type="MaxIoUAssigner",
            pos_iou_thr=0.5,
            neg_iou_thr=0.5,
            min_pos_iou=0.0,
            ignore_iof_thr=-1,
            gt_max_assign_all=False,
        ),
        smoothl1_beta=1.0,
        allowed_border=-1,
        pos_weight=-1,
        neg_pos_ratio=3,  # hard-negative mining keeps neg:pos at 3:1
        debug=False,
    ),
    test_cfg=dict(
        nms=dict(type="nms", iou_threshold=0.45),
        min_bbox_size=0,
        score_thr=0.02,
        max_per_img=200,
    ),
)
# Fixed input size, so cudnn autotuning pays off.
cudnn_benchmark = True

# /configs/_base_/schedules/schedule_1x.py (file continues in the next chunk)
# /configs/_base_/schedules/schedule_1x.py
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.

# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree


# optimizer
optimizer = dict(type="SGD", lr=0.02, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=None)
# learning policy: linear warmup, then step LR decay at epochs 8 and 11
lr_config = dict(
    policy="step", warmup="linear", warmup_iters=500, warmup_ratio=0.001, step=[8, 11]
)
total_epochs = 12

# /configs/_base_/schedules/schedule_20e.py
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.

# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree


# optimizer
optimizer = dict(type="SGD", lr=0.02, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=None)
# learning policy: linear warmup, then step LR decay at epochs 16 and 19
lr_config = dict(
    policy="step", warmup="linear", warmup_iters=500, warmup_ratio=0.001, step=[16, 19]
)
total_epochs = 20

# /configs/_base_/schedules/schedule_2x.py (file continues in the next chunk)
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree 6 | 7 | 8 | # optimizer 9 | optimizer = dict(type="SGD", lr=0.02, momentum=0.9, weight_decay=0.0001) 10 | optimizer_config = dict(grad_clip=None) 11 | # learning policy 12 | lr_config = dict( 13 | policy="step", warmup="linear", warmup_iters=500, warmup_ratio=0.001, step=[16, 22] 14 | ) 15 | total_epochs = 24 16 | -------------------------------------------------------------------------------- /docker/Dockerfile: -------------------------------------------------------------------------------- 1 | ARG PYTORCH="1.6.0" 2 | ARG CUDA="10.1" 3 | ARG CUDNN="7" 4 | 5 | FROM pytorch/pytorch:${PYTORCH}-cuda${CUDA}-cudnn${CUDNN}-devel 6 | 7 | ENV TORCH_CUDA_ARCH_LIST="6.0 6.1 7.0+PTX" 8 | ENV TORCH_NVCC_FLAGS="-Xfatbin -compress-all" 9 | ENV CMAKE_PREFIX_PATH="$(dirname $(which conda))/../" 10 | 11 | RUN apt-get update && apt-get install -y ffmpeg libsm6 libxext6 git ninja-build libglib2.0-0 libsm6 libxrender-dev libxext6 \ 12 | && apt-get clean \ 13 | && rm -rf /var/lib/apt/lists/* 14 | 15 | # Install MMCV 16 | RUN pip install mmcv-full==latest+torch1.6.0+cu101 -f https://openmmlab.oss-accelerate.aliyuncs.com/mmcv/dist/index.html 17 | 18 | # Install MMDetection 19 | RUN conda clean --all 20 | RUN git clone https://github.com/open-mmlab/mmdetection.git /mmdetection 21 | WORKDIR /mmdetection 22 | ENV FORCE_CUDA="1" 23 | RUN pip install -r requirements/build.txt 24 | RUN pip install --no-cache-dir -e . 25 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = . 
9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /docs/api.rst: -------------------------------------------------------------------------------- 1 | API Reference 2 | ================= 3 | 4 | mmdet.apis 5 | -------------- 6 | .. automodule:: mmdet.apis 7 | :members: 8 | 9 | mmdet.core 10 | -------------- 11 | 12 | anchor 13 | ^^^^^^^^^^ 14 | .. automodule:: mmdet.core.anchor 15 | :members: 16 | 17 | bbox 18 | ^^^^^^^^^^ 19 | .. automodule:: mmdet.core.bbox 20 | :members: 21 | 22 | export 23 | ^^^^^^^^^^ 24 | .. automodule:: mmdet.core.export 25 | :members: 26 | 27 | mask 28 | ^^^^^^^^^^ 29 | .. automodule:: mmdet.core.mask 30 | :members: 31 | 32 | evaluation 33 | ^^^^^^^^^^ 34 | .. automodule:: mmdet.core.evaluation 35 | :members: 36 | 37 | post_processing 38 | ^^^^^^^^^^^^^^^ 39 | .. automodule:: mmdet.core.post_processing 40 | :members: 41 | 42 | optimizer 43 | ^^^^^^^^^^ 44 | .. automodule:: mmdet.core.optimizer 45 | :members: 46 | 47 | utils 48 | ^^^^^^^^^^ 49 | .. automodule:: mmdet.core.utils 50 | :members: 51 | 52 | mmdet.datasets 53 | -------------- 54 | 55 | datasets 56 | ^^^^^^^^^^ 57 | .. automodule:: mmdet.datasets 58 | :members: 59 | 60 | pipelines 61 | ^^^^^^^^^^ 62 | .. automodule:: mmdet.datasets.pipelines 63 | :members: 64 | 65 | mmdet.models 66 | -------------- 67 | 68 | detectors 69 | ^^^^^^^^^^ 70 | .. automodule:: mmdet.models.detectors 71 | :members: 72 | 73 | backbones 74 | ^^^^^^^^^^ 75 | .. 
automodule:: mmdet.models.backbones 76 | :members: 77 | 78 | necks 79 | ^^^^^^^^^^^^ 80 | .. automodule:: mmdet.models.necks 81 | :members: 82 | 83 | dense_heads 84 | ^^^^^^^^^^^^ 85 | .. automodule:: mmdet.models.dense_heads 86 | :members: 87 | 88 | roi_heads 89 | ^^^^^^^^^^ 90 | .. automodule:: mmdet.models.roi_heads 91 | :members: 92 | 93 | losses 94 | ^^^^^^^^^^ 95 | .. automodule:: mmdet.models.losses 96 | :members: 97 | 98 | utils 99 | ^^^^^^^^^^ 100 | .. automodule:: mmdet.models.utils 101 | :members: 102 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree 6 | 7 | 8 | # Configuration file for the Sphinx documentation builder. 9 | # 10 | # This file only contains a selection of the most common options. For a full 11 | # list see the documentation: 12 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 13 | 14 | # -- Path setup -------------------------------------------------------------- 15 | 16 | # If extensions (or modules to document with autodoc) are in another directory, 17 | # add these directories to sys.path here. If the directory is relative to the 18 | # documentation root, use os.path.abspath to make it absolute, like shown here. 
#
import os
import subprocess
import sys

sys.path.insert(0, os.path.abspath(".."))

# -- Project information -----------------------------------------------------

project = "MMDetection"
copyright = "2018-2020, OpenMMLab"
author = "MMDetection Authors"
version_file = "../mmdet/version.py"


def get_version():
    """Return ``__version__`` parsed from ``version_file``.

    The version module is executed in an explicit namespace dict rather than
    via ``exec(...)`` followed by ``locals()["__version__"]``: in Python 3,
    ``exec`` cannot reliably inject names into a function's locals (the old
    pattern only worked by a CPython implementation accident and breaks under
    the PEP 667 ``locals()`` semantics introduced in 3.13).
    """
    ns = {}
    with open(version_file, "r") as f:
        exec(compile(f.read(), version_file, "exec"), ns)
    return ns["__version__"]


# The full version, including alpha/beta/rc tags
release = get_version()

# -- General configuration ---------------------------------------------------

# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
extensions = [
    "sphinx.ext.autodoc",
    "sphinx.ext.napoleon",
    "sphinx.ext.viewcode",
    "recommonmark",
    "sphinx_markdown_tables",
]

# Heavy/optional dependencies are mocked so autodoc can import mmdet
# without a full runtime environment.
autodoc_mock_imports = [
    "matplotlib",
    "pycocotools",
    "terminaltables",
    "mmdet.version",
    "mmcv.ops",
]

# Add any paths that contain templates here, relative to this directory.
templates_path = ["_templates"]

# The suffix(es) of source filenames.
# You can specify multiple suffix as a list of string:
#
source_suffix = {
    ".rst": "restructuredtext",
    ".md": "markdown",
}

# The master toctree document.
master_doc = "index"

# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# This pattern also affects html_static_path and html_extra_path.
exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"]

# -- Options for HTML output -------------------------------------------------

# The theme to use for HTML and HTML Help pages.
See the documentation for 86 | # a list of builtin themes. 87 | # 88 | html_theme = "sphinx_rtd_theme" 89 | 90 | # Add any paths that contain custom static files (such as style sheets) here, 91 | # relative to this directory. They are copied after the builtin static files, 92 | # so a file named "default.css" will overwrite the builtin "default.css". 93 | html_static_path = ["_static"] 94 | 95 | 96 | def builder_inited_handler(app): 97 | subprocess.run(["./stat.py"]) 98 | 99 | 100 | def setup(app): 101 | app.connect("builder-inited", builder_inited_handler) 102 | -------------------------------------------------------------------------------- /docs/conventions.md: -------------------------------------------------------------------------------- 1 | # Conventions 2 | 3 | Please check the following conventions if you would like to modify MMDetection as your own project. 4 | 5 | ## Loss 6 | 7 | In MMDetection, a `dict` containing losses and metrics will be returned by `model(**data)`. 8 | 9 | For example, in bbox head, 10 | 11 | ```python 12 | class BBoxHead(nn.Module): 13 | ... 14 | def loss(self, ...): 15 | losses = dict() 16 | # classification loss 17 | losses['loss_cls'] = self.loss_cls(...) 18 | # classification accuracy 19 | losses['acc'] = accuracy(...) 20 | # bbox regression loss 21 | losses['loss_bbox'] = self.loss_bbox(...) 22 | return losses 23 | ``` 24 | 25 | `bbox_head.loss()` will be called during model forward. 26 | The returned dict contains `'loss_bbox'`, `'loss_cls'`, `'acc'` . 27 | Only `'loss_bbox'`, `'loss_cls'` will be used during back propagation, 28 | `'acc'` will only be used as a metric to monitor training process. 29 | 30 | By default, only values whose keys contain `'loss'` will be back propagated. 31 | This behavior could be changed by modifying `BaseDetector.train_step()`. 
32 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | Welcome to MMDetection's documentation! 2 | ======================================= 3 | 4 | .. toctree:: 5 | :maxdepth: 2 6 | :caption: Get Started 7 | 8 | get_started.md 9 | modelzoo_statistics.md 10 | model_zoo.md 11 | 12 | .. toctree:: 13 | :maxdepth: 2 14 | :caption: Quick Run 15 | 16 | 1_exist_data_model.md 17 | 2_new_data_model.md 18 | 19 | .. toctree:: 20 | :maxdepth: 2 21 | :caption: Tutorials 22 | 23 | tutorials/index.rst 24 | 25 | .. toctree:: 26 | :maxdepth: 2 27 | :caption: Useful Tools and Scripts 28 | 29 | useful_tools.md 30 | 31 | .. toctree:: 32 | :maxdepth: 2 33 | :caption: Notes 34 | 35 | conventions.md 36 | compatibility.md 37 | projects.md 38 | changelog.md 39 | faq.md 40 | 41 | .. toctree:: 42 | :caption: API Reference 43 | 44 | api.rst 45 | 46 | Indices and tables 47 | ================== 48 | 49 | * :ref:`genindex` 50 | * :ref:`search` 51 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=. 11 | set BUILDDIR=_build 12 | 13 | if "%1" == "" goto help 14 | 15 | %SPHINXBUILD% >NUL 2>NUL 16 | if errorlevel 9009 ( 17 | echo. 18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 19 | echo.installed, then set the SPHINXBUILD environment variable to point 20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 21 | echo.may add the Sphinx directory to PATH. 22 | echo. 
23 | echo.If you don't have Sphinx installed, grab it from 24 | echo.http://sphinx-doc.org/ 25 | exit /b 1 26 | ) 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /docs/stat.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree 6 | 7 | 8 | #!/usr/bin/env python 9 | import functools as func 10 | import glob 11 | import os.path as osp 12 | import re 13 | 14 | import numpy as np 15 | 16 | url_prefix = "https://github.com/open-mmlab/mmdetection/blob/master/" 17 | 18 | files = sorted(glob.glob("../configs/*/README.md")) 19 | 20 | stats = [] 21 | titles = [] 22 | num_ckpts = 0 23 | 24 | for f in files: 25 | url = osp.dirname(f.replace("../", url_prefix)) 26 | 27 | with open(f, "r") as content_file: 28 | content = content_file.read() 29 | 30 | title = content.split("\n")[0].replace("# ", "").strip() 31 | ckpts = set( 32 | x.lower().strip() for x in re.findall(r"\[model\]\((https?.*)\)", content) 33 | ) 34 | 35 | if len(ckpts) == 0: 36 | continue 37 | 38 | _papertype = [x for x in re.findall(r"\[([A-Z]+)\]", content)] 39 | assert len(_papertype) > 0 40 | papertype = _papertype[0] 41 | 42 | paper = set([(papertype, title)]) 43 | 44 | titles.append(title) 45 | num_ckpts += len(ckpts) 46 | 47 | statsmsg = f""" 48 | \t* [{papertype}] [{title}]({url}) ({len(ckpts)} ckpts) 49 | """ 50 | stats.append((paper, ckpts, statsmsg)) 51 | 52 | allpapers = func.reduce(lambda a, b: a.union(b), [p for p, _, _ in stats]) 53 | msglist = "\n".join(x for _, _, x in stats) 54 | 55 | papertypes, papercounts = np.unique([t for t, _ 
in allpapers], return_counts=True) 56 | countstr = "\n".join([f" - {t}: {c}" for t, c in zip(papertypes, papercounts)]) 57 | 58 | modelzoo = f""" 59 | # Model Zoo Statistics 60 | 61 | * Number of papers: {len(set(titles))} 62 | {countstr} 63 | 64 | * Number of checkpoints: {num_ckpts} 65 | 66 | {msglist} 67 | """ 68 | 69 | with open("modelzoo_statistics.md", "w") as f: 70 | f.write(modelzoo) 71 | -------------------------------------------------------------------------------- /docs/tutorials/index.rst: -------------------------------------------------------------------------------- 1 | .. toctree:: 2 | :maxdepth: 2 3 | 4 | config.md 5 | customize_dataset.md 6 | data_pipeline.md 7 | customize_models.md 8 | customize_runtime.md 9 | customize_losses.md 10 | finetune.md 11 | pytorch2onnx.md 12 | -------------------------------------------------------------------------------- /images/overview_updated.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/Generic-Grouping/c0f33d76b207f937b538bfecf15d99672e068158/images/overview_updated.png -------------------------------------------------------------------------------- /images/teaser_large.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/Generic-Grouping/c0f33d76b207f937b538bfecf15d99672e068158/images/teaser_large.png -------------------------------------------------------------------------------- /mmdet/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree


import mmcv

from .version import __version__, short_version


def digit_version(version_str):
    """Convert a dotted version string into a list of ints for comparison.

    An ``rc`` pre-release segment sorts below the corresponding final
    release: ``"1.3rc1"`` becomes ``[1, 2, 1]`` while ``"1.3"`` becomes
    ``[1, 3]``. Segments that are neither numeric nor contain ``rc`` are
    skipped.
    """
    parts = []
    for token in version_str.split("."):
        if token.isdigit():
            parts.append(int(token))
        elif "rc" in token:
            rc_split = token.split("rc")
            # X.YrcZ must order before X.Y, so store (Y - 1, Z).
            parts.append(int(rc_split[0]) - 1)
            parts.append(int(rc_split[1]))
    return parts


mmcv_minimum_version = "1.2.4"
# mmcv_maximum_version = '1.3'
mmcv_version = digit_version(mmcv.__version__)


assert mmcv_version >= digit_version(mmcv_minimum_version)
# and mmcv_version <= digit_version(mmcv_maximum_version)), \
#     f'MMCV=={mmcv.__version__} is used but incompatible. ' \
#     f'Please install mmcv>={mmcv_minimum_version}, <={mmcv_maximum_version}.'

__all__ = ["__version__", "short_version"]

# /mmdet/apis/__init__.py
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree 6 | 7 | 8 | from .inference import ( 9 | async_inference_detector, 10 | inference_detector, 11 | init_detector, 12 | show_result_pyplot, 13 | ) 14 | from .test import ( 15 | collect_results_cpu, 16 | collect_results_gpu, 17 | multi_gpu_test, 18 | single_gpu_test, 19 | ) 20 | from .train import get_root_logger, set_random_seed, train_detector 21 | 22 | __all__ = [ 23 | "get_root_logger", 24 | "set_random_seed", 25 | "train_detector", 26 | "init_detector", 27 | "async_inference_detector", 28 | "inference_detector", 29 | "show_result_pyplot", 30 | "multi_gpu_test", 31 | "single_gpu_test", 32 | "collect_results_cpu", 33 | "collect_results_gpu", 34 | ] 35 | -------------------------------------------------------------------------------- /mmdet/core/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree 6 | 7 | 8 | from .anchor import * # noqa: F401, F403 9 | from .bbox import * # noqa: F401, F403 10 | from .evaluation import * # noqa: F401, F403 11 | from .export import * # noqa: F401, F403 12 | from .fp16 import * # noqa: F401, F403 13 | from .mask import * # noqa: F401, F403 14 | from .post_processing import * # noqa: F401, F403 15 | from .utils import * # noqa: F401, F403 16 | -------------------------------------------------------------------------------- /mmdet/core/anchor/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 
from mmcv.utils import build_from_cfg, Registry

# Registry holding all anchor-generator classes (AnchorGenerator,
# LegacyAnchorGenerator, YOLOAnchorGenerator, ...).
ANCHOR_GENERATORS = Registry("Anchor generator")


def build_anchor_generator(cfg, default_args=None):
    """Build an anchor generator instance from a config dict.

    Args:
        cfg (dict): Config with a ``type`` key naming a registered class.
        default_args (dict, optional): Default kwargs merged into ``cfg``.

    Returns:
        The instantiated anchor generator.
    """
    return build_from_cfg(cfg, ANCHOR_GENERATORS, default_args)
import torch

from .builder import ANCHOR_GENERATORS


@ANCHOR_GENERATORS.register_module()
class PointGenerator:
    """Generate per-pixel point locations for a single feature map."""

    def _meshgrid(self, x, y, row_major=True):
        # Tile x along rows and y down columns to form a dense grid.
        grid_x = x.repeat(len(y))
        grid_y = y.view(-1, 1).repeat(1, len(x)).view(-1)
        return (grid_x, grid_y) if row_major else (grid_y, grid_x)

    def grid_points(self, featmap_size, stride=16, device="cuda"):
        """Return a ``(feat_h * feat_w, 3)`` tensor of ``(x, y, stride)`` rows."""
        feat_h, feat_w = featmap_size
        xs = torch.arange(0.0, feat_w, device=device) * stride
        ys = torch.arange(0.0, feat_h, device=device) * stride
        grid_x, grid_y = self._meshgrid(xs, ys)
        # Broadcast the scalar stride into a column matching the grid length.
        stride_col = xs.new_full((grid_x.shape[0],), stride)
        return torch.stack([grid_x, grid_y, stride_col], dim=-1).to(device)

    def valid_flags(self, featmap_size, valid_size, device="cuda"):
        """Return a bool mask marking grid points inside ``valid_size``."""
        feat_h, feat_w = featmap_size
        valid_h, valid_w = valid_size
        assert valid_h <= feat_h and valid_w <= feat_w
        flag_x = torch.zeros(feat_w, dtype=torch.bool, device=device)
        flag_y = torch.zeros(feat_h, dtype=torch.bool, device=device)
        flag_x[:valid_w] = 1
        flag_y[:valid_h] = 1
        grid_x, grid_y = self._meshgrid(flag_x, flag_y)
        return grid_x & grid_y
import torch


def images_to_levels(target, num_levels):
    """Convert per-image targets into per-level targets.

    [target_img0, target_img1] -> [target_level0, target_level1, ...]
    """
    stacked = torch.stack(target, 0)
    level_targets = []
    begin = 0
    for count in num_levels:
        level_targets.append(stacked[:, begin:begin + count])
        begin += count
    return level_targets


def anchor_inside_flags(flat_anchors, valid_flags, img_shape, allowed_border=0):
    """Check whether the anchors are inside the (optionally padded) border.

    Args:
        flat_anchors (torch.Tensor): Flatten anchors, shape (n, 4).
        valid_flags (torch.Tensor): An existing valid flags of anchors.
        img_shape (tuple(int)): Shape of current image.
        allowed_border (int, optional): The border to allow the valid anchor.
            A negative value disables the border check. Defaults to 0.

    Returns:
        torch.Tensor: Flags indicating whether the anchors are inside a
        valid range.
    """
    img_h, img_w = img_shape[:2]
    if allowed_border < 0:
        # Border checking disabled: keep the incoming flags as-is.
        return valid_flags
    return (
        valid_flags
        & (flat_anchors[:, 0] >= -allowed_border)
        & (flat_anchors[:, 1] >= -allowed_border)
        & (flat_anchors[:, 2] < img_w + allowed_border)
        & (flat_anchors[:, 3] < img_h + allowed_border)
    )


def calc_region(bbox, ratio, featmap_size=None):
    """Calculate a proportional bbox region.

    The bbox center is fixed; the new height/width are scaled by ``ratio``.

    Args:
        bbox (Tensor): Bboxes to calculate regions, shape (n, 4).
        ratio (float): Ratio of the output region.
        featmap_size (tuple): Feature map size used for clipping the boundary.

    Returns:
        tuple: x1, y1, x2, y2
    """
    near, far = 1 - ratio, ratio
    x1 = torch.round(near * bbox[0] + far * bbox[2]).long()
    y1 = torch.round(near * bbox[1] + far * bbox[3]).long()
    x2 = torch.round(far * bbox[0] + near * bbox[2]).long()
    y2 = torch.round(far * bbox[1] + near * bbox[3]).long()
    if featmap_size is not None:
        # Clip to the feature map; note (h, w) ordering of featmap_size.
        x1 = x1.clamp(min=0, max=featmap_size[1])
        y1 = y1.clamp(min=0, max=featmap_size[0])
        x2 = x2.clamp(min=0, max=featmap_size[1])
        y2 = y2.clamp(min=0, max=featmap_size[0])
    return (x1, y1, x2, y2)
"AssignResult", 49 | "BaseSampler", 50 | "PseudoSampler", 51 | "RandomSampler", 52 | "InstanceBalancedPosSampler", 53 | "IoUBalancedNegSampler", 54 | "CombinedSampler", 55 | "OHEMSampler", 56 | "SamplingResult", 57 | "ScoreHLRSampler", 58 | "build_assigner", 59 | "build_sampler", 60 | "bbox_flip", 61 | "bbox_mapping", 62 | "bbox_mapping_back", 63 | "bbox2roi", 64 | "roi2bbox", 65 | "bbox2result", 66 | "distance2bbox", 67 | "bbox2distance", 68 | "build_bbox_coder", 69 | "BaseBBoxCoder", 70 | "PseudoBBoxCoder", 71 | "DeltaXYWHBBoxCoder", 72 | "TBLRBBoxCoder", 73 | "CenterRegionAssigner", 74 | "bbox_rescale", 75 | "bbox_cxcywh_to_xyxy", 76 | "bbox_xyxy_to_cxcywh", 77 | "RegionAssigner", 78 | ] 79 | -------------------------------------------------------------------------------- /mmdet/core/bbox/assigners/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree 6 | 7 | 8 | from .approx_max_iou_assigner import ApproxMaxIoUAssigner 9 | from .assign_result import AssignResult 10 | from .atss_assigner import ATSSAssigner 11 | from .base_assigner import BaseAssigner 12 | from .center_region_assigner import CenterRegionAssigner 13 | from .grid_assigner import GridAssigner 14 | from .hungarian_assigner import HungarianAssigner 15 | from .max_iou_assigner import MaxIoUAssigner 16 | from .point_assigner import PointAssigner 17 | from .region_assigner import RegionAssigner 18 | 19 | __all__ = [ 20 | "BaseAssigner", 21 | "MaxIoUAssigner", 22 | "ApproxMaxIoUAssigner", 23 | "AssignResult", 24 | "PointAssigner", 25 | "ATSSAssigner", 26 | "CenterRegionAssigner", 27 | "GridAssigner", 28 | "HungarianAssigner", 29 | "RegionAssigner", 30 | ] 31 | -------------------------------------------------------------------------------- 
from abc import ABCMeta, abstractmethod


class BaseAssigner(metaclass=ABCMeta):
    """Abstract base class for assigners matching boxes to ground truth."""

    @abstractmethod
    def assign(self, bboxes, gt_bboxes, gt_bboxes_ignore=None, gt_labels=None):
        """Assign each box to a ground-truth box or mark it as negative."""
from abc import ABCMeta, abstractmethod


class BaseBBoxCoder(metaclass=ABCMeta):
    """Abstract base class for bounding-box coders."""

    def __init__(self, **kwargs):
        # Nothing to configure; subclasses accept arbitrary kwargs.
        pass

    @abstractmethod
    def encode(self, bboxes, gt_bboxes):
        """Encode the deltas between ``bboxes`` and ground-truth boxes."""

    @abstractmethod
    def decode(self, bboxes, bboxes_pred):
        """Decode predictions back into absolute boxes."""
from ..builder import BBOX_CODERS
from .base_bbox_coder import BaseBBoxCoder


@BBOX_CODERS.register_module()
class PseudoBBoxCoder(BaseBBoxCoder):
    """Pseudo bounding box coder that passes its inputs through untouched."""

    def __init__(self, **kwargs):
        # Bug fix: the original called ``super(BaseBBoxCoder, self)``, which
        # skips BaseBBoxCoder.__init__ entirely and forwards **kwargs to
        # object.__init__ — a TypeError whenever kwargs are supplied.
        super(PseudoBBoxCoder, self).__init__(**kwargs)

    def encode(self, bboxes, gt_bboxes):
        """torch.Tensor: return the given ``gt_bboxes`` unchanged."""
        return gt_bboxes

    def decode(self, bboxes, pred_bboxes):
        """torch.Tensor: return the given ``pred_bboxes`` unchanged."""
        return pred_bboxes
import numpy as np
import torch


def ensure_rng(rng=None):
    """Simple version of the ``kwarray.ensure_rng``

    Coerce ``rng`` into a ``numpy.random.RandomState``: ``None`` selects
    numpy's global generator, an int seeds a fresh generator, and an
    existing generator object is returned unchanged.

    References:
        https://gitlab.kitware.com/computer-vision/kwarray/blob/master/kwarray/util_random.py#L270
    """
    if rng is None:
        return np.random.mtrand._rand
    if isinstance(rng, int):
        return np.random.RandomState(rng)
    return rng


def random_boxes(num=1, scale=1, rng=None):
    """Simple version of ``kwimage.Boxes.random``

    Returns:
        Tensor: shape (n, 4) in x1, y1, x2, y2 format.

    References:
        https://gitlab.kitware.com/computer-vision/kwimage/blob/master/kwimage/structs/boxes.py#L1390

    Example:
        >>> num = 3
        >>> scale = 512
        >>> rng = 0
        >>> boxes = random_boxes(num, scale, rng)
        >>> print(boxes)
        tensor([[280.9925, 278.9802, 308.6148, 366.1769],
                [216.9113, 330.6978, 224.0446, 456.5878],
                [405.3632, 196.3221, 493.3953, 270.7942]])
    """
    generator = ensure_rng(rng)

    raw = generator.rand(num, 4).astype(np.float32)
    # Columns pair up as (x_a, y_a, x_b, y_b); sort each pair so the
    # top-left corner really is the smaller coordinate.
    top_left = np.minimum(raw[:, :2], raw[:, 2:])
    bottom_right = np.maximum(raw[:, :2], raw[:, 2:])
    tlbr = np.concatenate([top_left, bottom_right], axis=1)
    tlbr *= scale  # in-place keeps float32 regardless of scale's type
    return torch.from_numpy(tlbr)
from mmcv.utils import build_from_cfg, Registry

# Registry holding IoU calculator classes (e.g. BboxOverlaps2D).
IOU_CALCULATORS = Registry("IoU calculator")


def build_iou_calculator(cfg, default_args=None):
    """Builder of IoU calculator.

    Args:
        cfg (dict): Config with a ``type`` key naming a registered class.
        default_args (dict, optional): Default kwargs merged into ``cfg``.

    Returns:
        The instantiated IoU calculator.
    """
    return build_from_cfg(cfg, IOU_CALCULATORS, default_args)
from mmcv.utils import build_from_cfg, Registry

# Registry holding match-cost classes (BBoxL1Cost, IoUCost, ...).
MATCH_COST = Registry("Match Cost")


def build_match_cost(cfg, default_args=None):
    """Builder of matching cost used by set-based assigners.

    Note: the original docstring said "Builder of IoU calculator" — a
    copy-paste from the IoU-calculator builder; this builds a match cost.

    Args:
        cfg (dict): Config with a ``type`` key naming a registered class.
        default_args (dict, optional): Default kwargs merged into ``cfg``.

    Returns:
        The instantiated match cost object.
    """
    return build_from_cfg(cfg, MATCH_COST, default_args)
import numpy as np
import torch

from ..builder import BBOX_SAMPLERS
from .random_sampler import RandomSampler


@BBOX_SAMPLERS.register_module()
class InstanceBalancedPosSampler(RandomSampler):
    """Instance balanced sampler that samples equal number of positive samples
    for each instance."""

    def _sample_pos(self, assign_result, num_expected, **kwargs):
        """Sample positive boxes.

        Args:
            assign_result (:obj:`AssignResult`): The assigned results of boxes.
            num_expected (int): The number of expected positive samples

        Returns:
            Tensor or ndarray: sampled indices.
        """
        # Indices of all boxes assigned to some ground truth (gt_inds > 0).
        pos_inds = torch.nonzero(assign_result.gt_inds > 0, as_tuple=False)
        if pos_inds.numel() != 0:
            pos_inds = pos_inds.squeeze(1)
        if pos_inds.numel() <= num_expected:
            # Fewer positives than requested: keep them all, no balancing needed.
            return pos_inds
        else:
            unique_gt_inds = assign_result.gt_inds[pos_inds].unique()
            num_gts = len(unique_gt_inds)
            # Per-instance quota; the +1 rounds up so quotas cover num_expected.
            num_per_gt = int(round(num_expected / float(num_gts)) + 1)
            sampled_inds = []
            for i in unique_gt_inds:
                # Positives assigned to this particular ground-truth instance.
                inds = torch.nonzero(assign_result.gt_inds == i.item(), as_tuple=False)
                if inds.numel() != 0:
                    inds = inds.squeeze(1)
                else:
                    continue
                if len(inds) > num_per_gt:
                    # Randomly keep at most the per-instance quota.
                    inds = self.random_choice(inds, num_per_gt)
                sampled_inds.append(inds)
            sampled_inds = torch.cat(sampled_inds)
            if len(sampled_inds) < num_expected:
                # Undershot: top up with random positives not yet sampled.
                num_extra = num_expected - len(sampled_inds)
                extra_inds = np.array(
                    list(set(pos_inds.cpu()) - set(sampled_inds.cpu()))
                )
                if len(extra_inds) > num_extra:
                    extra_inds = self.random_choice(extra_inds, num_extra)
                extra_inds = (
                    torch.from_numpy(extra_inds).to(assign_result.gt_inds.device).long()
                )
                sampled_inds = torch.cat([sampled_inds, extra_inds])
            elif len(sampled_inds) > num_expected:
                # Overshot (quotas round up): randomly trim to num_expected.
                sampled_inds = self.random_choice(sampled_inds, num_expected)
            return sampled_inds
import torch

from ..builder import BBOX_SAMPLERS
from .base_sampler import BaseSampler
from .sampling_result import SamplingResult


@BBOX_SAMPLERS.register_module()
class PseudoSampler(BaseSampler):
    """A no-op sampler: every assigned box is kept without subsampling."""

    def __init__(self, **kwargs):
        # Intentionally skip BaseSampler.__init__; nothing is configured.
        pass

    def _sample_pos(self, **kwargs):
        """Sample positive samples."""
        raise NotImplementedError

    def _sample_neg(self, **kwargs):
        """Sample negative samples."""
        raise NotImplementedError

    def sample(self, assign_result, bboxes, gt_bboxes, **kwargs):
        """Directly return the positive and negative indices of samples.

        Args:
            assign_result (:obj:`AssignResult`): Assigned results
            bboxes (torch.Tensor): Bounding boxes
            gt_bboxes (torch.Tensor): Ground truth boxes

        Returns:
            :obj:`SamplingResult`: sampler results
        """
        gt_inds = assign_result.gt_inds
        positives = torch.nonzero(gt_inds > 0, as_tuple=False).squeeze(-1).unique()
        negatives = torch.nonzero(gt_inds == 0, as_tuple=False).squeeze(-1).unique()
        # No GT boxes are injected as proposals, so the flag vector is all zero.
        gt_flags = bboxes.new_zeros(bboxes.shape[0], dtype=torch.uint8)
        return SamplingResult(
            positives, negatives, bboxes, gt_bboxes, assign_result, gt_flags
        )
import torch

from ..builder import BBOX_SAMPLERS
from .base_sampler import BaseSampler


@BBOX_SAMPLERS.register_module()
class RandomSampler(BaseSampler):
    """Random sampler.

    Args:
        num (int): Number of samples
        pos_fraction (float): Fraction of positive samples
        neg_pos_ub (int, optional): Upper bound number of negative and
            positive samples. Defaults to -1.
        add_gt_as_proposals (bool, optional): Whether to add ground truth
            boxes as proposals. Defaults to True.
    """

    def __init__(
        self, num, pos_fraction, neg_pos_ub=-1, add_gt_as_proposals=True, **kwargs
    ):
        # Imported lazily to avoid a circular import at module load time.
        from mmdet.core.bbox import demodata

        super(RandomSampler, self).__init__(
            num, pos_fraction, neg_pos_ub, add_gt_as_proposals
        )
        self.rng = demodata.ensure_rng(kwargs.get("rng", None))

    def random_choice(self, gallery, num):
        """Randomly pick ``num`` elements from ``gallery``.

        A Tensor gallery yields Tensor indices; a list or ndarray gallery
        yields an ndarray.

        Args:
            gallery (Tensor | ndarray | list): indices pool.
            num (int): expected sample num.

        Returns:
            Tensor or ndarray: sampled indices.
        """
        assert len(gallery) >= num

        was_tensor = isinstance(gallery, torch.Tensor)
        if not was_tensor:
            # Place the working tensor on the current CUDA device if any.
            if torch.cuda.is_available():
                device = torch.cuda.current_device()
            else:
                device = "cpu"
            gallery = torch.tensor(gallery, dtype=torch.long, device=device)
        perm = torch.randperm(gallery.numel(), device=gallery.device)[:num]
        picked = gallery[perm]
        return picked if was_tensor else picked.cpu().numpy()

    def _sample_pos(self, assign_result, num_expected, **kwargs):
        """Randomly sample some positive samples."""
        candidates = torch.nonzero(assign_result.gt_inds > 0, as_tuple=False)
        if candidates.numel() != 0:
            candidates = candidates.squeeze(1)
        if candidates.numel() <= num_expected:
            return candidates
        return self.random_choice(candidates, num_expected)

    def _sample_neg(self, assign_result, num_expected, **kwargs):
        """Randomly sample some negative samples."""
        candidates = torch.nonzero(assign_result.gt_inds == 0, as_tuple=False)
        if candidates.numel() != 0:
            candidates = candidates.squeeze(1)
        if len(candidates) <= num_expected:
            return candidates
        return self.random_choice(candidates, num_expected)
import numpy as np


def bbox_overlaps(bboxes1, bboxes2, mode="iou", eps=1e-6):
    """Calculate the ious between each bbox of bboxes1 and bboxes2.

    Args:
        bboxes1(ndarray): shape (n, 4)
        bboxes2(ndarray): shape (k, 4)
        mode(str): iou (intersection over union) or iof (intersection
            over foreground)

    Returns:
        ious(ndarray): shape (n, k)
    """
    assert mode in ["iou", "iof"]

    bboxes1 = bboxes1.astype(np.float32)
    bboxes2 = bboxes2.astype(np.float32)
    n_rows, n_cols = bboxes1.shape[0], bboxes2.shape[0]
    if n_rows * n_cols == 0:
        return np.zeros((n_rows, n_cols), dtype=np.float32)

    # Loop over the smaller set of boxes; transpose the result back if swapped.
    swapped = n_rows > n_cols
    if swapped:
        bboxes1, bboxes2 = bboxes2, bboxes1
    result = np.zeros((bboxes1.shape[0], bboxes2.shape[0]), dtype=np.float32)
    area1 = (bboxes1[:, 2] - bboxes1[:, 0]) * (bboxes1[:, 3] - bboxes1[:, 1])
    area2 = (bboxes2[:, 2] - bboxes2[:, 0]) * (bboxes2[:, 3] - bboxes2[:, 1])
    for i in range(bboxes1.shape[0]):
        top_left = np.maximum(bboxes1[i, :2], bboxes2[:, :2])
        bottom_right = np.minimum(bboxes1[i, 2:], bboxes2[:, 2:])
        wh = np.maximum(bottom_right - top_left, 0)
        overlap = wh[:, 0] * wh[:, 1]
        if mode == "iou":
            union = area1[i] + area2 - overlap
        else:
            # "iof" normalizes by the foreground (first argument's) area;
            # after a swap the foreground boxes live in ``bboxes2``.
            union = area1[i] if not swapped else area2
        union = np.maximum(union, eps)
        result[i, :] = overlap / union
    return result.T if swapped else result
3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree 6 | 7 | 8 | from .pytorch2onnx import ( 9 | build_model_from_cfg, 10 | generate_inputs_and_wrap_model, 11 | preprocess_example_input, 12 | ) 13 | 14 | __all__ = [ 15 | "build_model_from_cfg", 16 | "generate_inputs_and_wrap_model", 17 | "preprocess_example_input", 18 | ] 19 | -------------------------------------------------------------------------------- /mmdet/core/fp16/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree 6 | 7 | 8 | from .deprecated_fp16_utils import ( 9 | deprecated_auto_fp16 as auto_fp16, 10 | deprecated_force_fp32 as force_fp32, 11 | deprecated_wrap_fp16_model as wrap_fp16_model, 12 | DeprecatedFp16OptimizerHook as Fp16OptimizerHook, 13 | ) 14 | 15 | __all__ = ["auto_fp16", "force_fp32", "Fp16OptimizerHook", "wrap_fp16_model"] 16 | -------------------------------------------------------------------------------- /mmdet/core/fp16/deprecated_fp16_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree 6 | 7 | 8 | import warnings 9 | 10 | from mmcv.runner import auto_fp16, force_fp32, Fp16OptimizerHook, wrap_fp16_model 11 | 12 | 13 | class DeprecatedFp16OptimizerHook(Fp16OptimizerHook): 14 | """A wrapper class for the FP16 optimizer hook. This class wraps 15 | :class:`Fp16OptimizerHook` in `mmcv.runner` and shows a warning that the 16 | :class:`Fp16OptimizerHook` from `mmdet.core` will be deprecated. 
17 | 18 | Refer to :class:`Fp16OptimizerHook` in `mmcv.runner` for more details. 19 | 20 | Args: 21 | loss_scale (float): Scale factor multiplied with loss. 22 | """ 23 | 24 | def __init__(*args, **kwargs): 25 | super().__init__(*args, **kwargs) 26 | warnings.warn( 27 | 'Importing Fp16OptimizerHook from "mmdet.core" will be ' 28 | 'deprecated in the future. Please import them from "mmcv.runner" ' 29 | "instead" 30 | ) 31 | 32 | 33 | def deprecated_auto_fp16(*args, **kwargs): 34 | warnings.warn( 35 | 'Importing auto_fp16 from "mmdet.core" will be ' 36 | 'deprecated in the future. Please import them from "mmcv.runner" ' 37 | "instead" 38 | ) 39 | return auto_fp16(*args, **kwargs) 40 | 41 | 42 | def deprecated_force_fp32(*args, **kwargs): 43 | warnings.warn( 44 | 'Importing force_fp32 from "mmdet.core" will be ' 45 | 'deprecated in the future. Please import them from "mmcv.runner" ' 46 | "instead" 47 | ) 48 | return force_fp32(*args, **kwargs) 49 | 50 | 51 | def deprecated_wrap_fp16_model(*args, **kwargs): 52 | warnings.warn( 53 | 'Importing wrap_fp16_model from "mmdet.core" will be ' 54 | 'deprecated in the future. Please import them from "mmcv.runner" ' 55 | "instead" 56 | ) 57 | wrap_fp16_model(*args, **kwargs) 58 | -------------------------------------------------------------------------------- /mmdet/core/mask/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 
3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree 6 | 7 | 8 | from .mask_target import mask_target 9 | from .structures import BaseInstanceMasks, BitmapMasks, PolygonMasks 10 | from .utils import encode_mask_results, split_combined_polys 11 | 12 | __all__ = [ 13 | "split_combined_polys", 14 | "mask_target", 15 | "BaseInstanceMasks", 16 | "BitmapMasks", 17 | "PolygonMasks", 18 | "encode_mask_results", 19 | ] 20 | -------------------------------------------------------------------------------- /mmdet/core/mask/mask_target.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree 6 | 7 | 8 | import numpy as np 9 | import torch 10 | from torch.nn.modules.utils import _pair 11 | 12 | 13 | def mask_target(pos_proposals_list, pos_assigned_gt_inds_list, gt_masks_list, cfg): 14 | """Compute mask target for positive proposals in multiple images. 15 | 16 | Args: 17 | pos_proposals_list (list[Tensor]): Positive proposals in multiple 18 | images. 19 | pos_assigned_gt_inds_list (list[Tensor]): Assigned GT indices for each 20 | positive proposals. 21 | gt_masks_list (list[:obj:`BaseInstanceMasks`]): Ground truth masks of 22 | each image. 23 | cfg (dict): Config dict that specifies the mask size. 24 | 25 | Returns: 26 | list[Tensor]: Mask target of each image. 
27 | """ 28 | cfg_list = [cfg for _ in range(len(pos_proposals_list))] 29 | mask_targets = map( 30 | mask_target_single, 31 | pos_proposals_list, 32 | pos_assigned_gt_inds_list, 33 | gt_masks_list, 34 | cfg_list, 35 | ) 36 | mask_targets = list(mask_targets) 37 | if len(mask_targets) > 0: 38 | mask_targets = torch.cat(mask_targets) 39 | return mask_targets 40 | 41 | 42 | def mask_target_single(pos_proposals, pos_assigned_gt_inds, gt_masks, cfg): 43 | """Compute mask target for each positive proposal in the image. 44 | 45 | Args: 46 | pos_proposals (Tensor): Positive proposals. 47 | pos_assigned_gt_inds (Tensor): Assigned GT inds of positive proposals. 48 | gt_masks (:obj:`BaseInstanceMasks`): GT masks in the format of Bitmap 49 | or Polygon. 50 | cfg (dict): Config dict that indicate the mask size. 51 | 52 | Returns: 53 | Tensor: Mask target of each positive proposals in the image. 54 | """ 55 | device = pos_proposals.device 56 | mask_size = _pair(cfg.mask_size) 57 | num_pos = pos_proposals.size(0) 58 | if num_pos > 0: 59 | proposals_np = pos_proposals.cpu().numpy() 60 | maxh, maxw = gt_masks.height, gt_masks.width 61 | proposals_np[:, [0, 2]] = np.clip(proposals_np[:, [0, 2]], 0, maxw) 62 | proposals_np[:, [1, 3]] = np.clip(proposals_np[:, [1, 3]], 0, maxh) 63 | pos_assigned_gt_inds = pos_assigned_gt_inds.cpu().numpy() 64 | 65 | mask_targets = gt_masks.crop_and_resize( 66 | proposals_np, mask_size, device=device, inds=pos_assigned_gt_inds 67 | ).to_ndarray() 68 | 69 | mask_targets = torch.from_numpy(mask_targets).float().to(device) 70 | else: 71 | mask_targets = pos_proposals.new_zeros((0,) + mask_size) 72 | 73 | return mask_targets 74 | -------------------------------------------------------------------------------- /mmdet/core/mask/utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 
3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree 6 | 7 | 8 | import mmcv 9 | import numpy as np 10 | import pycocotools.mask as mask_util 11 | 12 | 13 | def split_combined_polys(polys, poly_lens, polys_per_mask): 14 | """Split the combined 1-D polys into masks. 15 | 16 | A mask is represented as a list of polys, and a poly is represented as 17 | a 1-D array. In dataset, all masks are concatenated into a single 1-D 18 | tensor. Here we need to split the tensor into original representations. 19 | 20 | Args: 21 | polys (list): a list (length = image num) of 1-D tensors 22 | poly_lens (list): a list (length = image num) of poly length 23 | polys_per_mask (list): a list (length = image num) of poly number 24 | of each mask 25 | 26 | Returns: 27 | list: a list (length = image num) of list (length = mask num) of \ 28 | list (length = poly num) of numpy array. 29 | """ 30 | mask_polys_list = [] 31 | for img_id in range(len(polys)): 32 | polys_single = polys[img_id] 33 | polys_lens_single = poly_lens[img_id].tolist() 34 | polys_per_mask_single = polys_per_mask[img_id].tolist() 35 | 36 | split_polys = mmcv.slice_list(polys_single, polys_lens_single) 37 | mask_polys = mmcv.slice_list(split_polys, polys_per_mask_single) 38 | mask_polys_list.append(mask_polys) 39 | return mask_polys_list 40 | 41 | 42 | # TODO: move this function to more proper place 43 | def encode_mask_results(mask_results): 44 | """Encode bitmap mask to RLE code. 45 | 46 | Args: 47 | mask_results (list | tuple[list]): bitmap mask results. 48 | In mask scoring rcnn, mask_results is a tuple of (segm_results, 49 | segm_cls_score). 50 | 51 | Returns: 52 | list | tuple: RLE encoded mask. 
53 | """ 54 | if isinstance(mask_results, tuple): # mask scoring 55 | cls_segms, cls_mask_scores = mask_results 56 | else: 57 | cls_segms = mask_results 58 | num_classes = len(cls_segms) 59 | encoded_mask_results = [[] for _ in range(num_classes)] 60 | for i in range(len(cls_segms)): 61 | for cls_segm in cls_segms[i]: 62 | encoded_mask_results[i].append( 63 | mask_util.encode( 64 | np.array(cls_segm[:, :, np.newaxis], order="F", dtype="uint8") 65 | )[0] 66 | ) # encoded with RLE 67 | if isinstance(mask_results, tuple): 68 | return encoded_mask_results, cls_mask_scores 69 | else: 70 | return encoded_mask_results 71 | -------------------------------------------------------------------------------- /mmdet/core/post_processing/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree 6 | 7 | 8 | from .bbox_nms import fast_nms, multiclass_nms 9 | from .merge_augs import ( 10 | merge_aug_bboxes, 11 | merge_aug_masks, 12 | merge_aug_proposals, 13 | merge_aug_scores, 14 | ) 15 | 16 | __all__ = [ 17 | "multiclass_nms", 18 | "merge_aug_proposals", 19 | "merge_aug_bboxes", 20 | "merge_aug_scores", 21 | "merge_aug_masks", 22 | "fast_nms", 23 | ] 24 | -------------------------------------------------------------------------------- /mmdet/core/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 
3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree 6 | 7 | 8 | from .dist_utils import allreduce_grads, DistOptimizerHook, reduce_mean 9 | from .misc import mask2ndarray, multi_apply, unmap 10 | 11 | __all__ = [ 12 | "allreduce_grads", 13 | "DistOptimizerHook", 14 | "reduce_mean", 15 | "multi_apply", 16 | "unmap", 17 | "mask2ndarray", 18 | ] 19 | -------------------------------------------------------------------------------- /mmdet/core/utils/dist_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree 6 | 7 | 8 | import warnings 9 | from collections import OrderedDict 10 | 11 | import torch.distributed as dist 12 | from mmcv.runner import OptimizerHook 13 | from torch._utils import _flatten_dense_tensors, _take_tensors, _unflatten_dense_tensors 14 | 15 | 16 | def _allreduce_coalesced(tensors, world_size, bucket_size_mb=-1): 17 | if bucket_size_mb > 0: 18 | bucket_size_bytes = bucket_size_mb * 1024 * 1024 19 | buckets = _take_tensors(tensors, bucket_size_bytes) 20 | else: 21 | buckets = OrderedDict() 22 | for tensor in tensors: 23 | tp = tensor.type() 24 | if tp not in buckets: 25 | buckets[tp] = [] 26 | buckets[tp].append(tensor) 27 | buckets = buckets.values() 28 | 29 | for bucket in buckets: 30 | flat_tensors = _flatten_dense_tensors(bucket) 31 | dist.all_reduce(flat_tensors) 32 | flat_tensors.div_(world_size) 33 | for tensor, synced in zip( 34 | bucket, _unflatten_dense_tensors(flat_tensors, bucket) 35 | ): 36 | tensor.copy_(synced) 37 | 38 | 39 | def allreduce_grads(params, coalesce=True, bucket_size_mb=-1): 40 | """Allreduce gradients. 
41 | 42 | Args: 43 | params (list[torch.Parameters]): List of parameters of a model 44 | coalesce (bool, optional): Whether allreduce parameters as a whole. 45 | Defaults to True. 46 | bucket_size_mb (int, optional): Size of bucket, the unit is MB. 47 | Defaults to -1. 48 | """ 49 | grads = [ 50 | param.grad.data 51 | for param in params 52 | if param.requires_grad and param.grad is not None 53 | ] 54 | world_size = dist.get_world_size() 55 | if coalesce: 56 | _allreduce_coalesced(grads, world_size, bucket_size_mb) 57 | else: 58 | for tensor in grads: 59 | dist.all_reduce(tensor.div_(world_size)) 60 | 61 | 62 | class DistOptimizerHook(OptimizerHook): 63 | """Deprecated optimizer hook for distributed training.""" 64 | 65 | def __init__(self, *args, **kwargs): 66 | warnings.warn( 67 | '"DistOptimizerHook" is deprecated, please switch to' 68 | '"mmcv.runner.OptimizerHook".' 69 | ) 70 | super().__init__(*args, **kwargs) 71 | 72 | 73 | def reduce_mean(tensor): 74 | """Obtain the mean of tensor on different GPUs.""" 75 | if not (dist.is_available() and dist.is_initialized()): 76 | return tensor 77 | tensor = tensor.clone() 78 | dist.all_reduce(tensor.div_(dist.get_world_size()), op=dist.ReduceOp.SUM) 79 | return tensor 80 | -------------------------------------------------------------------------------- /mmdet/core/utils/misc.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree 6 | 7 | 8 | from functools import partial 9 | 10 | import numpy as np 11 | import torch 12 | from six.moves import map, zip 13 | 14 | from ..mask.structures import BitmapMasks, PolygonMasks 15 | 16 | 17 | def multi_apply(func, *args, **kwargs): 18 | """Apply function to a list of arguments.
19 | 20 | Note: 21 | This function applies the ``func`` to multiple inputs and 22 | map the multiple outputs of the ``func`` into different 23 | list. Each list contains the same type of outputs corresponding 24 | to different inputs. 25 | 26 | Args: 27 | func (Function): A function that will be applied to a list of 28 | arguments 29 | 30 | Returns: 31 | tuple(list): A tuple containing multiple list, each list contains \ 32 | a kind of returned results by the function 33 | """ 34 | pfunc = partial(func, **kwargs) if kwargs else func 35 | map_results = map(pfunc, *args) 36 | return tuple(map(list, zip(*map_results))) 37 | 38 | 39 | def unmap(data, count, inds, fill=0): 40 | """Unmap a subset of item (data) back to the original set of items (of size 41 | count)""" 42 | if data.dim() == 1: 43 | ret = data.new_full((count,), fill) 44 | ret[inds.type(torch.bool)] = data 45 | else: 46 | new_size = (count,) + data.size()[1:] 47 | ret = data.new_full(new_size, fill) 48 | ret[inds.type(torch.bool), :] = data 49 | return ret 50 | 51 | 52 | def mask2ndarray(mask): 53 | """Convert Mask to ndarray.. 54 | 55 | Args: 56 | mask (:obj:`BitmapMasks` or :obj:`PolygonMasks` or 57 | torch.Tensor or np.ndarray): The mask to be converted. 58 | 59 | Returns: 60 | np.ndarray: Ndarray mask of shape (n, h, w) that has been converted 61 | """ 62 | if isinstance(mask, (BitmapMasks, PolygonMasks)): 63 | mask = mask.to_ndarray() 64 | elif isinstance(mask, torch.Tensor): 65 | mask = mask.detach().cpu().numpy() 66 | elif not isinstance(mask, np.ndarray): 67 | raise TypeError(f"Unsupported {type(mask)} data type") 68 | return mask 69 | -------------------------------------------------------------------------------- /mmdet/core/visualization/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 
3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree 6 | 7 | 8 | from .image import color_val_matplotlib, imshow_det_bboxes, imshow_gt_det_bboxes 9 | 10 | __all__ = ["imshow_det_bboxes", "imshow_gt_det_bboxes", "color_val_matplotlib"] 11 | -------------------------------------------------------------------------------- /mmdet/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree 6 | 7 | 8 | from .builder import build_dataloader, build_dataset, DATASETS, PIPELINES 9 | from .coco import CocoDataset 10 | from .coco_split import CocoSplitDataset 11 | from .coco_split_online import CocoSplitOnlineDataset 12 | from .coco_split_pseudo_masks import CocoSplitPseudoMasksDataset 13 | from .dataset_wrappers import ClassBalancedDataset, ConcatDataset, RepeatDataset 14 | from .samplers import DistributedGroupSampler, DistributedSampler, GroupSampler 15 | from .utils import get_loading_pipeline, replace_ImageToTensor 16 | 17 | __all__ = [ 18 | "CustomDataset", 19 | "CocoDataset", 20 | "GroupSampler", 21 | "DistributedGroupSampler", 22 | "DistributedSampler", 23 | "build_dataloader", 24 | "ConcatDataset", 25 | "RepeatDataset", 26 | "ClassBalancedDataset", 27 | "DATASETS", 28 | "PIPELINES", 29 | "build_dataset", 30 | "replace_ImageToTensor", 31 | "get_loading_pipeline" "CocoSplitDataset", 32 | "CocoSplitPseudoMasksDataset", 33 | "CocoSplitOnlineDataset", 34 | ] 35 | -------------------------------------------------------------------------------- /mmdet/datasets/coco_split_online.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 
3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree 6 | 7 | 8 | import json 9 | import random 10 | 11 | from pycocotools.coco import COCO 12 | 13 | from .builder import DATASETS 14 | from .coco_split import CocoSplitDataset 15 | 16 | 17 | @DATASETS.register_module() 18 | class CocoSplitOnlineDataset(CocoSplitDataset): 19 | """ 20 | Different from other MMDet dataset, this one loads annotations 21 | online instead of from a whole json. This is more memory 22 | efficient albeit a little bit slower. 23 | This enables training on 3M+ masks, which would not be feasible 24 | with a single json to store. 25 | """ 26 | 27 | def __init__( 28 | self, 29 | ann_dir=None, 30 | iou_thresh=None, 31 | score_thresh=None, 32 | top_k=None, 33 | random_sample_masks=False, 34 | **kwargs, 35 | ): 36 | """ 37 | Args: 38 | ann_dir: directory to store the annotations, where annotations 39 | for each image is stored as "image_id.json" 40 | For other arguments please see coco_split_pseudo_masks.py 41 | """ 42 | self.ann_dir = ann_dir 43 | self.iou_thresh = iou_thresh 44 | self.score_thresh = score_thresh 45 | self.top_k = top_k 46 | self.random_sample_masks = random_sample_masks 47 | super(CocoSplitOnlineDataset, self).__init__(**kwargs) 48 | 49 | # Override to load pseudo masks online 50 | def get_ann_info(self, idx): 51 | img_id = self.data_infos[idx]["id"] 52 | ann_info = json.load(open(f"{self.ann_dir}{img_id}.json")) 53 | ann_info = self.sample_targets(ann_info) 54 | return self._parse_ann_info(self.data_infos[idx], ann_info) 55 | 56 | def sample_targets(self, annotations): 57 | new_anns = annotations 58 | if self.iou_thresh is not None: 59 | tmp_new_anns = [] 60 | for ann in new_anns: 61 | if ann["gt_iou"] < self.iou_thresh: 62 | tmp_new_anns.append(ann) 63 | new_anns = tmp_new_anns 64 | if self.score_thresh is not None: 65 | tmp_new_anns = [] 66 | for ann in new_anns: 67 | if ann["score"] >= 
self.score_thresh: 68 | tmp_new_anns.append(ann) 69 | new_anns = tmp_new_anns 70 | if self.random_sample_masks: 71 | random.shuffle(new_anns) 72 | if self.top_k is not None: 73 | new_anns = new_anns[: self.top_k] 74 | return new_anns 75 | -------------------------------------------------------------------------------- /mmdet/datasets/coco_split_pseudo_masks.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree 6 | 7 | 8 | import random 9 | 10 | from pycocotools.coco import COCO 11 | 12 | from .builder import DATASETS 13 | from .coco_split import CocoSplitDataset 14 | 15 | 16 | @DATASETS.register_module() 17 | class CocoSplitPseudoMasksDataset(CocoSplitDataset): 18 | """ 19 | Used to joint train on images with both pseudo-GT and GT. 20 | """ 21 | 22 | def __init__( 23 | self, 24 | additional_ann_file=None, 25 | iou_thresh=None, 26 | score_thresh=None, 27 | top_k=None, 28 | random_sample_masks=False, 29 | **kwargs, 30 | ): 31 | # Add additional annotation file (eg. 
from pseudo masks) 32 | self.additional_coco = None 33 | if additional_ann_file is not None: 34 | self.additional_coco = COCO(additional_ann_file) 35 | self.iou_thresh = iou_thresh 36 | self.score_thresh = score_thresh 37 | self.top_k = top_k 38 | self.random_sample_masks = random_sample_masks 39 | super(CocoSplitPseudoMasksDataset, self).__init__(**kwargs) 40 | 41 | # Override to load pseudo masks 42 | def get_ann_info(self, idx): 43 | img_id = self.data_infos[idx]["id"] 44 | ann_ids = self.coco.get_ann_ids(img_ids=[img_id]) 45 | ann_info = self.coco.load_anns(ann_ids) 46 | all_anns = [] 47 | all_anns.extend(ann_info) 48 | if self.additional_coco is not None: 49 | additional_ann_ids = self.additional_coco.get_ann_ids(img_ids=[img_id]) 50 | additional_ann_info = self.additional_coco.load_anns(additional_ann_ids) 51 | additional_ann_info = self.sample_targets(additional_ann_info) 52 | all_anns.extend(additional_ann_info) 53 | return self._parse_ann_info(self.data_infos[idx], all_anns) 54 | 55 | def sample_targets(self, annotations): 56 | new_anns = annotations 57 | if self.iou_thresh is not None: 58 | tmp_new_anns = [] 59 | for ann in new_anns: 60 | if ann["gt_iou"] < self.iou_thresh: 61 | tmp_new_anns.append(ann) 62 | new_anns = tmp_new_anns 63 | if self.score_thresh is not None: 64 | tmp_new_anns = [] 65 | for ann in new_anns: 66 | if ann["score"] >= self.score_thresh: 67 | tmp_new_anns.append(ann) 68 | new_anns = tmp_new_anns 69 | if self.random_sample_masks: 70 | random.shuffle(new_anns) 71 | if self.top_k is not None: 72 | new_anns = new_anns[: self.top_k] 73 | return new_anns 74 | -------------------------------------------------------------------------------- /mmdet/datasets/pipelines/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 
3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree 6 | 7 | 8 | from .auto_augment import ( 9 | AutoAugment, 10 | BrightnessTransform, 11 | ColorTransform, 12 | ContrastTransform, 13 | EqualizeTransform, 14 | Rotate, 15 | Shear, 16 | Translate, 17 | ) 18 | from .compose import Compose 19 | from .formating import ( 20 | Collect, 21 | DefaultFormatBundle, 22 | ImageToTensor, 23 | to_tensor, 24 | ToDataContainer, 25 | ToTensor, 26 | Transpose, 27 | ) 28 | from .instaboost import InstaBoost 29 | from .loading import ( 30 | LoadAnnotations, 31 | LoadImageFromFile, 32 | LoadImageFromWebcam, 33 | LoadMultiChannelImageFromFiles, 34 | LoadProposals, 35 | ) 36 | from .test_time_aug import MultiScaleFlipAug 37 | from .transforms import ( 38 | Albu, 39 | CutOut, 40 | Expand, 41 | MinIoURandomCrop, 42 | Normalize, 43 | Pad, 44 | PhotoMetricDistortion, 45 | RandomCenterCropPad, 46 | RandomCrop, 47 | RandomFlip, 48 | Resize, 49 | SegRescale, 50 | ) 51 | 52 | __all__ = [ 53 | "Compose", 54 | "to_tensor", 55 | "ToTensor", 56 | "ImageToTensor", 57 | "ToDataContainer", 58 | "Transpose", 59 | "Collect", 60 | "DefaultFormatBundle", 61 | "LoadAnnotations", 62 | "LoadImageFromFile", 63 | "LoadImageFromWebcam", 64 | "LoadMultiChannelImageFromFiles", 65 | "LoadProposals", 66 | "MultiScaleFlipAug", 67 | "Resize", 68 | "RandomFlip", 69 | "Pad", 70 | "RandomCrop", 71 | "Normalize", 72 | "SegRescale", 73 | "MinIoURandomCrop", 74 | "Expand", 75 | "PhotoMetricDistortion", 76 | "Albu", 77 | "InstaBoost", 78 | "RandomCenterCropPad", 79 | "AutoAugment", 80 | "CutOut", 81 | "Shear", 82 | "Rotate", 83 | "ColorTransform", 84 | "EqualizeTransform", 85 | "BrightnessTransform", 86 | "ContrastTransform", 87 | "Translate", 88 | ] 89 | -------------------------------------------------------------------------------- /mmdet/datasets/pipelines/compose.py: 
-------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree 6 | 7 | 8 | import collections 9 | 10 | from mmcv.utils import build_from_cfg 11 | 12 | from ..builder import PIPELINES 13 | 14 | 15 | @PIPELINES.register_module() 16 | class Compose: 17 | """Compose multiple transforms sequentially. 18 | 19 | Args: 20 | transforms (Sequence[dict | callable]): Sequence of transform object or 21 | config dict to be composed. 22 | """ 23 | 24 | def __init__(self, transforms): 25 | assert isinstance(transforms, collections.abc.Sequence) 26 | self.transforms = [] 27 | for transform in transforms: 28 | if isinstance(transform, dict): 29 | transform = build_from_cfg(transform, PIPELINES) 30 | self.transforms.append(transform) 31 | elif callable(transform): 32 | self.transforms.append(transform) 33 | else: 34 | raise TypeError("transform must be callable or a dict") 35 | 36 | def __call__(self, data): 37 | """Call function to apply transforms sequentially. 38 | 39 | Args: 40 | data (dict): A result dict contains the data to transform. 41 | 42 | Returns: 43 | dict: Transformed data. 44 | """ 45 | 46 | for t in self.transforms: 47 | data = t(data) 48 | if data is None: 49 | return None 50 | return data 51 | 52 | def __repr__(self): 53 | format_string = self.__class__.__name__ + "(" 54 | for t in self.transforms: 55 | format_string += "\n" 56 | format_string += f" {t}" 57 | format_string += "\n)" 58 | return format_string 59 | -------------------------------------------------------------------------------- /mmdet/datasets/samplers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 
3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree 6 | 7 | 8 | from .distributed_sampler import DistributedSampler 9 | from .group_sampler import DistributedGroupSampler, GroupSampler 10 | 11 | __all__ = ["DistributedSampler", "DistributedGroupSampler", "GroupSampler"] 12 | -------------------------------------------------------------------------------- /mmdet/datasets/samplers/distributed_sampler.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree 6 | 7 | 8 | import math 9 | 10 | import torch 11 | from torch.utils.data import DistributedSampler as _DistributedSampler 12 | 13 | 14 | class DistributedSampler(_DistributedSampler): 15 | def __init__(self, dataset, num_replicas=None, rank=None, shuffle=True): 16 | super().__init__(dataset, num_replicas=num_replicas, rank=rank) 17 | self.shuffle = shuffle 18 | 19 | def __iter__(self): 20 | # deterministically shuffle based on epoch 21 | if self.shuffle: 22 | g = torch.Generator() 23 | g.manual_seed(self.epoch) 24 | indices = torch.randperm(len(self.dataset), generator=g).tolist() 25 | else: 26 | indices = torch.arange(len(self.dataset)).tolist() 27 | 28 | # add extra samples to make it evenly divisible 29 | # in case that indices is shorter than half of total_size 30 | indices = (indices * math.ceil(self.total_size / len(indices)))[ 31 | : self.total_size 32 | ] 33 | assert len(indices) == self.total_size 34 | 35 | # subsample 36 | indices = indices[self.rank : self.total_size : self.num_replicas] 37 | assert len(indices) == self.num_samples 38 | 39 | return iter(indices) 40 | -------------------------------------------------------------------------------- /mmdet/models/__init__.py: 
-------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree 6 | 7 | 8 | from .backbones import * # noqa: F401,F403 9 | from .builder import ( 10 | BACKBONES, 11 | build_backbone, 12 | build_detector, 13 | build_head, 14 | build_loss, 15 | build_neck, 16 | build_roi_extractor, 17 | build_shared_head, 18 | DETECTORS, 19 | HEADS, 20 | LOSSES, 21 | NECKS, 22 | ROI_EXTRACTORS, 23 | SHARED_HEADS, 24 | ) 25 | from .dense_heads import * # noqa: F401,F403 26 | from .detectors import * # noqa: F401,F403 27 | from .losses import * # noqa: F401,F403 28 | from .necks import * # noqa: F401,F403 29 | from .roi_heads import * # noqa: F401,F403 30 | 31 | __all__ = [ 32 | "BACKBONES", 33 | "NECKS", 34 | "ROI_EXTRACTORS", 35 | "SHARED_HEADS", 36 | "HEADS", 37 | "LOSSES", 38 | "DETECTORS", 39 | "build_backbone", 40 | "build_neck", 41 | "build_roi_extractor", 42 | "build_shared_head", 43 | "build_head", 44 | "build_loss", 45 | "build_detector", 46 | ] 47 | -------------------------------------------------------------------------------- /mmdet/models/backbones/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 
3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree 6 | 7 | 8 | from .darknet import Darknet 9 | from .detectors_resnet import DetectoRS_ResNet 10 | from .detectors_resnext import DetectoRS_ResNeXt 11 | from .hourglass import HourglassNet 12 | from .hrnet import HRNet 13 | from .regnet import RegNet 14 | from .res2net import Res2Net 15 | from .resnest import ResNeSt 16 | from .resnet import ResNet, ResNetV1d 17 | from .resnext import ResNeXt 18 | from .ssd_vgg import SSDVGG 19 | from .trident_resnet import TridentResNet 20 | 21 | __all__ = [ 22 | "RegNet", 23 | "ResNet", 24 | "ResNetV1d", 25 | "ResNeXt", 26 | "SSDVGG", 27 | "HRNet", 28 | "Res2Net", 29 | "HourglassNet", 30 | "DetectoRS_ResNet", 31 | "DetectoRS_ResNeXt", 32 | "Darknet", 33 | "ResNeSt", 34 | "TridentResNet", 35 | ] 36 | -------------------------------------------------------------------------------- /mmdet/models/builder.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree 6 | 7 | 8 | import warnings 9 | 10 | from mmcv.utils import build_from_cfg, Registry 11 | from torch import nn 12 | 13 | BACKBONES = Registry("backbone") 14 | NECKS = Registry("neck") 15 | ROI_EXTRACTORS = Registry("roi_extractor") 16 | SHARED_HEADS = Registry("shared_head") 17 | HEADS = Registry("head") 18 | LOSSES = Registry("loss") 19 | DETECTORS = Registry("detector") 20 | 21 | 22 | def build(cfg, registry, default_args=None): 23 | """Build a module. 24 | 25 | Args: 26 | cfg (dict, list[dict]): The config of modules, is is either a dict 27 | or a list of configs. 28 | registry (:obj:`Registry`): A registry the module belongs to. 29 | default_args (dict, optional): Default arguments to build the module. 
30 | Defaults to None. 31 | 32 | Returns: 33 | nn.Module: A built nn module. 34 | """ 35 | if isinstance(cfg, list): 36 | modules = [build_from_cfg(cfg_, registry, default_args) for cfg_ in cfg] 37 | return nn.Sequential(*modules) 38 | else: 39 | return build_from_cfg(cfg, registry, default_args) 40 | 41 | 42 | def build_backbone(cfg): 43 | """Build backbone.""" 44 | return build(cfg, BACKBONES) 45 | 46 | 47 | def build_neck(cfg): 48 | """Build neck.""" 49 | return build(cfg, NECKS) 50 | 51 | 52 | def build_roi_extractor(cfg): 53 | """Build roi extractor.""" 54 | return build(cfg, ROI_EXTRACTORS) 55 | 56 | 57 | def build_shared_head(cfg): 58 | """Build shared head.""" 59 | return build(cfg, SHARED_HEADS) 60 | 61 | 62 | def build_head(cfg): 63 | """Build head.""" 64 | return build(cfg, HEADS) 65 | 66 | 67 | def build_loss(cfg): 68 | """Build loss.""" 69 | return build(cfg, LOSSES) 70 | 71 | 72 | def build_detector(cfg, train_cfg=None, test_cfg=None): 73 | """Build detector.""" 74 | if train_cfg is not None or test_cfg is not None: 75 | warnings.warn( 76 | "train_cfg and test_cfg is deprecated, " "please specify them in model", 77 | UserWarning, 78 | ) 79 | assert ( 80 | cfg.get("train_cfg") is None or train_cfg is None 81 | ), "train_cfg specified in both outer field and model field " 82 | assert ( 83 | cfg.get("test_cfg") is None or test_cfg is None 84 | ), "test_cfg specified in both outer field and model field " 85 | return build(cfg, DETECTORS, dict(train_cfg=train_cfg, test_cfg=test_cfg)) 86 | -------------------------------------------------------------------------------- /mmdet/models/dense_heads/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 
3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree 6 | 7 | 8 | from .anchor_head import AnchorHead 9 | from .oln_rpn_head import OlnRPNHead 10 | from .rpn_head import RPNHead 11 | 12 | 13 | __all__ = [ 14 | "AnchorHead", 15 | "RPNHead", 16 | "OlnRPNHead", 17 | ] 18 | -------------------------------------------------------------------------------- /mmdet/models/dense_heads/base_dense_head.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree 6 | 7 | 8 | from abc import ABCMeta, abstractmethod 9 | 10 | import torch.nn as nn 11 | 12 | 13 | class BaseDenseHead(nn.Module, metaclass=ABCMeta): 14 | """Base class for DenseHeads.""" 15 | 16 | def __init__(self): 17 | super(BaseDenseHead, self).__init__() 18 | 19 | @abstractmethod 20 | def loss(self, **kwargs): 21 | """Compute losses of the head.""" 22 | pass 23 | 24 | @abstractmethod 25 | def get_bboxes(self, **kwargs): 26 | """Transform network output for a batch into bbox predictions.""" 27 | pass 28 | 29 | def forward_train( 30 | self, 31 | x, 32 | img_metas, 33 | gt_bboxes, 34 | gt_labels=None, 35 | gt_bboxes_ignore=None, 36 | proposal_cfg=None, 37 | **kwargs, 38 | ): 39 | """ 40 | Args: 41 | x (list[Tensor]): Features from FPN. 42 | img_metas (list[dict]): Meta information of each image, e.g., 43 | image size, scaling factor, etc. 44 | gt_bboxes (Tensor): Ground truth bboxes of the image, 45 | shape (num_gts, 4). 46 | gt_labels (Tensor): Ground truth labels of each box, 47 | shape (num_gts,). 48 | gt_bboxes_ignore (Tensor): Ground truth bboxes to be 49 | ignored, shape (num_ignored_gts, 4). 
50 | proposal_cfg (mmcv.Config): Test / postprocessing configuration, 51 | if None, test_cfg would be used 52 | 53 | Returns: 54 | tuple: 55 | losses: (dict[str, Tensor]): A dictionary of loss components. 56 | proposal_list (list[Tensor]): Proposals of each image. 57 | """ 58 | outs = self(x) 59 | if gt_labels is None: 60 | loss_inputs = outs + (gt_bboxes, img_metas) 61 | else: 62 | loss_inputs = outs + (gt_bboxes, gt_labels, img_metas) 63 | losses = self.loss(*loss_inputs, gt_bboxes_ignore=gt_bboxes_ignore) 64 | if proposal_cfg is None: 65 | return losses 66 | else: 67 | proposal_list = self.get_bboxes(*outs, img_metas, cfg=proposal_cfg) 68 | return losses, proposal_list 69 | -------------------------------------------------------------------------------- /mmdet/models/dense_heads/rpn_test_mixin.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree 6 | 7 | 8 | import sys 9 | 10 | from mmdet.core import merge_aug_proposals 11 | 12 | if sys.version_info >= (3, 7): 13 | from mmdet.utils.contextmanagers import completed 14 | 15 | 16 | class RPNTestMixin: 17 | """Test methods of RPN.""" 18 | 19 | if sys.version_info >= (3, 7): 20 | 21 | async def async_simple_test_rpn(self, x, img_metas): 22 | sleep_interval = self.test_cfg.pop("async_sleep_interval", 0.025) 23 | async with completed( 24 | __name__, "rpn_head_forward", sleep_interval=sleep_interval 25 | ): 26 | rpn_outs = self(x) 27 | 28 | proposal_list = self.get_bboxes(*rpn_outs, img_metas) 29 | return proposal_list 30 | 31 | def simple_test_rpn(self, x, img_metas): 32 | """Test without augmentation. 33 | 34 | Args: 35 | x (tuple[Tensor]): Features from the upstream network, each is 36 | a 4D-tensor. 37 | img_metas (list[dict]): Meta info of each image. 
38 | 39 | Returns: 40 | list[Tensor]: Proposals of each image. 41 | """ 42 | rpn_outs = self(x) 43 | proposal_list = self.get_bboxes(*rpn_outs, img_metas) 44 | return proposal_list 45 | 46 | def aug_test_rpn(self, feats, img_metas): 47 | samples_per_gpu = len(img_metas[0]) 48 | aug_proposals = [[] for _ in range(samples_per_gpu)] 49 | for x, img_meta in zip(feats, img_metas): 50 | proposal_list = self.simple_test_rpn(x, img_meta) 51 | for i, proposals in enumerate(proposal_list): 52 | aug_proposals[i].append(proposals) 53 | # reorganize the order of 'img_metas' to match the dimensions 54 | # of 'aug_proposals' 55 | aug_img_metas = [] 56 | for i in range(samples_per_gpu): 57 | aug_img_meta = [] 58 | for j in range(len(img_metas)): 59 | aug_img_meta.append(img_metas[j][i]) 60 | aug_img_metas.append(aug_img_meta) 61 | # after merging, proposals will be rescaled to the original image size 62 | merged_proposals = [ 63 | merge_aug_proposals(proposals, aug_img_meta, self.test_cfg) 64 | for proposals, aug_img_meta in zip(aug_proposals, aug_img_metas) 65 | ] 66 | return merged_proposals 67 | -------------------------------------------------------------------------------- /mmdet/models/detectors/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 
3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree 6 | 7 | 8 | from .base import BaseDetector 9 | from .faster_rcnn import FasterRCNN 10 | from .mask_rcnn import MaskRCNN 11 | from .pa_predictor import ( 12 | PairwiseAffinityHead, 13 | PairwiseAffinityHeadUperNet, 14 | PairwiseAffinityPredictor, 15 | ) 16 | from .rpn import RPN 17 | 18 | # 19 | from .rpn_detector import RPNDetector 20 | from .two_stage import TwoStageDetector 21 | from .two_tower import TwoTowerDetector 22 | 23 | __all__ = [ 24 | "BaseDetector", 25 | "TwoStageDetector", 26 | "RPN", 27 | "FasterRCNN", 28 | "MaskRCNN", 29 | "RPNDetector", 30 | "PairwiseAffinityPredictor", 31 | "PairwiseAffinityHead", 32 | "PairwiseAffinityHeadUperNet", 33 | "TwoTowerDetector", 34 | ] 35 | -------------------------------------------------------------------------------- /mmdet/models/detectors/faster_rcnn.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 
3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree 6 | 7 | 8 | from ..builder import DETECTORS 9 | from .two_stage import TwoStageDetector 10 | 11 | 12 | @DETECTORS.register_module() 13 | class FasterRCNN(TwoStageDetector): 14 | """Implementation of `Faster R-CNN `_""" 15 | 16 | def __init__( 17 | self, 18 | backbone, 19 | rpn_head, 20 | roi_head, 21 | train_cfg, 22 | test_cfg, 23 | neck=None, 24 | pretrained=None, 25 | ): 26 | super(FasterRCNN, self).__init__( 27 | backbone=backbone, 28 | neck=neck, 29 | rpn_head=rpn_head, 30 | roi_head=roi_head, 31 | train_cfg=train_cfg, 32 | test_cfg=test_cfg, 33 | pretrained=pretrained, 34 | ) 35 | -------------------------------------------------------------------------------- /mmdet/models/detectors/mask_rcnn.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree 6 | 7 | 8 | from ..builder import DETECTORS 9 | from .two_stage import TwoStageDetector 10 | 11 | 12 | @DETECTORS.register_module() 13 | class MaskRCNN(TwoStageDetector): 14 | """Implementation of `Mask R-CNN `_""" 15 | 16 | def __init__( 17 | self, 18 | backbone, 19 | rpn_head, 20 | roi_head, 21 | train_cfg, 22 | test_cfg, 23 | neck=None, 24 | pretrained=None, 25 | ): 26 | super(MaskRCNN, self).__init__( 27 | backbone=backbone, 28 | neck=neck, 29 | rpn_head=rpn_head, 30 | roi_head=roi_head, 31 | train_cfg=train_cfg, 32 | test_cfg=test_cfg, 33 | pretrained=pretrained, 34 | ) 35 | -------------------------------------------------------------------------------- /mmdet/models/detectors/rpn_detector.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 
2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree 6 | 7 | 8 | import torch 9 | from mmdet.core import bbox2result 10 | 11 | from ..builder import DETECTORS 12 | from .rpn import RPN 13 | 14 | 15 | @DETECTORS.register_module() 16 | class RPNDetector(RPN): 17 | def simple_test(self, img, img_metas, rescale=False): 18 | """Test function without test time augmentation. 19 | 20 | Args: 21 | imgs (list[torch.Tensor]): List of multiple images 22 | img_metas (list[dict]): List of image information. 23 | rescale (bool, optional): Whether to rescale the results. 24 | Defaults to False. 25 | 26 | Returns: 27 | list[np.ndarray]: proposals 28 | """ 29 | x = self.extract_feat(img) 30 | proposal_list = self.rpn_head.simple_test_rpn(x, img_metas) 31 | if rescale: 32 | for proposals, meta in zip(proposal_list, img_metas): 33 | proposals[:, :4] /= proposals.new_tensor(meta["scale_factor"]) 34 | 35 | # Convert the rpn-proposals into bbox results format. < 36 | # proposal_list[0].shape = [200,5] 37 | bbox_results = [] 38 | for det_bboxes in proposal_list: 39 | det_labels = torch.zeros((det_bboxes.size(0))).to(det_bboxes.device) 40 | bbox_results.append(bbox2result(det_bboxes, det_labels, num_classes=1)) 41 | 42 | return bbox_results 43 | # > 44 | -------------------------------------------------------------------------------- /mmdet/models/losses/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 
3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree 6 | 7 | 8 | from .accuracy import Accuracy, accuracy 9 | from .ae_loss import AssociativeEmbeddingLoss 10 | from .balanced_l1_loss import balanced_l1_loss, BalancedL1Loss 11 | from .cross_entropy_loss import ( 12 | binary_cross_entropy, 13 | cross_entropy, 14 | CrossEntropyLoss, 15 | mask_cross_entropy, 16 | ) 17 | from .focal_loss import FocalLoss, sigmoid_focal_loss 18 | from .gaussian_focal_loss import GaussianFocalLoss 19 | from .gfocal_loss import DistributionFocalLoss, QualityFocalLoss 20 | from .ghm_loss import GHMC, GHMR 21 | from .iou_loss import ( 22 | bounded_iou_loss, 23 | BoundedIoULoss, 24 | CIoULoss, 25 | DIoULoss, 26 | GIoULoss, 27 | iou_loss, 28 | IoULoss, 29 | ) 30 | from .mse_loss import mse_loss, MSELoss 31 | from .pisa_loss import carl_loss, isr_p 32 | from .smooth_l1_loss import l1_loss, L1Loss, smooth_l1_loss, SmoothL1Loss 33 | from .utils import reduce_loss, weight_reduce_loss, weighted_loss 34 | from .varifocal_loss import VarifocalLoss 35 | 36 | __all__ = [ 37 | "accuracy", 38 | "Accuracy", 39 | "cross_entropy", 40 | "binary_cross_entropy", 41 | "mask_cross_entropy", 42 | "CrossEntropyLoss", 43 | "sigmoid_focal_loss", 44 | "FocalLoss", 45 | "smooth_l1_loss", 46 | "SmoothL1Loss", 47 | "balanced_l1_loss", 48 | "BalancedL1Loss", 49 | "mse_loss", 50 | "MSELoss", 51 | "iou_loss", 52 | "bounded_iou_loss", 53 | "IoULoss", 54 | "BoundedIoULoss", 55 | "GIoULoss", 56 | "DIoULoss", 57 | "CIoULoss", 58 | "GHMC", 59 | "GHMR", 60 | "reduce_loss", 61 | "weight_reduce_loss", 62 | "weighted_loss", 63 | "L1Loss", 64 | "l1_loss", 65 | "isr_p", 66 | "carl_loss", 67 | "AssociativeEmbeddingLoss", 68 | "GaussianFocalLoss", 69 | "QualityFocalLoss", 70 | "DistributionFocalLoss", 71 | "VarifocalLoss", 72 | ] 73 | -------------------------------------------------------------------------------- /mmdet/models/losses/accuracy.py: 
-------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree 6 | 7 | 8 | import torch.nn as nn 9 | 10 | 11 | def accuracy(pred, target, topk=1, thresh=None): 12 | """Calculate accuracy according to the prediction and target. 13 | 14 | Args: 15 | pred (torch.Tensor): The model prediction, shape (N, num_class) 16 | target (torch.Tensor): The target of each prediction, shape (N, ) 17 | topk (int | tuple[int], optional): If the predictions in ``topk`` 18 | matches the target, the predictions will be regarded as 19 | correct ones. Defaults to 1. 20 | thresh (float, optional): If not None, predictions with scores under 21 | this threshold are considered incorrect. Default to None. 22 | 23 | Returns: 24 | float | tuple[float]: If the input ``topk`` is a single integer, 25 | the function will return a single float as accuracy. If 26 | ``topk`` is a tuple containing multiple integers, the 27 | function will return a tuple containing accuracies of 28 | each ``topk`` number. 
29 | """ 30 | assert isinstance(topk, (int, tuple)) 31 | if isinstance(topk, int): 32 | topk = (topk,) 33 | return_single = True 34 | else: 35 | return_single = False 36 | 37 | maxk = max(topk) 38 | if pred.size(0) == 0: 39 | accu = [pred.new_tensor(0.0) for i in range(len(topk))] 40 | return accu[0] if return_single else accu 41 | assert pred.ndim == 2 and target.ndim == 1 42 | assert pred.size(0) == target.size(0) 43 | assert maxk <= pred.size(1), f"maxk {maxk} exceeds pred dimension {pred.size(1)}" 44 | pred_value, pred_label = pred.topk(maxk, dim=1) 45 | pred_label = pred_label.t() # transpose to shape (maxk, N) 46 | correct = pred_label.eq(target.view(1, -1).expand_as(pred_label)) 47 | if thresh is not None: 48 | # Only prediction values larger than thresh are counted as correct 49 | correct = correct & (pred_value > thresh).t() 50 | res = [] 51 | for k in topk: 52 | correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True) 53 | res.append(correct_k.mul_(100.0 / pred.size(0))) 54 | return res[0] if return_single else res 55 | 56 | 57 | class Accuracy(nn.Module): 58 | def __init__(self, topk=(1,), thresh=None): 59 | """Module to calculate the accuracy. 60 | 61 | Args: 62 | topk (tuple, optional): The criterion used to calculate the 63 | accuracy. Defaults to (1,). 64 | thresh (float, optional): If not None, predictions with scores 65 | under this threshold are considered incorrect. Default to None. 66 | """ 67 | super().__init__() 68 | self.topk = topk 69 | self.thresh = thresh 70 | 71 | def forward(self, pred, target): 72 | """Forward function to calculate accuracy. 73 | 74 | Args: 75 | pred (torch.Tensor): Prediction of models. 76 | target (torch.Tensor): Target for each prediction. 77 | 78 | Returns: 79 | tuple[float]: The accuracies under different topk criterions. 
80 | """ 81 | return accuracy(pred, target, self.topk, self.thresh) 82 | -------------------------------------------------------------------------------- /mmdet/models/losses/gaussian_focal_loss.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree 6 | 7 | 8 | import torch.nn as nn 9 | 10 | from ..builder import LOSSES 11 | from .utils import weighted_loss 12 | 13 | 14 | @weighted_loss 15 | def gaussian_focal_loss(pred, gaussian_target, alpha=2.0, gamma=4.0): 16 | """`Focal Loss `_ for targets in gaussian 17 | distribution. 18 | 19 | Args: 20 | pred (torch.Tensor): The prediction. 21 | gaussian_target (torch.Tensor): The learning target of the prediction 22 | in gaussian distribution. 23 | alpha (float, optional): A balanced form for Focal Loss. 24 | Defaults to 2.0. 25 | gamma (float, optional): The gamma for calculating the modulating 26 | factor. Defaults to 4.0. 27 | """ 28 | eps = 1e-12 29 | pos_weights = gaussian_target.eq(1) 30 | neg_weights = (1 - gaussian_target).pow(gamma) 31 | pos_loss = -(pred + eps).log() * (1 - pred).pow(alpha) * pos_weights 32 | neg_loss = -(1 - pred + eps).log() * pred.pow(alpha) * neg_weights 33 | return pos_loss + neg_loss 34 | 35 | 36 | @LOSSES.register_module() 37 | class GaussianFocalLoss(nn.Module): 38 | """GaussianFocalLoss is a variant of focal loss. 39 | 40 | More details can be found in the `paper 41 | `_ 42 | Code is modified from `kp_utils.py 43 | `_ # noqa: E501 44 | Please notice that the target in GaussianFocalLoss is a gaussian heatmap, 45 | not 0/1 binary target. 46 | 47 | Args: 48 | alpha (float): Power of prediction. 49 | gamma (float): Power of target for negtive samples. 50 | reduction (str): Options are "none", "mean" and "sum". 
51 | loss_weight (float): Loss weight of current loss. 52 | """ 53 | 54 | def __init__(self, alpha=2.0, gamma=4.0, reduction="mean", loss_weight=1.0): 55 | super(GaussianFocalLoss, self).__init__() 56 | self.alpha = alpha 57 | self.gamma = gamma 58 | self.reduction = reduction 59 | self.loss_weight = loss_weight 60 | 61 | def forward( 62 | self, pred, target, weight=None, avg_factor=None, reduction_override=None 63 | ): 64 | """Forward function. 65 | 66 | Args: 67 | pred (torch.Tensor): The prediction. 68 | target (torch.Tensor): The learning target of the prediction 69 | in gaussian distribution. 70 | weight (torch.Tensor, optional): The weight of loss for each 71 | prediction. Defaults to None. 72 | avg_factor (int, optional): Average factor that is used to average 73 | the loss. Defaults to None. 74 | reduction_override (str, optional): The reduction method used to 75 | override the original reduction method of the loss. 76 | Defaults to None. 77 | """ 78 | assert reduction_override in (None, "none", "mean", "sum") 79 | reduction = reduction_override if reduction_override else self.reduction 80 | loss_reg = self.loss_weight * gaussian_focal_loss( 81 | pred, 82 | target, 83 | weight, 84 | alpha=self.alpha, 85 | gamma=self.gamma, 86 | reduction=reduction, 87 | avg_factor=avg_factor, 88 | ) 89 | return loss_reg 90 | -------------------------------------------------------------------------------- /mmdet/models/losses/mse_loss.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 
3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree 6 | 7 | 8 | import torch.nn as nn 9 | import torch.nn.functional as F 10 | 11 | from ..builder import LOSSES 12 | from .utils import weighted_loss 13 | 14 | 15 | @weighted_loss 16 | def mse_loss(pred, target): 17 | """Warpper of mse loss.""" 18 | return F.mse_loss(pred, target, reduction="none") 19 | 20 | 21 | @LOSSES.register_module() 22 | class MSELoss(nn.Module): 23 | """MSELoss. 24 | 25 | Args: 26 | reduction (str, optional): The method that reduces the loss to a 27 | scalar. Options are "none", "mean" and "sum". 28 | loss_weight (float, optional): The weight of the loss. Defaults to 1.0 29 | """ 30 | 31 | def __init__(self, reduction="mean", loss_weight=1.0): 32 | super().__init__() 33 | self.reduction = reduction 34 | self.loss_weight = loss_weight 35 | 36 | def forward(self, pred, target, weight=None, avg_factor=None): 37 | """Forward function of loss. 38 | 39 | Args: 40 | pred (torch.Tensor): The prediction. 41 | target (torch.Tensor): The learning target of the prediction. 42 | weight (torch.Tensor, optional): Weight of the loss for each 43 | prediction. Defaults to None. 44 | avg_factor (int, optional): Average factor that is used to average 45 | the loss. Defaults to None. 46 | 47 | Returns: 48 | torch.Tensor: The calculated loss 49 | """ 50 | loss = self.loss_weight * mse_loss( 51 | pred, target, weight, reduction=self.reduction, avg_factor=avg_factor 52 | ) 53 | return loss 54 | -------------------------------------------------------------------------------- /mmdet/models/losses/utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 
3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree 6 | 7 | 8 | import functools 9 | 10 | import torch.nn.functional as F 11 | 12 | 13 | def reduce_loss(loss, reduction): 14 | """Reduce loss as specified. 15 | 16 | Args: 17 | loss (Tensor): Elementwise loss tensor. 18 | reduction (str): Options are "none", "mean" and "sum". 19 | 20 | Return: 21 | Tensor: Reduced loss tensor. 22 | """ 23 | reduction_enum = F._Reduction.get_enum(reduction) 24 | # none: 0, elementwise_mean:1, sum: 2 25 | if reduction_enum == 0: 26 | return loss 27 | elif reduction_enum == 1: 28 | return loss.mean() 29 | elif reduction_enum == 2: 30 | return loss.sum() 31 | 32 | 33 | def weight_reduce_loss(loss, weight=None, reduction="mean", avg_factor=None): 34 | """Apply element-wise weight and reduce loss. 35 | 36 | Args: 37 | loss (Tensor): Element-wise loss. 38 | weight (Tensor): Element-wise weights. 39 | reduction (str): Same as built-in losses of PyTorch. 40 | avg_factor (float): Avarage factor when computing the mean of losses. 41 | 42 | Returns: 43 | Tensor: Processed loss values. 44 | """ 45 | # if weight is specified, apply element-wise weight 46 | if weight is not None: 47 | loss = loss * weight 48 | 49 | # if avg_factor is not specified, just reduce the loss 50 | if avg_factor is None: 51 | loss = reduce_loss(loss, reduction) 52 | else: 53 | # if reduction is mean, then average the loss by avg_factor 54 | if reduction == "mean": 55 | loss = loss.sum() / avg_factor 56 | # if reduction is 'none', then do nothing, otherwise raise an error 57 | elif reduction != "none": 58 | raise ValueError('avg_factor can not be used with reduction="sum"') 59 | return loss 60 | 61 | 62 | def weighted_loss(loss_func): 63 | """Create a weighted version of a given loss function. 64 | 65 | To use this decorator, the loss function must have the signature like 66 | `loss_func(pred, target, **kwargs)`. 
The function only needs to compute 67 | element-wise loss without any reduction. This decorator will add weight 68 | and reduction arguments to the function. The decorated function will have 69 | the signature like `loss_func(pred, target, weight=None, reduction='mean', 70 | avg_factor=None, **kwargs)`. 71 | 72 | :Example: 73 | 74 | >>> import torch 75 | >>> @weighted_loss 76 | >>> def l1_loss(pred, target): 77 | >>> return (pred - target).abs() 78 | 79 | >>> pred = torch.Tensor([0, 2, 3]) 80 | >>> target = torch.Tensor([1, 1, 1]) 81 | >>> weight = torch.Tensor([1, 0, 1]) 82 | 83 | >>> l1_loss(pred, target) 84 | tensor(1.3333) 85 | >>> l1_loss(pred, target, weight) 86 | tensor(1.) 87 | >>> l1_loss(pred, target, reduction='none') 88 | tensor([1., 1., 2.]) 89 | >>> l1_loss(pred, target, weight, avg_factor=2) 90 | tensor(1.5000) 91 | """ 92 | 93 | @functools.wraps(loss_func) 94 | def wrapper(pred, target, weight=None, reduction="mean", avg_factor=None, **kwargs): 95 | # get element-wise loss 96 | loss = loss_func(pred, target, **kwargs) 97 | loss = weight_reduce_loss(loss, weight, reduction, avg_factor) 98 | return loss 99 | 100 | return wrapper 101 | -------------------------------------------------------------------------------- /mmdet/models/necks/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree 6 | 7 | 8 | from .fpn import FPN 9 | 10 | __all__ = [ 11 | "FPN", 12 | ] 13 | -------------------------------------------------------------------------------- /mmdet/models/roi_heads/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 
3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree 6 | 7 | 8 | from .base_roi_head import BaseRoIHead 9 | from .bbox_heads import ( 10 | BBoxHead, 11 | ConvFCBBoxHead, 12 | Shared2FCBBoxHead, 13 | Shared4Conv1FCBBoxHead, 14 | ) 15 | from .mask_heads import ( 16 | CoarseMaskHead, 17 | FCNMaskHead, 18 | FusedSemanticHead, 19 | GridHead, 20 | HTCMaskHead, 21 | MaskIoUHead, 22 | MaskPointHead, 23 | ) 24 | from .oln_roi_head import OlnRoIHead 25 | from .rec_roi_head import RecRoIHead 26 | from .roi_extractors import SingleRoIExtractor 27 | from .shared_heads import ResLayer 28 | from .standard_roi_head import StandardRoIHead 29 | 30 | __all__ = [ 31 | "BaseRoIHead", 32 | "ResLayer", 33 | "BBoxHead", 34 | "ConvFCBBoxHead", 35 | "Shared2FCBBoxHead", 36 | "StandardRoIHead", 37 | "Shared4Conv1FCBBoxHead", 38 | "FCNMaskHead", 39 | "SingleRoIExtractor", 40 | "OlnRoIHead", 41 | "RecRoIHead", 42 | ] 43 | -------------------------------------------------------------------------------- /mmdet/models/roi_heads/base_roi_head.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 
3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree 6 | 7 | 8 | from abc import ABCMeta, abstractmethod 9 | 10 | import torch.nn as nn 11 | 12 | from ..builder import build_shared_head 13 | 14 | 15 | class BaseRoIHead(nn.Module, metaclass=ABCMeta): 16 | """Base class for RoIHeads.""" 17 | 18 | def __init__( 19 | self, 20 | bbox_roi_extractor=None, 21 | bbox_head=None, 22 | mask_roi_extractor=None, 23 | mask_head=None, 24 | shared_head=None, 25 | train_cfg=None, 26 | test_cfg=None, 27 | ): 28 | super(BaseRoIHead, self).__init__() 29 | self.train_cfg = train_cfg 30 | self.test_cfg = test_cfg 31 | if shared_head is not None: 32 | self.shared_head = build_shared_head(shared_head) 33 | 34 | if bbox_head is not None: 35 | self.init_bbox_head(bbox_roi_extractor, bbox_head) 36 | 37 | if mask_head is not None: 38 | self.init_mask_head(mask_roi_extractor, mask_head) 39 | 40 | self.init_assigner_sampler() 41 | 42 | @property 43 | def with_bbox(self): 44 | """bool: whether the RoI head contains a `bbox_head`""" 45 | return hasattr(self, "bbox_head") and self.bbox_head is not None 46 | 47 | @property 48 | def with_mask(self): 49 | """bool: whether the RoI head contains a `mask_head`""" 50 | return hasattr(self, "mask_head") and self.mask_head is not None 51 | 52 | @property 53 | def with_shared_head(self): 54 | """bool: whether the RoI head contains a `shared_head`""" 55 | return hasattr(self, "shared_head") and self.shared_head is not None 56 | 57 | @abstractmethod 58 | def init_weights(self, pretrained): 59 | """Initialize the weights in head. 60 | 61 | Args: 62 | pretrained (str, optional): Path to pre-trained weights. 63 | Defaults to None. 
64 | """ 65 | pass 66 | 67 | @abstractmethod 68 | def init_bbox_head(self): 69 | """Initialize ``bbox_head``""" 70 | pass 71 | 72 | @abstractmethod 73 | def init_mask_head(self): 74 | """Initialize ``mask_head``""" 75 | pass 76 | 77 | @abstractmethod 78 | def init_assigner_sampler(self): 79 | """Initialize assigner and sampler.""" 80 | pass 81 | 82 | @abstractmethod 83 | def forward_train( 84 | self, 85 | x, 86 | img_meta, 87 | proposal_list, 88 | gt_bboxes, 89 | gt_labels, 90 | gt_bboxes_ignore=None, 91 | gt_masks=None, 92 | **kwargs, 93 | ): 94 | """Forward function during training.""" 95 | pass 96 | 97 | async def async_simple_test(self, x, img_meta, **kwargs): 98 | """Asynchronized test function.""" 99 | raise NotImplementedError 100 | 101 | def simple_test( 102 | self, x, proposal_list, img_meta, proposals=None, rescale=False, **kwargs 103 | ): 104 | """Test without augmentation.""" 105 | pass 106 | 107 | def aug_test(self, x, proposal_list, img_metas, rescale=False, **kwargs): 108 | """Test with augmentations. 109 | 110 | If rescale is False, then returned bboxes and masks will fit the scale 111 | of imgs[0]. 112 | """ 113 | pass 114 | -------------------------------------------------------------------------------- /mmdet/models/roi_heads/bbox_heads/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 
3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree 6 | 7 | 8 | from .bbox_head import BBoxHead 9 | from .convfc_bbox_head import ConvFCBBoxHead, Shared2FCBBoxHead, Shared4Conv1FCBBoxHead 10 | from .convfc_bbox_score_head import ConvFCBBoxScoreHead, Shared2FCBBoxScoreHead 11 | 12 | __all__ = [ 13 | "BBoxHead", 14 | "ConvFCBBoxHead", 15 | "Shared2FCBBoxHead", 16 | "Shared4Conv1FCBBoxHead", 17 | "ConvFCBBoxScoreHead", 18 | "Shared2FCBBoxScoreHead", 19 | ] 20 | -------------------------------------------------------------------------------- /mmdet/models/roi_heads/mask_heads/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree 6 | 7 | 8 | from .coarse_mask_head import CoarseMaskHead 9 | from .fcn_mask_head import FCNMaskHead 10 | from .fused_semantic_head import FusedSemanticHead 11 | from .grid_head import GridHead 12 | from .htc_mask_head import HTCMaskHead 13 | from .mask_point_head import MaskPointHead 14 | from .maskiou_head import MaskIoUHead 15 | 16 | __all__ = [ 17 | "FCNMaskHead", 18 | "HTCMaskHead", 19 | "FusedSemanticHead", 20 | "GridHead", 21 | "MaskIoUHead", 22 | "CoarseMaskHead", 23 | "MaskPointHead", 24 | ] 25 | -------------------------------------------------------------------------------- /mmdet/models/roi_heads/mask_heads/htc_mask_head.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 
@HEADS.register_module()
class HTCMaskHead(FCNMaskHead):
    """FCN mask head variant used by Hybrid Task Cascade.

    Adds an optional 1x1 residual conv (``conv_res``) so that the mask
    feature produced by the previous cascade stage can be fused into the
    current stage's input before the usual conv stack runs.
    """

    def __init__(self, with_conv_res=True, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.with_conv_res = with_conv_res
        if self.with_conv_res:
            # 1x1 projection applied to the previous stage's mask feature
            # before it is added to the current input.
            self.conv_res = ConvModule(
                self.conv_out_channels,
                self.conv_out_channels,
                1,
                conv_cfg=self.conv_cfg,
                norm_cfg=self.norm_cfg,
            )

    def init_weights(self):
        """Initialize inherited weights plus the residual conv, if present."""
        super().init_weights()
        if self.with_conv_res:
            self.conv_res.init_weights()

    def forward(self, x, res_feat=None, return_logits=True, return_feat=True):
        """Run the conv stack, optionally fusing ``res_feat`` first.

        Args:
            x: Input RoI feature map.
            res_feat: Optional feature from the previous cascade stage;
                requires ``with_conv_res=True``.
            return_logits (bool): Include mask logits in the output.
            return_feat (bool): Include the pre-upsample feature map.

        Returns:
            A ``[logits, feat]`` list when both flags are set, otherwise
            the single requested value.
        """
        if res_feat is not None:
            assert self.with_conv_res
            x = x + self.conv_res(res_feat)
        for conv_layer in self.convs:
            x = conv_layer(x)
        res_feat = x
        outs = []
        if return_logits:
            upsampled = self.upsample(x)
            if self.upsample_method == "deconv":
                upsampled = self.relu(upsampled)
            outs.append(self.conv_logits(upsampled))
        if return_feat:
            outs.append(res_feat)
        return outs if len(outs) > 1 else outs[0]
class BaseRoIExtractor(nn.Module, metaclass=ABCMeta):
    """Base class for RoI extractors.

    Args:
        roi_layer (dict): Config of the RoI op, with a ``type`` key naming a
            module under ``mmcv/ops`` (e.g. ``RoIAlign``) plus its kwargs.
        out_channels (int): Output channels of RoI layers.
        featmap_strides (List[int]): Strides of the input feature maps.
    """

    def __init__(self, roi_layer, out_channels, featmap_strides):
        super().__init__()
        self.roi_layers = self.build_roi_layers(roi_layer, featmap_strides)
        self.out_channels = out_channels
        self.featmap_strides = featmap_strides
        self.fp16_enabled = False

    @property
    def num_inputs(self):
        """int: Number of input feature maps."""
        return len(self.featmap_strides)

    def init_weights(self):
        """RoI ops carry no learnable weights; nothing to initialize."""
        pass

    def build_roi_layers(self, layer_cfg, featmap_strides):
        """Build one RoI op per feature level.

        Args:
            layer_cfg (dict): Config of the RoI layer (see class docstring).
            featmap_strides (List[int]): Stride of each feature map w.r.t.
                the original image; its inverse becomes the op's
                ``spatial_scale``.

        Returns:
            nn.ModuleList: One RoI op per level, in level order.
        """
        cfg = layer_cfg.copy()  # keep the caller's config untouched
        layer_type = cfg.pop("type")
        assert hasattr(ops, layer_type)
        layer_cls = getattr(ops, layer_type)
        return nn.ModuleList(
            layer_cls(spatial_scale=1 / stride, **cfg) for stride in featmap_strides
        )

    def roi_rescale(self, rois, scale_factor):
        """Rescale each RoI about its own center by ``scale_factor``.

        Args:
            rois (torch.Tensor): RoIs of shape (n, 5) laid out as
                ``(batch_ind, x1, y1, x2, y2)`` as expected by mmcv RoI ops.
            scale_factor (float): Multiplier applied to each box's size.

        Returns:
            torch.Tensor: Rescaled RoIs, same shape; column 0 is preserved.
        """
        cx = (rois[:, 1] + rois[:, 3]) * 0.5
        cy = (rois[:, 2] + rois[:, 4]) * 0.5
        half_w = (rois[:, 3] - rois[:, 1]) * scale_factor * 0.5
        half_h = (rois[:, 4] - rois[:, 2]) * scale_factor * 0.5
        return torch.stack(
            (rois[:, 0], cx - half_w, cy - half_h, cx + half_w, cy + half_h),
            dim=-1,
        )

    @abstractmethod
    def forward(self, feats, rois, roi_scale_factor=None):
        """Extract RoI features; implemented by subclasses."""
        pass
@ROI_EXTRACTORS.register_module()
class GenericRoIExtractor(BaseRoIExtractor):
    """RoI extractor that pools from every feature level and aggregates.

    Implementation of "A novel Region of Interest Extraction Layer for
    Instance Segmentation": each RoI is pooled from all levels and the
    per-level features are combined by summation or channel concatenation.

    Args:
        aggregation (str): How to combine per-level features, either
            ``'sum'`` or ``'concat'``. Default: ``'sum'``.
        pre_cfg (dict | None): Plugin applied to each per-level RoI feature
            before aggregation. Default: None.
        post_cfg (dict | None): Plugin applied to the aggregated feature
            before returning. Default: None.
        kwargs (keyword arguments): Remaining arguments of
            :class:`BaseRoIExtractor`.
    """

    def __init__(self, aggregation="sum", pre_cfg=None, post_cfg=None, **kwargs):
        super().__init__(**kwargs)

        assert aggregation in ["sum", "concat"]

        self.aggregation = aggregation
        self.with_post = post_cfg is not None
        self.with_pre = pre_cfg is not None
        # Optional plugin modules applied before/after aggregation.
        if self.with_post:
            self.post_module = build_plugin_layer(post_cfg, "_post_module")[1]
        if self.with_pre:
            self.pre_module = build_plugin_layer(pre_cfg, "_pre_module")[1]

    @force_fp32(apply_to=("feats",), out_fp16=True)
    def forward(self, feats, rois, roi_scale_factor=None):
        """Pool ``rois`` from every level of ``feats`` and aggregate."""
        # Single level: nothing to aggregate.
        if len(feats) == 1:
            return self.roi_layers[0](feats[0], rois)

        out_size = self.roi_layers[0].output_size
        roi_feats = feats[0].new_zeros(rois.size(0), self.out_channels, *out_size)

        # Sometimes rois is an empty tensor; return the zero buffer directly.
        if roi_feats.shape[0] == 0:
            return roi_feats

        if roi_scale_factor is not None:
            rois = self.roi_rescale(rois, roi_scale_factor)

        # Running channel offset; only advanced in 'concat' mode.
        channel_start = 0
        for level, feat in enumerate(feats):
            level_feats = self.roi_layers[level](feat, rois)
            channel_end = channel_start + level_feats.size(1)
            if self.with_pre:
                # Pre-process the RoI feature extracted from this level.
                level_feats = self.pre_module(level_feats)
            if self.aggregation == "sum":
                roi_feats += level_feats
            else:
                # Write this level's channels into its slice.
                roi_feats[:, channel_start:channel_end] = level_feats
                channel_start = channel_end
        if self.aggregation == "concat":
            # All level channels together must fill out_channels exactly.
            assert channel_start == self.out_channels

        if self.with_post:
            # Post-process the aggregated feature before returning.
            roi_feats = self.post_module(roi_feats)
        return roi_feats
@SHARED_HEADS.register_module()
class ResLayer(nn.Module):
    """Wraps one ResNet stage as a shared RoI head (e.g. for C4 models).

    Args:
        depth (int): ResNet depth, a key of ``ResNet.arch_settings``.
        stage (int): Zero-based index of the stage to instantiate.
            Default: 3.
        stride (int): Stride passed to the stage's first block. Default: 2.
        dilation (int): Dilation of the stage's convolutions. Default: 1.
        style (str): Block style, ``'pytorch'`` or ``'caffe'``.
        norm_cfg (dict): Normalization layer config.
        norm_eval (bool): Keep BatchNorm layers in eval mode while training.
        with_cp (bool): Use gradient checkpointing in the stage.
        dcn (dict | None): Deformable conv config, if any.
    """

    def __init__(
        self,
        depth,
        stage=3,
        stride=2,
        dilation=1,
        style="pytorch",
        norm_cfg=dict(type="BN", requires_grad=True),
        norm_eval=True,
        with_cp=False,
        dcn=None,
    ):
        super().__init__()
        self.norm_eval = norm_eval
        self.norm_cfg = norm_cfg
        self.stage = stage
        self.fp16_enabled = False
        block, stage_blocks = ResNet.arch_settings[depth]
        num_blocks = stage_blocks[stage]
        # Channel widths follow the standard ResNet doubling schedule.
        out_planes = 64 * 2**stage
        in_planes = 64 * 2 ** (stage - 1) * block.expansion

        stage_module = _ResLayer(
            block,
            in_planes,
            out_planes,
            num_blocks,
            stride=stride,
            dilation=dilation,
            style=style,
            with_cp=with_cp,
            norm_cfg=self.norm_cfg,
            dcn=dcn,
        )
        self.add_module(f"layer{stage + 1}", stage_module)

    def init_weights(self, pretrained=None):
        """Initialize the weights in the module.

        Args:
            pretrained (str, optional): Path to pre-trained weights.
                Defaults to None.
        """
        if isinstance(pretrained, str):
            logger = get_root_logger()
            load_checkpoint(self, pretrained, strict=False, logger=logger)
        elif pretrained is None:
            for module in self.modules():
                if isinstance(module, nn.Conv2d):
                    kaiming_init(module)
                elif isinstance(module, nn.BatchNorm2d):
                    constant_init(module, 1)
        else:
            raise TypeError("pretrained must be a str or None")

    @auto_fp16()
    def forward(self, x):
        """Run the wrapped ResNet stage on ``x``."""
        stage_module = getattr(self, f"layer{self.stage + 1}")
        return stage_module(x)

    def train(self, mode=True):
        """Switch train/eval mode, keeping BN frozen when ``norm_eval``."""
        super().train(mode)
        if not self.norm_eval:
            return
        for module in self.modules():
            if isinstance(module, nn.BatchNorm2d):
                module.eval()
3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree 6 | 7 | 8 | from .builder import build_positional_encoding, build_transformer 9 | from .gaussian_target import gaussian_radius, gen_gaussian_target 10 | from .positional_encoding import LearnedPositionalEncoding, SinePositionalEncoding 11 | from .res_layer import ResLayer 12 | from .transformer import ( 13 | FFN, 14 | MultiheadAttention, 15 | Transformer, 16 | TransformerDecoder, 17 | TransformerDecoderLayer, 18 | TransformerEncoder, 19 | TransformerEncoderLayer, 20 | ) 21 | 22 | __all__ = [ 23 | "ResLayer", 24 | "gaussian_radius", 25 | "gen_gaussian_target", 26 | "MultiheadAttention", 27 | "FFN", 28 | "TransformerEncoderLayer", 29 | "TransformerEncoder", 30 | "TransformerDecoderLayer", 31 | "TransformerDecoder", 32 | "Transformer", 33 | "build_transformer", 34 | "build_positional_encoding", 35 | "SinePositionalEncoding", 36 | "LearnedPositionalEncoding", 37 | ] 38 | -------------------------------------------------------------------------------- /mmdet/models/utils/builder.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 
from mmcv.utils import build_from_cfg, Registry

# Registries holding transformer and positional-encoding implementations;
# modules register themselves and are later built from config dicts.
TRANSFORMER = Registry("Transformer")
POSITIONAL_ENCODING = Registry("Position encoding")


def build_transformer(cfg, default_args=None):
    """Builder for Transformer.

    Args:
        cfg (dict): Config whose ``type`` key names a class registered in
            ``TRANSFORMER``; remaining keys are constructor kwargs.
        default_args (dict, optional): Default kwargs merged into ``cfg``.
            Defaults to None.

    Returns:
        The constructed transformer module.
    """
    return build_from_cfg(cfg, TRANSFORMER, default_args)


def build_positional_encoding(cfg, default_args=None):
    """Builder for Position Encoding.

    Args:
        cfg (dict): Config whose ``type`` key names a class registered in
            ``POSITIONAL_ENCODING``; remaining keys are constructor kwargs.
        default_args (dict, optional): Default kwargs merged into ``cfg``.
            Defaults to None.

    Returns:
        The constructed positional-encoding module.
    """
    return build_from_cfg(cfg, POSITIONAL_ENCODING, default_args)
def collect_env():
    """Collect the information of the running environments.

    Returns:
        dict: Environment info from ``mmcv.utils.collect_env`` plus an
        ``"MMDetection"`` key holding ``<version>+<short git hash>``.
    """
    env_info = collect_base_env()
    # Tag the report with the installed mmdet version and current commit.
    env_info["MMDetection"] = mmdet.__version__ + "+" + get_git_hash()[:7]
    return env_info


# Allow running this module directly to print the environment report.
if __name__ == "__main__":
    for name, val in collect_env().items():
        print(f"{name}: {val}")
if sys.version_info >= (3, 7):

    @contextlib.contextmanager
    def profile_time(trace_name, name, enabled=True, stream=None, end_stream=None):
        """Print time spent by CPU and GPU.

        Useful as a temporary context manager to find sweet spots of code
        suitable for async implementation. When disabled, or when CUDA is
        unavailable, the body runs with no timing overhead.
        """
        if not enabled or not torch.cuda.is_available():
            yield
            return
        # Default to the current CUDA stream for both start and end events.
        stream = stream or torch.cuda.current_stream()
        end_stream = end_stream or stream
        start_event = torch.cuda.Event(enable_timing=True)
        end_event = torch.cuda.Event(enable_timing=True)
        stream.record_event(start_event)
        try:
            cpu_start = time.monotonic()
            yield
        finally:
            # Report even if the body raised.
            cpu_end = time.monotonic()
            end_stream.record_event(end_event)
            end_event.synchronize()
            cpu_time = (cpu_end - cpu_start) * 1000
            gpu_time = start_event.elapsed_time(end_event)
            msg = f"{trace_name} {name} cpu_time {cpu_time:.2f} ms "
            msg += f"gpu_time {gpu_time:.2f} ms stream {stream}"
            print(msg, end_stream)
9 | 10 | __version__ = "2.8.0" 11 | short_version = __version__ 12 | 13 | 14 | def parse_version_info(version_str): 15 | version_info = [] 16 | for x in version_str.split("."): 17 | if x.isdigit(): 18 | version_info.append(int(x)) 19 | elif x.find("rc") != -1: 20 | patch_version = x.split("rc") 21 | version_info.append(int(patch_version[0])) 22 | version_info.append(f"rc{patch_version[1]}") 23 | return tuple(version_info) 24 | 25 | 26 | version_info = parse_version_info(__version__) 27 | -------------------------------------------------------------------------------- /pa_lib/cython_lib/graph_helper.pyx: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree 6 | 7 | 8 | # cython: language_level=3 9 | 10 | cimport cython 11 | cimport numpy as np 12 | from libc.stdint cimport int32_t, int64_t 13 | 14 | import numpy as np 15 | 16 | cdef int64_t loc2idx( 17 | int32_t i, 18 | int32_t j, 19 | int32_t width, 20 | ): 21 | cdef int64_t index = i * width + j 22 | return index 23 | 24 | @cython.boundscheck(False) 25 | @cython.wraparound(False) 26 | @cython.nonecheck(False) 27 | cdef list generate_nodes_edges_labels_c( 28 | np.ndarray[np.float32_t, ndim=3] potentials, 29 | ): 30 | cdef int32_t height = potentials.shape[1] 31 | cdef int32_t width = potentials.shape[2] 32 | cdef np.ndarray[np.uint32_t, ndim=2] new_label 33 | 34 | cdef int64_t num_nodes = int(potentials.size / 4) 35 | 36 | cdef list out_nodes = [] 37 | cdef dict property 38 | cdef tuple node 39 | for i in range(num_nodes): 40 | property = {"labels": [i]} 41 | node = (i, property) 42 | out_nodes.append(node) 43 | 44 | cdef np.ndarray[np.float32_t, ndim=1] potential 45 | cdef int64_t curr_idx, neighbor 46 | cdef list out_edges = [] 47 | cdef tuple edge 48 | 49 | new_label = 
def generate_nodes_edges_labels(
    np.ndarray potentials,
) -> list:
    """Build graph nodes, edges and an initial label map from affinities.

    Thin Python-visible wrapper around the C-level implementation.

    Args:
        potentials: float32 array of shape (4, H, W); channel c holds the
            pairwise affinity between pixel (i, j) and one neighbor —
            as read by the C code: 0 -> (i-1, j), 1 -> (i, j-1),
            2 -> (i-1, j-1), 3 -> (i+1, j-1). Edge weights are
            ``1 - affinity``.

    Returns:
        list: ``[nodes, edges, labels]`` where nodes are ``(index, props)``
        tuples, edges are weighted pixel-pair tuples, and labels is a
        (H, W) uint32 map of per-pixel node indices.
    """
    return generate_nodes_edges_labels_c(potentials)
"""Build script for the ``cython_lib.graph_helper`` extension.

``distutils`` was deprecated by PEP 632 and removed in Python 3.12, so the
drop-in ``setuptools`` equivalents are used instead; the build behavior is
unchanged.
"""

from setuptools import setup
from setuptools.extension import Extension

import numpy
from Cython.Build import cythonize

# Single extension: the graph helper used by the pairwise-affinity code.
ext_modules = [
    Extension("cython_lib.graph_helper", ["graph_helper.pyx"]),
]

setup(
    name="cython_lib",
    ext_modules=cythonize(ext_modules),
    # graph_helper.pyx cimports numpy, so its C headers must be findable.
    include_dirs=[numpy.get_include()],
)
-------------------------------------------------------------------------------- 1 | albumentations>=0.3.2 2 | cityscapesscripts 3 | imagecorruptions 4 | mmlvis 5 | scipy 6 | scikit-learn 7 | -------------------------------------------------------------------------------- /requirements/readthedocs.txt: -------------------------------------------------------------------------------- 1 | mmcv 2 | torch 3 | torchvision 4 | -------------------------------------------------------------------------------- /requirements/runtime.txt: -------------------------------------------------------------------------------- 1 | matplotlib 2 | mmpycocotools 3 | numpy 4 | six 5 | terminaltables 6 | tensorboard 7 | -------------------------------------------------------------------------------- /requirements/tests.txt: -------------------------------------------------------------------------------- 1 | asynctest 2 | codecov 3 | flake8 4 | interrogate 5 | isort==4.3.21 6 | # Note: used for kwarray.group_items, this may be ported to mmcv in the future.
7 | kwarray 8 | pytest 9 | ubelt 10 | xdoctest>=0.10.0 11 | yapf 12 | -------------------------------------------------------------------------------- /resources/coco_test_12510.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/Generic-Grouping/c0f33d76b207f937b538bfecf15d99672e068158/resources/coco_test_12510.jpg -------------------------------------------------------------------------------- /resources/corruptions_sev_3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/Generic-Grouping/c0f33d76b207f937b538bfecf15d99672e068158/resources/corruptions_sev_3.png -------------------------------------------------------------------------------- /resources/data_pipeline.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/Generic-Grouping/c0f33d76b207f937b538bfecf15d99672e068158/resources/data_pipeline.png -------------------------------------------------------------------------------- /resources/loss_curve.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/Generic-Grouping/c0f33d76b207f937b538bfecf15d99672e068158/resources/loss_curve.png -------------------------------------------------------------------------------- /resources/mmdet-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/Generic-Grouping/c0f33d76b207f937b538bfecf15d99672e068158/resources/mmdet-logo.png -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [isort] 2 | line_length = 79 3 | multi_line_output = 0 4 | known_standard_library = setuptools 5 | 
"""Tests for async interface."""


class AsyncTestCase(asynctest.TestCase):
    """Base case that drives coroutine test methods with a timeout."""

    use_default_loop = False
    forbid_get_event_loop = True

    # Upper bound (seconds) for a single async test; overridable via env var.
    TEST_TIMEOUT = int(os.getenv("ASYNCIO_TEST_TIMEOUT", "30"))

    def _run_test_method(self, method):
        result = method()
        if asyncio.iscoroutine(result):
            # Coroutine tests run to completion on the case's event loop.
            self.loop.run_until_complete(
                asyncio.wait_for(result, timeout=self.TEST_TIMEOUT)
            )


class MaskRCNNDetector:
    """Async detector wrapper multiplexing inference over CUDA streams."""

    def __init__(
        self, model_config, checkpoint=None, streamqueue_size=3, device="cuda:0"
    ):
        self.streamqueue_size = streamqueue_size
        self.device = device
        # build the model and load checkpoint
        # BUGFIX: the ``checkpoint`` argument was previously ignored
        # (``checkpoint=None`` was hard-coded); forward it so callers can
        # actually load weights. The default (None) behaves as before.
        self.model = init_detector(
            model_config, checkpoint=checkpoint, device=self.device
        )
        self.streamqueue = None

    async def init(self):
        """Fill the stream queue with CUDA streams used for inference."""
        self.streamqueue = asyncio.Queue()
        for _ in range(self.streamqueue_size):
            stream = torch.cuda.Stream(device=self.device)
            self.streamqueue.put_nowait(stream)

    if sys.version_info >= (3, 7):

        async def apredict(self, img):
            """Run async inference on one image (path or loaded array)."""
            if isinstance(img, str):
                img = mmcv.imread(img)
            async with concurrent(self.streamqueue):
                result = await async_inference_detector(self.model, img)
            return result


class AsyncInferenceTestCase(AsyncTestCase):
    if sys.version_info >= (3, 7):

        async def test_simple_inference(self):
            if not torch.cuda.is_available():
                import pytest

                pytest.skip("test requires GPU and torch+cuda")

            ori_grad_enabled = torch.is_grad_enabled()
            # BUGFIX: ``__name__`` is a module name, not a filesystem path,
            # so dirname() on it never yielded the repo root; use
            # ``__file__`` (this file lives in ``tests/``).
            root_dir = os.path.dirname(os.path.dirname(__file__))
            model_config = os.path.join(
                root_dir, "configs/mask_rcnn/mask_rcnn_r50_fpn_1x_coco.py"
            )
            detector = MaskRCNNDetector(model_config)
            await detector.init()
            img_path = os.path.join(root_dir, "demo/demo.jpg")
            bboxes, _ = await detector.apredict(img_path)
            self.assertTrue(bboxes)
            # asy inference detector will hack grad_enabled,
            # so restore here to avoid it to influence other tests
            torch.set_grad_enabled(ori_grad_enabled)
def test_yolo_bbox_coder():
    """YOLOBBoxCoder.decode must reproduce pre-computed reference boxes.

    The expected values are reference outputs recorded for this exact
    anchor/prediction/grid-size combination; the test pins the decode
    math against regressions.
    """
    coder = YOLOBBoxCoder()
    # Anchor boxes in (x1, y1, x2, y2) format, one per grid cell.
    bboxes = torch.Tensor(
        [
            [-42.0, -29.0, 74.0, 61.0],
            [-10.0, -29.0, 106.0, 61.0],
            [22.0, -29.0, 138.0, 61.0],
            [54.0, -29.0, 170.0, 61.0],
        ]
    )
    # Raw per-anchor network outputs — presumably (tx, ty, tw, th);
    # TODO confirm against YOLOBBoxCoder.decode.
    pred_bboxes = torch.Tensor(
        [
            [0.4709, 0.6152, 0.1690, -0.4056],
            [0.5399, 0.6653, 0.1162, -0.4162],
            [0.4654, 0.6618, 0.1548, -0.4301],
            [0.4786, 0.6197, 0.1896, -0.4479],
        ]
    )
    grid_size = 32
    expected_decode_bboxes = torch.Tensor(
        [
            [-53.6102, -10.3096, 83.7478, 49.6824],
            [-15.8700, -8.3901, 114.4236, 50.9693],
            [11.1822, -8.0924, 146.6034, 50.4476],
            [41.2068, -8.9232, 181.4236, 48.5840],
        ]
    )
    assert expected_decode_bboxes.allclose(coder.decode(bboxes, pred_bboxes, grid_size))
def test_default_format_bundle():
    """DefaultFormatBundle should add pad_shape/scale_factor/img_norm_cfg."""
    results = dict(
        img_prefix=osp.join(osp.dirname(__file__), "../data"),
        img_info=dict(filename="color.jpg"),
    )
    loader = build_from_cfg(dict(type="LoadImageFromFile"), PIPELINES)
    formatter = build_from_cfg(dict(type="DefaultFormatBundle"), PIPELINES)
    results = loader(results)
    bundle_keys = ("pad_shape", "scale_factor", "img_norm_cfg")
    # Loading alone must not introduce the bundle-added keys...
    for key in bundle_keys:
        assert key not in results
    results = formatter(results)
    # ...but the format bundle must add all of them.
    for key in bundle_keys:
        assert key in results
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree


import pytest
from mmdet.datasets import replace_ImageToTensor


def test_replace_ImageToTensor():
    """replace_ImageToTensor must swap ImageToTensor for DefaultFormatBundle,
    both inside a MultiScaleFlipAug wrapper and at the top level, and warn."""
    # with MultiScaleFlipAug
    pipelines = [
        dict(type="LoadImageFromFile"),
        dict(
            type="MultiScaleFlipAug",
            img_scale=(1333, 800),
            flip=False,
            transforms=[
                dict(type="Resize", keep_ratio=True),
                dict(type="RandomFlip"),
                dict(type="Normalize"),
                dict(type="Pad", size_divisor=32),
                dict(type="ImageToTensor", keys=["img"]),
                dict(type="Collect", keys=["img"]),
            ],
        ),
    ]
    expected_pipelines = [
        dict(type="LoadImageFromFile"),
        dict(
            type="MultiScaleFlipAug",
            img_scale=(1333, 800),
            flip=False,
            transforms=[
                dict(type="Resize", keep_ratio=True),
                dict(type="RandomFlip"),
                dict(type="Normalize"),
                dict(type="Pad", size_divisor=32),
                # Only this entry differs from the input pipeline.
                dict(type="DefaultFormatBundle"),
                dict(type="Collect", keys=["img"]),
            ],
        ),
    ]
    # The replacement is expected to emit a deprecation-style UserWarning.
    with pytest.warns(UserWarning):
        assert expected_pipelines == replace_ImageToTensor(pipelines)

    # without MultiScaleFlipAug
    pipelines = [
        dict(type="LoadImageFromFile"),
        dict(type="Resize", keep_ratio=True),
        dict(type="RandomFlip"),
        dict(type="Normalize"),
        dict(type="Pad", size_divisor=32),
        dict(type="ImageToTensor", keys=["img"]),
        dict(type="Collect", keys=["img"]),
    ]
    expected_pipelines = [
        dict(type="LoadImageFromFile"),
        dict(type="Resize", keep_ratio=True),
        dict(type="RandomFlip"),
        dict(type="Normalize"),
        dict(type="Pad", size_divisor=32),
        dict(type="DefaultFormatBundle"),
        dict(type="Collect", keys=["img"]),
    ]
    with pytest.warns(UserWarning):
        assert expected_pipelines == replace_ImageToTensor(pipelines)
-------------------------------------------------------------------------------- /tests/test_misc.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree 6 | 7 | 8 | import numpy as np 9 | import pytest 10 | import torch 11 | from mmdet.core.mask.structures import BitmapMasks, PolygonMasks 12 | from mmdet.core.utils import mask2ndarray 13 | 14 | 15 | def dummy_raw_polygon_masks(size): 16 | """ 17 | Args: 18 | size (tuple): expected shape of dummy masks, (N, H, W) 19 | 20 | Return: 21 | list[list[ndarray]]: dummy mask 22 | """ 23 | num_obj, heigt, width = size 24 | polygons = [] 25 | for _ in range(num_obj): 26 | num_points = np.random.randint(5) * 2 + 6 27 | polygons.append([np.random.uniform(0, min(heigt, width), num_points)]) 28 | return polygons 29 | 30 | 31 | def test_mask2ndarray(): 32 | raw_masks = np.ones((3, 28, 28)) 33 | bitmap_mask = BitmapMasks(raw_masks, 28, 28) 34 | output_mask = mask2ndarray(bitmap_mask) 35 | assert np.allclose(raw_masks, output_mask) 36 | 37 | raw_masks = dummy_raw_polygon_masks((3, 28, 28)) 38 | polygon_masks = PolygonMasks(raw_masks, 28, 28) 39 | output_mask = mask2ndarray(polygon_masks) 40 | assert output_mask.shape == (3, 28, 28) 41 | 42 | raw_masks = np.ones((3, 28, 28)) 43 | output_mask = mask2ndarray(raw_masks) 44 | assert np.allclose(raw_masks, output_mask) 45 | 46 | raw_masks = torch.ones((3, 28, 28)) 47 | output_mask = mask2ndarray(raw_masks) 48 | assert np.allclose(raw_masks, output_mask) 49 | 50 | # test unsupported type 51 | raw_masks = [] 52 | with pytest.raises(TypeError): 53 | output_mask = mask2ndarray(raw_masks) 54 | -------------------------------------------------------------------------------- /tests/test_models/test_position_encoding.py: 
--------------------------------------------------------------------------------
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.

# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree


import pytest
import torch
from mmdet.models.utils import LearnedPositionalEncoding, SinePositionalEncoding


def test_sine_positional_encoding(num_feats=16, batch_size=2):
    """SinePositionalEncoding: scale validation, output shape, normalize flag."""
    # test invalid type of scale
    with pytest.raises(AssertionError):
        module = SinePositionalEncoding(num_feats, scale=(3.0,), normalize=True)

    module = SinePositionalEncoding(num_feats)
    h, w = 10, 6
    # Random boolean padding mask of shape (batch, h, w).
    mask = torch.rand(batch_size, h, w) > 0.5
    assert not module.normalize
    out = module(mask)
    # Output interleaves sine/cosine halves -> 2 * num_feats channels.
    assert out.shape == (batch_size, num_feats * 2, h, w)

    # set normalize
    module = SinePositionalEncoding(num_feats, normalize=True)
    assert module.normalize
    out = module(mask)
    assert out.shape == (batch_size, num_feats * 2, h, w)


def test_learned_positional_encoding(
    num_feats=16, row_num_embed=10, col_num_embed=10, batch_size=2
):
    """LearnedPositionalEncoding: embedding-table shapes and output shape."""
    module = LearnedPositionalEncoding(num_feats, row_num_embed, col_num_embed)
    assert module.row_embed.weight.shape == (row_num_embed, num_feats)
    assert module.col_embed.weight.shape == (col_num_embed, num_feats)
    h, w = 10, 6
    mask = torch.rand(batch_size, h, w) > 0.5
    out = module(mask)
    assert out.shape == (batch_size, num_feats * 2, h, w)
--------------------------------------------------------------------------------
/tests/test_version.py:
--------------------------------------------------------------------------------
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree


from mmdet import digit_version


def test_version_check():
    """digit_version must order releases numerically and rank rcN pre-releases
    strictly below the corresponding final release."""
    assert digit_version("1.0.5") > digit_version("1.0.5rc0")
    assert digit_version("1.0.5") > digit_version("1.0.4rc0")
    assert digit_version("1.0.5") > digit_version("1.0rc0")
    assert digit_version("1.0.0") > digit_version("0.6.2")
    assert digit_version("1.0.0") > digit_version("0.2.16")
    assert digit_version("1.0.5rc0") > digit_version("1.0.0rc0")
    assert digit_version("1.0.0rc1") > digit_version("1.0.0rc0")
    assert digit_version("1.0.0rc2") > digit_version("1.0.0rc0")
    assert digit_version("1.0.0rc2") > digit_version("1.0.0rc1")
    assert digit_version("1.0.1rc1") > digit_version("1.0.0rc1")
    assert digit_version("1.0.0") > digit_version("1.0.0rc1")
--------------------------------------------------------------------------------
/tools/browse_dataset.py:
--------------------------------------------------------------------------------
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree


import argparse
import os
from pathlib import Path

import mmcv
from mmcv import Config
from mmdet.core.utils import mask2ndarray
from mmdet.core.visualization import imshow_det_bboxes
from mmdet.datasets.builder import build_dataset


def parse_args():
    # CLI for visually browsing the ground-truth annotations of a dataset.
    parser = argparse.ArgumentParser(description="Browse a dataset")
    parser.add_argument("config", help="train config file path")
    parser.add_argument(
        "--skip-type",
        type=str,
        nargs="+",
        default=["DefaultFormatBundle", "Normalize", "Collect"],
        help="skip some useless pipeline",
    )
    parser.add_argument(
        "--output-dir",
        default=None,
        type=str,
        help="If there is no display interface, you can save it",
    )
    parser.add_argument("--not-show", default=False, action="store_true")
    parser.add_argument(
        "--show-interval", type=float, default=2, help="the interval of show (s)"
    )
    args = parser.parse_args()
    return args


def retrieve_data_cfg(config_path, skip_type):
    # Load the config and drop pipeline steps (e.g. Normalize) that would make
    # images unsuitable for direct display.  Mutates cfg.data.train in place.
    cfg = Config.fromfile(config_path)
    train_data_cfg = cfg.data.train
    train_data_cfg["pipeline"] = [
        x for x in train_data_cfg.pipeline if x["type"] not in skip_type
    ]

    return cfg


def main():
    # Iterate the train split and show/save each image with its GT overlays.
    args = parse_args()
    cfg = retrieve_data_cfg(args.config, args.skip_type)

    dataset = build_dataset(cfg.data.train)

    progress_bar = mmcv.ProgressBar(len(dataset))

    for item in dataset:
        filename = (
            os.path.join(args.output_dir, Path(item["filename"]).name)
            if args.output_dir is not None
            else None
        )

        # Masks are optional (detection-only datasets have none).
        gt_masks = item.get("gt_masks", None)
        if gt_masks is not None:
            gt_masks = mask2ndarray(gt_masks)

        imshow_det_bboxes(
            item["img"],
            item["gt_bboxes"],
            item["gt_labels"],
            gt_masks,
            class_names=dataset.CLASSES,
            show=not args.not_show,
            wait_time=args.show_interval,
            out_file=filename,
            bbox_color=(255, 102, 61),
            text_color=(255, 102, 61),
        )

        progress_bar.update()


if __name__ == "__main__":
    main()
--------------------------------------------------------------------------------
/tools/dist_test_bbox.sh:
--------------------------------------------------------------------------------
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.

# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree


#!/usr/bin/env bash

# Distributed bbox evaluation.  Usage: dist_test_bbox.sh CONFIG CHECKPOINT
# NUM_GPUS [extra test.py args]; PORT overridable via the environment.
CONFIG=$1
CHECKPOINT=$2
GPUS=$3
PORT=${PORT:-29500}

PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \
python -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \
    $(dirname "$0")/test.py $CONFIG $CHECKPOINT --launcher pytorch "${@:4}" \
    --eval bbox
--------------------------------------------------------------------------------
/tools/dist_train.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash

# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.

# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree


# Distributed training.  Usage: dist_train.sh CONFIG NUM_GPUS [extra args].
# Note: validation during training is disabled (--no-validate).
CONFIG=$1
GPUS=$2
PORT=${PORT:-29500}

PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \
python3 -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \
    $(dirname "$0")/train.py $CONFIG --launcher pytorch "${@:3}" --no-validate
--------------------------------------------------------------------------------
/tools/dist_train_and_test_bbox.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash

# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.

# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree


# Train, then evaluate bboxes with the same extra args.
# Usage: dist_train_and_test_bbox.sh CONFIG CHECKPOINT NUM_GPUS [extra args]
CONFIG=$1
CHECKPOINT=$2
GPUS=$3
PORT=${PORT:-29500}


PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \
python -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \
    $(dirname "$0")/train.py $CONFIG --launcher pytorch "${@:4}" --no-validate


PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \
python -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \
    $(dirname "$0")/test.py $CONFIG $CHECKPOINT --launcher pytorch "${@:4}" \
    --eval bbox
--------------------------------------------------------------------------------
/tools/eval_metric.py:
--------------------------------------------------------------------------------
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree 6 | 7 | 8 | import argparse 9 | 10 | import mmcv 11 | from mmcv import Config, DictAction 12 | from mmdet.datasets import build_dataset 13 | 14 | 15 | def parse_args(): 16 | parser = argparse.ArgumentParser( 17 | description="Evaluate metric of the " "results saved in pkl format" 18 | ) 19 | parser.add_argument("config", help="Config of the model") 20 | parser.add_argument("pkl_results", help="Results in pickle format") 21 | parser.add_argument( 22 | "--format-only", 23 | action="store_true", 24 | help="Format the output results without perform evaluation. It is" 25 | "useful when you want to format the result to a specific format and " 26 | "submit it to the test server", 27 | ) 28 | parser.add_argument( 29 | "--eval", 30 | type=str, 31 | nargs="+", 32 | help='Evaluation metrics, which depends on the dataset, e.g., "bbox",' 33 | ' "segm", "proposal" for COCO, and "mAP", "recall" for PASCAL VOC', 34 | ) 35 | parser.add_argument( 36 | "--cfg-options", 37 | nargs="+", 38 | action=DictAction, 39 | help="override some settings in the used config, the key-value pair " 40 | "in xxx=yyy format will be merged into config file. If the value to " 41 | 'be overwritten is a list, it should be like key="[a,b]" or key=a,b ' 42 | 'It also allows nested list/tuple values, e.g. 
key="[(a,b),(c,d)]" ' 43 | "Note that the quotation marks are necessary and that no white space " 44 | "is allowed.", 45 | ) 46 | parser.add_argument( 47 | "--eval-options", 48 | nargs="+", 49 | action=DictAction, 50 | help="custom options for evaluation, the key-value pair in xxx=yyy " 51 | "format will be kwargs for dataset.evaluate() function", 52 | ) 53 | args = parser.parse_args() 54 | return args 55 | 56 | 57 | def main(): 58 | args = parse_args() 59 | 60 | cfg = Config.fromfile(args.config) 61 | assert args.eval or args.format_only, ( 62 | "Please specify at least one operation (eval/format the results) with " 63 | 'the argument "--eval", "--format-only"' 64 | ) 65 | if args.eval and args.format_only: 66 | raise ValueError("--eval and --format_only cannot be both specified") 67 | 68 | if args.cfg_options is not None: 69 | cfg.merge_from_dict(args.cfg_options) 70 | cfg.data.test.test_mode = True 71 | 72 | dataset = build_dataset(cfg.data.test) 73 | outputs = mmcv.load(args.pkl_results) 74 | 75 | kwargs = {} if args.eval_options is None else args.eval_options 76 | if args.format_only: 77 | dataset.format_results(outputs, **kwargs) 78 | if args.eval: 79 | eval_kwargs = cfg.get("evaluation", {}).copy() 80 | # hard-code way to remove EvalHook args 81 | for key in ["interval", "tmpdir", "start", "gpu_collect", "save_best", "rule"]: 82 | eval_kwargs.pop(key, None) 83 | eval_kwargs.update(dict(metric=args.eval, **kwargs)) 84 | print(dataset.evaluate(outputs, **eval_kwargs)) 85 | 86 | 87 | if __name__ == "__main__": 88 | main() 89 | -------------------------------------------------------------------------------- /tools/get_flops.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 
3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree 6 | 7 | 8 | import argparse 9 | 10 | import torch 11 | from mmcv import Config 12 | from mmdet.models import build_detector 13 | 14 | try: 15 | from mmcv.cnn import get_model_complexity_info 16 | except ImportError: 17 | raise ImportError("Please upgrade mmcv to >0.6.2") 18 | 19 | 20 | def parse_args(): 21 | parser = argparse.ArgumentParser(description="Train a detector") 22 | parser.add_argument("config", help="train config file path") 23 | parser.add_argument( 24 | "--shape", type=int, nargs="+", default=[1280, 800], help="input image size" 25 | ) 26 | args = parser.parse_args() 27 | return args 28 | 29 | 30 | def main(): 31 | args = parse_args() 32 | 33 | if len(args.shape) == 1: 34 | input_shape = (3, args.shape[0], args.shape[0]) 35 | elif len(args.shape) == 2: 36 | input_shape = (3,) + tuple(args.shape) 37 | else: 38 | raise ValueError("invalid input shape") 39 | 40 | cfg = Config.fromfile(args.config) 41 | # import modules from string list. 42 | if cfg.get("custom_imports", None): 43 | from mmcv.utils import import_modules_from_strings 44 | 45 | import_modules_from_strings(**cfg["custom_imports"]) 46 | 47 | model = build_detector( 48 | cfg.model, train_cfg=cfg.get("train_cfg"), test_cfg=cfg.get("test_cfg") 49 | ) 50 | if torch.cuda.is_available(): 51 | model.cuda() 52 | model.eval() 53 | 54 | if hasattr(model, "forward_dummy"): 55 | model.forward = model.forward_dummy 56 | else: 57 | raise NotImplementedError( 58 | "FLOPs counter is currently not currently supported with {}".format( 59 | model.__class__.__name__ 60 | ) 61 | ) 62 | 63 | flops, params = get_model_complexity_info(model, input_shape) 64 | split_line = "=" * 30 65 | print( 66 | f"{split_line}\nInput shape: {input_shape}\n" 67 | f"Flops: {flops}\nParams: {params}\n{split_line}" 68 | ) 69 | print( 70 | "!!!Please be cautious if you use the results in papers. 
" 71 | "You may need to check if all ops are supported and verify that the " 72 | "flops computation is correct." 73 | ) 74 | 75 | 76 | if __name__ == "__main__": 77 | main() 78 | -------------------------------------------------------------------------------- /tools/interpolate_extracted_masks.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree 6 | 7 | 8 | import json 9 | import multiprocessing as mp 10 | import time 11 | 12 | import cv2 13 | import numpy as np 14 | from pycocotools import mask as maskUtils 15 | 16 | 17 | MASKS_DIR = "" 18 | NUM_SPLITS = 1 19 | 20 | 21 | def resize_mask(image_ann): 22 | new_anns = [] 23 | for ann in image_ann: 24 | segm = ann["segmentation"] 25 | mask = maskUtils.decode(segm) 26 | orig_shape = ann["ori_shape"][:2] 27 | resized_mask = cv2.resize( 28 | mask, (orig_shape[1], orig_shape[0]), interpolation=cv2.INTER_NEAREST 29 | ) 30 | new_rle = maskUtils.encode(np.asfortranarray(resized_mask)) 31 | if type(new_rle["counts"]) == bytes: 32 | new_rle["counts"] = new_rle["counts"].decode("ascii") 33 | area = maskUtils.area(new_rle) 34 | bbox = maskUtils.toBbox(new_rle) 35 | ann["segmentation"] = new_rle 36 | ann["area"] = int(area) 37 | ann["bbox"] = [int(coord) for coord in bbox] 38 | new_anns.append(ann) 39 | return new_anns 40 | 41 | 42 | SPLITS = range(0, NUM_SPLITS) 43 | for split in SPLITS: 44 | json_path = f"{MASKS_DIR}/masks_{split}.json" 45 | output_path = f"{MASKS_DIR}/masks_interpolated_{split}.json" 46 | 47 | ann_json = json.load(open(json_path, "rb")) 48 | 49 | start = time.perf_counter() 50 | 51 | mp_pool = mp.Pool(processes=60) 52 | resized_masks = mp_pool.map(resize_mask, ann_json) 53 | 54 | print(f"finished {split}") 55 | print(time.perf_counter() - start, "seconds") 56 | 57 | 
json.dump(resized_masks, open(output_path, "w")) 58 | -------------------------------------------------------------------------------- /tools/merge_annotations.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree 6 | 7 | 8 | import json 9 | 10 | splits = 4 11 | 12 | # COCO 13 | pseudo_mask_path = "WORK_DIR/masks_{}.json" 14 | ref_json_path = "DATA_DIR/instances.json" 15 | 16 | output_path = pseudo_mask_path.format("all") 17 | 18 | # JOIN into processed 19 | ref_json = json.load(open(ref_json_path, "rb")) 20 | max_id = 0 21 | for ann in ref_json["annotations"]: 22 | max_id = max(ann["id"], max_id) 23 | 24 | pseudo_masks = [] 25 | for shard in range(splits): 26 | ann_json = json.load(open(pseudo_mask_path.format(shard), "rb")) 27 | for img_ann in ann_json: 28 | for ann in img_ann: 29 | max_id += 1 30 | ann["id"] = max_id 31 | pseudo_masks.append(ann) 32 | 33 | ref_json["annotations"] = pseudo_masks 34 | 35 | print(f"generated {len(pseudo_masks)} pseudo masks") 36 | json.dump(ref_json, open(output_path, "w")) 37 | -------------------------------------------------------------------------------- /tools/model_test.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree


import cv2
import mmcv
import numpy as np
import torch
from mmcv import Config, DictAction
from mmcv.runner import get_dist_info, init_dist, load_checkpoint, wrap_fp16_model
from mmdet.core import bbox2roi
from mmdet.models import build_detector

# NOTE(review): ad-hoc smoke-test script with hardcoded, user-specific paths;
# everything below runs at import time.  Presumably not meant for CI — confirm.
cfg_file = "./configs/oln_mask/two_tower_example.py"
checkpoint = "/checkpoint/weiyaowang/pairwise_potential/coco_to_lvis/maskrcnn_baseline/latest.pth"
img_path = "/checkpoint/trandu/oln/data/coco/train2017/000000391895.jpg"

model_config = Config.fromfile(cfg_file).model
# iou_threshold=1.0 effectively disables NMS so every proposal survives.
model_config.test_cfg.rcnn.nms = dict(type="nms", iou_threshold=1.0)
two_tower = build_detector(model_config)
# load_checkpoint(mask_rcnn, checkpoint, map_location='cpu')
two_tower.cpu()
two_tower.eval()
input_img = cv2.imread(img_path)
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True
)
input_img = mmcv.imnormalize(
    input_img,
    np.array(img_norm_cfg["mean"]),
    np.array(img_norm_cfg["std"]),
)
# Minimal metadata dict expected by simple_test.
img_metas = [
    {
        "img_shape": input_img.shape,
        "scale_factor": 1.0,
        "ori_shape": input_img.shape,
        "pad_shape": input_img.shape,
    }
]

# HWC -> CHW, then add a leading batch dimension.
input_img = np.transpose(input_img, (2, 0, 1))
input_img = torch.from_numpy(input_img)

input_img = input_img.unsqueeze(0)

out = two_tower.simple_test(input_img, img_metas)

# features = mask_rcnn.extract_feat(input_img)

# # (tl_x, tl_y, br_x, br_y)
# proposal_list = mask_rcnn.rpn_head.simple_test_rpn(features, img_metas)
# det_bboxes, det_labels = mask_rcnn.roi_head.simple_test_bboxes(
#     features, img_metas, proposal_list, mask_rcnn.roi_head.test_cfg, rescale=False
# )
# segm_results = mask_rcnn.roi_head.simple_test_mask(
#     features, img_metas, det_bboxes, det_labels, rescale=False
# )
# print(proposal_list[0].shape)
# roi_out = mask_rcnn.roi_head.forward_dummy(features, proposal_list[0])
# print(roi_out[0].shape)
# print(roi_out[1].shape)
# print(roi_out[2].shape)
# print(roi_out[3].shape)

# print(f"det boxes: {det_bboxes[0].shape}")

# print(proposal_list[0][::200, :4])
# print(det_bboxes[0][::200, :4])
# print(segm_results[0].shape)

# print(roi_out[1][::200, :4])
# print(roi_out[2][0, 0, :, :])
# print(roi_out[1][:5, :4])
# print(roi_out[3][:5, :4])
--------------------------------------------------------------------------------
/tools/print_config.py:
--------------------------------------------------------------------------------
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.

# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree


import argparse

from mmcv import Config, DictAction


def parse_args():
    # Config path plus optional key=value overrides merged into the config.
    parser = argparse.ArgumentParser(description="Print the whole config")
    parser.add_argument("config", help="config file path")
    parser.add_argument(
        "--options", nargs="+", action=DictAction, help="arguments in dict"
    )
    args = parser.parse_args()

    return args


def main():
    # Resolve the config (with overrides) and dump its pretty-printed text.
    args = parse_args()

    cfg = Config.fromfile(args.config)
    if args.options is not None:
        cfg.merge_from_dict(args.options)
    print(f"Config:\n{cfg.pretty_text}")


if __name__ == "__main__":
    main()
--------------------------------------------------------------------------------
/tools/publish_model.py:
--------------------------------------------------------------------------------
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.

# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree


import argparse
import subprocess

import torch


def parse_args():
    """Parse input/output checkpoint paths from the command line."""
    parser = argparse.ArgumentParser(description="Process a checkpoint to be published")
    parser.add_argument("in_file", help="input checkpoint filename")
    parser.add_argument("out_file", help="output checkpoint filename")
    args = parser.parse_args()
    return args


def process_checkpoint(in_file, out_file):
    """Strip training-only state and publish with a hash-stamped filename.

    Drops the optimizer state for a smaller file, saves to ``out_file``, then
    renames the result to ``<out_file stem>-<first 8 hex of sha256>.pth``.
    """
    checkpoint = torch.load(in_file, map_location="cpu")
    # remove optimizer for smaller file size
    if "optimizer" in checkpoint:
        del checkpoint["optimizer"]
    # if it is necessary to remove some sensitive data in checkpoint['meta'],
    # add the code here.
    torch.save(checkpoint, out_file)
    # sha256sum output starts with the hex digest, so sha[:8] is its prefix.
    sha = subprocess.check_output(["sha256sum", out_file]).decode()
    if out_file.endswith(".pth"):
        out_file_name = out_file[:-4]
    else:
        out_file_name = out_file
    final_file = out_file_name + f"-{sha[:8]}.pth"
    # Fix: Popen returned before the rename finished and ignored failures;
    # run(check=True) waits and raises so a bad publish cannot pass silently.
    subprocess.run(["mv", out_file, final_file], check=True)


def main():
    args = parse_args()
    process_checkpoint(args.in_file, args.out_file)


if __name__ == "__main__":
    main()
# --------------------------------------------------------------------------------
# /tools/regnet2mmdet.py:
# --------------------------------------------------------------------------------
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree


import argparse
from collections import OrderedDict

import torch


def convert_stem(model_key, model_weight, state_dict, converted_names):
    """Map a pycls stem key (stem.conv / stem.bn) to mmdet's conv1 / bn1."""
    new_key = model_key.replace("stem.conv", "conv1")
    new_key = new_key.replace("stem.bn", "bn1")
    state_dict[new_key] = model_weight
    converted_names.add(model_key)
    print(f"Convert {model_key} to {new_key}")


def convert_head(model_key, model_weight, state_dict, converted_names):
    """Map a pycls classification-head key (head.fc) to mmdet's fc."""
    new_key = model_key.replace("head.fc", "fc")
    state_dict[new_key] = model_weight
    converted_names.add(model_key)
    print(f"Convert {model_key} to {new_key}")


def convert_reslayer(model_key, model_weight, state_dict, converted_names):
    """Map a pycls residual-stage key (s<i>.b<j>.<module>...) to mmdet style.

    Raises:
        ValueError: if the key does not match any known pycls layout.
    """
    split_keys = model_key.split(".")
    layer, block, module = split_keys[:3]
    block_id = int(block[1:])
    layer_name = f"layer{int(layer[1:])}"
    # pycls blocks are 1-indexed; mmdet blocks are 0-indexed.
    block_name = f"{block_id - 1}"

    if block_id == 1 and module == "bn":
        new_key = f"{layer_name}.{block_name}.downsample.1.{split_keys[-1]}"
    elif block_id == 1 and module == "proj":
        new_key = f"{layer_name}.{block_name}.downsample.0.{split_keys[-1]}"
    elif module == "f":
        if split_keys[3] == "a_bn":
            module_name = "bn1"
        elif split_keys[3] == "b_bn":
            module_name = "bn2"
        elif split_keys[3] == "c_bn":
            module_name = "bn3"
        elif split_keys[3] == "a":
            module_name = "conv1"
        elif split_keys[3] == "b":
            module_name = "conv2"
        elif split_keys[3] == "c":
            module_name = "conv3"
        else:
            # Fix: an unknown sub-module previously fell through and raised an
            # opaque NameError on `module_name` below.
            raise ValueError(f"Unsupported conversion of key {model_key}")
        new_key = f"{layer_name}.{block_name}.{module_name}.{split_keys[-1]}"
    else:
        raise ValueError(f"Unsupported conversion of key {model_key}")
    print(f"Convert {model_key} to {new_key}")
    state_dict[new_key] = model_weight
    converted_names.add(model_key)


def convert(src, dst):
    """Convert keys in pycls pretrained RegNet models to mmdet style."""
    # load caffe model
    regnet_model = torch.load(src)
    blobs = regnet_model["model_state"]
    # convert to pytorch style
    state_dict = OrderedDict()
    converted_names = set()
    for key, weight in blobs.items():
        if "stem" in key:
            convert_stem(key, weight, state_dict, converted_names)
        elif "head" in key:
            convert_head(key, weight, state_dict, converted_names)
        elif key.startswith("s"):
            convert_reslayer(key, weight, state_dict, converted_names)

    # check if all layers are converted
    for key in blobs:
        if key not in converted_names:
            print(f"not converted: {key}")
    # save checkpoint
    checkpoint = dict()
    checkpoint["state_dict"] = state_dict
    torch.save(checkpoint, dst)


def main():
    parser = argparse.ArgumentParser(description="Convert model keys")
    parser.add_argument("src", help="src detectron model path")
    parser.add_argument("dst", help="save path")
    args = parser.parse_args()
    convert(args.src, args.dst)


if __name__ == "__main__":
    main()
# --------------------------------------------------------------------------------
# /tools/slurm_test.sh:
# --------------------------------------------------------------------------------
#!/usr/bin/env bash

# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree


set -x

# Usage: slurm_test.sh PARTITION JOB_NAME CONFIG CHECKPOINT [test.py args...]
# GPUS / GPUS_PER_NODE / CPUS_PER_TASK / SRUN_ARGS overridable via environment.
PARTITION=$1
JOB_NAME=$2
CONFIG=$3
CHECKPOINT=$4
GPUS=${GPUS:-8}
GPUS_PER_NODE=${GPUS_PER_NODE:-8}
CPUS_PER_TASK=${CPUS_PER_TASK:-5}
PY_ARGS=${@:5}
SRUN_ARGS=${SRUN_ARGS:-""}

PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \
srun -p ${PARTITION} \
    --job-name=${JOB_NAME} \
    --gres=gpu:${GPUS_PER_NODE} \
    --ntasks=${GPUS} \
    --ntasks-per-node=${GPUS_PER_NODE} \
    --cpus-per-task=${CPUS_PER_TASK} \
    --kill-on-bad-exit=1 \
    ${SRUN_ARGS} \
    python -u tools/test.py ${CONFIG} ${CHECKPOINT} --launcher="slurm" ${PY_ARGS}
--------------------------------------------------------------------------------
/tools/slurm_train.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash


# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.

# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree


set -x

# Usage: slurm_train.sh PARTITION JOB_NAME CONFIG WORK_DIR [train.py args...]
PARTITION=$1
JOB_NAME=$2
CONFIG=$3
WORK_DIR=$4
GPUS=${GPUS:-8}
GPUS_PER_NODE=${GPUS_PER_NODE:-8}
CPUS_PER_TASK=${CPUS_PER_TASK:-5}
SRUN_ARGS=${SRUN_ARGS:-""}
PY_ARGS=${@:5}

PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \
srun -p ${PARTITION} \
    --job-name=${JOB_NAME} \
    --gres=gpu:${GPUS_PER_NODE} \
    --ntasks=${GPUS} \
    --ntasks-per-node=${GPUS_PER_NODE} \
    --cpus-per-task=${CPUS_PER_TASK} \
    --kill-on-bad-exit=1 \
    ${SRUN_ARGS} \
    python -u tools/train.py ${CONFIG} --work-dir=${WORK_DIR} --launcher="slurm" ${PY_ARGS}
--------------------------------------------------------------------------------