├── .gitignore ├── .pre-commit-config.yaml ├── .readthedocs.yml ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE ├── MANIFEST.in ├── NOTICE ├── README.md ├── configs ├── BigDetection │ ├── cbnetv2 │ │ ├── htc_cbv2_swin_base_adamw_20e_coco.py │ │ ├── htc_cbv2_swin_base_giou_4conv1f_adamw_20e_coco.py │ │ ├── htc_cbv2_swin_base_giou_4conv1f_adamw_20e_coco_tta.py │ │ └── htc_cbv2_swin_base_giou_4conv1f_adamw_bigdet.py │ ├── deformable_detr │ │ ├── deformable_detr_r50_16x2_1x_coco.py │ │ └── deformable_detr_r50_16x2_8x_bigdet.py │ └── yolov3 │ │ ├── yolov3_d53_mstrain-608_1x_coco.py │ │ └── yolov3_d53_mstrain-608_8x_bigdet.py └── _base_ │ ├── datasets │ ├── bigdet_detection.py │ ├── coco_detection.py │ └── coco_instance.py │ ├── default_runtime.py │ ├── models │ ├── htc_without_semantic+mask_swin_fpn.py │ └── htc_without_semantic_swin_fpn.py │ └── schedules │ ├── schedule_1x.py │ ├── schedule_20e.py │ └── schedule_2x.py ├── demo ├── README.md ├── cbnet_output.png ├── demo.jpg └── image_demo.py ├── detectron2-projects └── CenterNet2 │ ├── README.md │ └── configs │ ├── Base-CRCNN.yaml │ ├── Base-CenterNet2.yaml │ ├── Base-RCNN-C4.yaml │ ├── Base-RCNN-FPN.yaml │ ├── cascade_rcnn_R_50_BigDet_8x.yaml │ ├── centernet2_R_50_BigDet_8x.yaml │ ├── faster_rcnn_R_50_C4_BigDet_8x.yaml │ ├── faster_rcnn_R_50_FPN_BigDet_8x.yaml │ ├── faster_rcnn_R_50_FPN_COCO-1.yaml │ ├── faster_rcnn_R_50_FPN_COCO-10.yaml │ ├── faster_rcnn_R_50_FPN_COCO-2.yaml │ └── faster_rcnn_R_50_FPN_COCO-5.yaml ├── mmcv_custom ├── __init__.py ├── checkpoint.py └── runner │ ├── __init__.py │ ├── checkpoint.py │ ├── epoch_based_runner.py │ └── iter_based_runner.py ├── mmdet ├── __init__.py ├── apis │ ├── __init__.py │ ├── inference.py │ ├── test.py │ └── train.py ├── core │ ├── __init__.py │ ├── anchor │ │ ├── __init__.py │ │ ├── anchor_generator.py │ │ ├── builder.py │ │ ├── point_generator.py │ │ └── utils.py │ ├── bbox │ │ ├── __init__.py │ │ ├── assigners │ │ │ ├── __init__.py │ │ │ ├── approx_max_iou_assigner.py │ │ │ ├── assign_result.py │ │ │ ├── atss_assigner.py │ │ │ ├── base_assigner.py │ │ │ ├── center_region_assigner.py │ │ │ ├── grid_assigner.py │ │ │ ├── hungarian_assigner.py │ │ │ ├── max_iou_assigner.py │ │ │ ├── point_assigner.py │ │ │ ├── region_assigner.py │ │ │ └── uniform_assigner.py │ │ ├── builder.py │ │ ├── coder │ │ │ ├── __init__.py │ │ │ ├── base_bbox_coder.py │ │ │ ├── bucketing_bbox_coder.py │ │ │ ├── delta_xywh_bbox_coder.py │ │ │ ├── legacy_delta_xywh_bbox_coder.py │ │ │ ├── pseudo_bbox_coder.py │ │ │ ├── tblr_bbox_coder.py │ │ │ └── yolo_bbox_coder.py │ │ ├── demodata.py │ │ ├── iou_calculators │ │ │ ├── __init__.py │ │ │ ├── builder.py │ │ │ └── iou2d_calculator.py │ │ ├── match_costs │ │ │ ├── __init__.py │ │ │ ├── builder.py │ │ │ └── match_cost.py │ │ ├── samplers │ │ │ ├── __init__.py │ │ │ ├── base_sampler.py │ │ │ ├── combined_sampler.py │ │ │ ├── instance_balanced_pos_sampler.py │ │ │ ├── iou_balanced_neg_sampler.py │ │ │ ├── ohem_sampler.py │ │ │ ├── pseudo_sampler.py │ │ │ ├── random_sampler.py │ │ │ ├── sampling_result.py │ │ │ └── score_hlr_sampler.py │ │ └── transforms.py │ ├── evaluation │ │ ├── __init__.py │ │ ├── bbox_overlaps.py │ │ ├── class_names.py │ │ ├── eval_hooks.py │ │ ├── mean_ap.py │ │ └── recall.py │ ├── export │ │ ├── __init__.py │ │ ├── model_wrappers.py │ │ ├── onnx_helper.py │ │ └── pytorch2onnx.py │ ├── mask │ │ ├── __init__.py │ │ ├── mask_target.py │ │ ├── structures.py │ │ └── utils.py │ ├── post_processing │ │ ├── __init__.py │ │ ├── bbox_nms.py │ │ └── merge_augs.py 
│ ├── utils │ │ ├── __init__.py │ │ ├── dist_utils.py │ │ └── misc.py │ └── visualization │ │ ├── __init__.py │ │ └── image.py ├── datasets │ ├── __init__.py │ ├── api_wrappers │ │ ├── __init__.py │ │ └── coco_api.py │ ├── builder.py │ ├── cityscapes.py │ ├── coco.py │ ├── custom.py │ ├── dataset_wrappers.py │ ├── deepfashion.py │ ├── lvis.py │ ├── pipelines │ │ ├── __init__.py │ │ ├── auto_augment.py │ │ ├── compose.py │ │ ├── formating.py │ │ ├── instaboost.py │ │ ├── loading.py │ │ ├── test_time_aug.py │ │ └── transforms.py │ ├── samplers │ │ ├── __init__.py │ │ ├── distributed_classaware_sampler.py │ │ ├── distributed_sampler.py │ │ └── group_sampler.py │ ├── utils.py │ ├── voc.py │ ├── wider_face.py │ └── xml_style.py ├── models │ ├── __init__.py │ ├── backbones │ │ ├── __init__.py │ │ ├── cbnet.py │ │ ├── darknet.py │ │ ├── detectors_resnet.py │ │ ├── detectors_resnext.py │ │ ├── hourglass.py │ │ ├── hrnet.py │ │ ├── mobilenet_v2.py │ │ ├── regnet.py │ │ ├── res2net.py │ │ ├── resnest.py │ │ ├── resnet.py │ │ ├── resnext.py │ │ ├── ssd_vgg.py │ │ ├── swin_transformer.py │ │ └── trident_resnet.py │ ├── builder.py │ ├── dense_heads │ │ ├── __init__.py │ │ ├── anchor_free_head.py │ │ ├── anchor_head.py │ │ ├── atss_head.py │ │ ├── autoassign_head.py │ │ ├── base_dense_head.py │ │ ├── cascade_rpn_head.py │ │ ├── centernet_head.py │ │ ├── centripetal_head.py │ │ ├── corner_head.py │ │ ├── deformable_detr_head.py │ │ ├── dense_test_mixins.py │ │ ├── detr_head.py │ │ ├── embedding_rpn_head.py │ │ ├── fcos_head.py │ │ ├── fovea_head.py │ │ ├── free_anchor_retina_head.py │ │ ├── fsaf_head.py │ │ ├── ga_retina_head.py │ │ ├── ga_rpn_head.py │ │ ├── gfl_head.py │ │ ├── guided_anchor_head.py │ │ ├── ld_head.py │ │ ├── nasfcos_head.py │ │ ├── paa_head.py │ │ ├── pisa_retinanet_head.py │ │ ├── pisa_ssd_head.py │ │ ├── reppoints_head.py │ │ ├── retina_head.py │ │ ├── retina_sepbn_head.py │ │ ├── rpn_head.py │ │ ├── sabl_retina_head.py │ │ ├── ssd_head.py │ │ ├── vfnet_head.py │ │ ├── yolact_head.py │ │ ├── yolo_head.py │ │ └── yolof_head.py │ ├── detectors │ │ ├── __init__.py │ │ ├── atss.py │ │ ├── autoassign.py │ │ ├── base.py │ │ ├── cascade_rcnn.py │ │ ├── centernet.py │ │ ├── cornernet.py │ │ ├── deformable_detr.py │ │ ├── detr.py │ │ ├── fast_rcnn.py │ │ ├── faster_rcnn.py │ │ ├── fcos.py │ │ ├── fovea.py │ │ ├── fsaf.py │ │ ├── gfl.py │ │ ├── grid_rcnn.py │ │ ├── htc.py │ │ ├── kd_one_stage.py │ │ ├── mask_rcnn.py │ │ ├── mask_scoring_rcnn.py │ │ ├── nasfcos.py │ │ ├── paa.py │ │ ├── point_rend.py │ │ ├── reppoints_detector.py │ │ ├── retinanet.py │ │ ├── rpn.py │ │ ├── scnet.py │ │ ├── single_stage.py │ │ ├── sparse_rcnn.py │ │ ├── trident_faster_rcnn.py │ │ ├── two_stage.py │ │ ├── vfnet.py │ │ ├── yolact.py │ │ ├── yolo.py │ │ └── yolof.py │ ├── losses │ │ ├── __init__.py │ │ ├── accuracy.py │ │ ├── ae_loss.py │ │ ├── balanced_l1_loss.py │ │ ├── cross_entropy_loss.py │ │ ├── focal_loss.py │ │ ├── gaussian_focal_loss.py │ │ ├── gfocal_loss.py │ │ ├── ghm_loss.py │ │ ├── iou_loss.py │ │ ├── kd_loss.py │ │ ├── mse_loss.py │ │ ├── pisa_loss.py │ │ ├── seesaw_loss.py │ │ ├── smooth_l1_loss.py │ │ ├── utils.py │ │ └── varifocal_loss.py │ ├── necks │ │ ├── __init__.py │ │ ├── bfp.py │ │ ├── cbnet_fpn.py │ │ ├── channel_mapper.py │ │ ├── ct_resnet_neck.py │ │ ├── dilated_encoder.py │ │ ├── fpg.py │ │ ├── fpn.py │ │ ├── fpn_carafe.py │ │ ├── hrfpn.py │ │ ├── nas_fpn.py │ │ ├── nasfcos_fpn.py │ │ ├── pafpn.py │ │ ├── rfp.py │ │ ├── ssd_neck.py │ │ └── yolo_neck.py │ ├── roi_heads │ │ ├── 
__init__.py │ │ ├── base_roi_head.py │ │ ├── bbox_heads │ │ │ ├── __init__.py │ │ │ ├── bbox_head.py │ │ │ ├── convfc_bbox_head.py │ │ │ ├── dii_head.py │ │ │ ├── double_bbox_head.py │ │ │ ├── sabl_head.py │ │ │ └── scnet_bbox_head.py │ │ ├── cascade_roi_head.py │ │ ├── double_roi_head.py │ │ ├── dynamic_roi_head.py │ │ ├── grid_roi_head.py │ │ ├── htc_roi_head.py │ │ ├── mask_heads │ │ │ ├── __init__.py │ │ │ ├── coarse_mask_head.py │ │ │ ├── fcn_mask_head.py │ │ │ ├── feature_relay_head.py │ │ │ ├── fused_semantic_head.py │ │ │ ├── global_context_head.py │ │ │ ├── grid_head.py │ │ │ ├── htc_mask_head.py │ │ │ ├── mask_point_head.py │ │ │ ├── maskiou_head.py │ │ │ ├── scnet_mask_head.py │ │ │ └── scnet_semantic_head.py │ │ ├── mask_scoring_roi_head.py │ │ ├── pisa_roi_head.py │ │ ├── point_rend_roi_head.py │ │ ├── roi_extractors │ │ │ ├── __init__.py │ │ │ ├── base_roi_extractor.py │ │ │ ├── generic_roi_extractor.py │ │ │ └── single_level_roi_extractor.py │ │ ├── scnet_roi_head.py │ │ ├── shared_heads │ │ │ ├── __init__.py │ │ │ └── res_layer.py │ │ ├── sparse_roi_head.py │ │ ├── standard_roi_head.py │ │ ├── test_mixins.py │ │ └── trident_roi_head.py │ └── utils │ │ ├── __init__.py │ │ ├── builder.py │ │ ├── gaussian_target.py │ │ ├── inverted_residual.py │ │ ├── make_divisible.py │ │ ├── normed_predictor.py │ │ ├── positional_encoding.py │ │ ├── res_layer.py │ │ ├── se_layer.py │ │ └── transformer.py ├── utils │ ├── __init__.py │ ├── collect_env.py │ ├── contextmanagers.py │ ├── logger.py │ ├── optimizer.py │ ├── profiling.py │ ├── util_mixins.py │ └── util_random.py └── version.py ├── model-index.yml ├── pytest.ini ├── requirements.txt ├── requirements ├── build.txt ├── docs.txt ├── mminstall.txt ├── optional.txt ├── readthedocs.txt ├── runtime.txt └── tests.txt ├── resources ├── bigdet_statistics.png └── bigdetection.png ├── setup.cfg ├── setup.py └── tools ├── analysis_tools ├── analyze_logs.py ├── analyze_results.py ├── benchmark.py ├── coco_error_analysis.py ├── eval_metric.py ├── get_flops.py ├── robustness_eval.py └── test_robustness.py ├── dataset_converters ├── cityscapes.py └── pascal_voc.py ├── deployment ├── mmdet2torchserve.py ├── mmdet_handler.py ├── onnx2tensorrt.py ├── pytorch2onnx.py └── test.py ├── dist_test.sh ├── dist_train.sh ├── misc ├── browse_dataset.py └── print_config.py ├── model_converters ├── detectron2pytorch.py ├── publish_model.py ├── regnet2mmdet.py ├── selfsup2mmdet.py ├── upgrade_model_version.py └── upgrade_ssd_version.py ├── slurm_test.sh ├── slurm_train.sh ├── test.py └── train.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | db.sqlite3 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # SageMath parsed files 82 | *.sage.py 83 | 84 | # Environments 85 | .env 86 | .venv 87 | env/ 88 | venv/ 89 | ENV/ 90 | env.bak/ 91 | venv.bak/ 92 | 93 | # Spyder project settings 94 | .spyderproject 95 | .spyproject 96 | 97 | # Rope project settings 98 | .ropeproject 99 | 100 | # mkdocs documentation 101 | /site 102 | 103 | # mypy 104 | .mypy_cache/ 105 | 106 | data/ 107 | data 108 | .vscode 109 | .idea 110 | .DS_Store 111 | 112 | # custom 113 | *.pkl 114 | *.pkl.json 115 | *.log.json 116 | work_dirs/ 117 | 118 | # Pytorch 119 | *.pth 120 | *.py~ 121 | *.sh~ 122 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://gitlab.com/pycqa/flake8.git 3 | rev: 3.8.3 4 | hooks: 5 | - id: flake8 6 | - repo: https://github.com/asottile/seed-isort-config 7 | rev: v2.2.0 8 | hooks: 9 | - id: seed-isort-config 10 | - repo: https://github.com/timothycrosley/isort 11 | rev: 4.3.21 12 | hooks: 13 | - id: isort 14 | - repo: https://github.com/pre-commit/mirrors-yapf 15 | rev: v0.30.0 16 | hooks: 17 | - id: yapf 18 | - repo: https://github.com/pre-commit/pre-commit-hooks 19 | rev: v3.1.0 20 | hooks: 21 | - id: trailing-whitespace 22 | - id: check-yaml 23 | - id: end-of-file-fixer 24 | - id: requirements-txt-fixer 25 | - id: double-quote-string-fixer 26 | - id: check-merge-conflict 27 | - id: fix-encoding-pragma 28 | args: ["--remove"] 29 | - id: mixed-line-ending 30 | args: ["--fix=lf"] 31 | - repo: https://github.com/jumanjihouse/pre-commit-hooks 32 | rev: 2.1.4 33 | hooks: 34 | - id: markdownlint 35 | args: ["-r", "~MD002,~MD013,~MD024,~MD029,~MD033,~MD034,~MD036", "-t", "allow_different_nesting"] 36 | - repo: https://github.com/myint/docformatter 37 | rev: v1.3.1 38 | hooks: 39 | - id: docformatter 40 | args: ["--in-place", "--wrap-descriptions", "79"] 41 | -------------------------------------------------------------------------------- /.readthedocs.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | python: 4 | version: 3.7 5 | install: 6 | - requirements: requirements/docs.txt 7 | - requirements: requirements/readthedocs.txt 8 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | ## Code of Conduct 2 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). 
3 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact 4 | opensource-codeofconduct@amazon.com with any additional questions or comments. -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing Guidelines 2 | 3 | Thank you for your interest in contributing to our project. Whether it's a bug report, new feature, correction, or additional 4 | documentation, we greatly value feedback and contributions from our community. 5 | 6 | Please read through this document before submitting any issues or pull requests to ensure we have all the necessary 7 | information to effectively respond to your bug report or contribution. 8 | 9 | 10 | ## Reporting Bugs/Feature Requests 11 | 12 | We welcome you to use the GitHub issue tracker to report bugs or suggest features. 13 | 14 | When filing an issue, please check existing open, or recently closed, issues to make sure somebody else hasn't already 15 | reported the issue. Please try to include as much information as you can. Details like these are incredibly useful: 16 | 17 | * A reproducible test case or series of steps 18 | * The version of our code being used 19 | * Any modifications you've made relevant to the bug 20 | * Anything unusual about your environment or deployment 21 | 22 | 23 | ## Contributing via Pull Requests 24 | Contributions via pull requests are much appreciated. Before sending us a pull request, please ensure that: 25 | 26 | 1. You are working against the latest source on the *main* branch. 27 | 2. You check existing open, and recently merged, pull requests to make sure someone else hasn't addressed the problem already. 28 | 3. You open an issue to discuss any significant work - we would hate for your time to be wasted. 29 | 30 | To send us a pull request, please: 31 | 32 | 1. Fork the repository. 33 | 2. Modify the source; please focus on the specific change you are contributing. If you also reformat all the code, it will be hard for us to focus on your change. 34 | 3. Ensure local tests pass. 35 | 4. Commit to your fork using clear commit messages. 36 | 5. Send us a pull request, answering any default questions in the pull request interface. 37 | 6. Pay attention to any automated CI failures reported in the pull request, and stay involved in the conversation. 38 | 39 | GitHub provides additional documentation on [forking a repository](https://help.github.com/articles/fork-a-repo/) and 40 | [creating a pull request](https://help.github.com/articles/creating-a-pull-request/). 41 | 42 | 43 | ## Finding contributions to work on 44 | Looking at the existing issues is a great way to find something to work on. As our projects use the default GitHub issue labels (enhancement/bug/duplicate/help wanted/invalid/question/wontfix), looking at any 'help wanted' issues is a great place to start. 45 | 46 | 47 | ## Code of Conduct 48 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). 49 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact 50 | opensource-codeofconduct@amazon.com with any additional questions or comments.
51 | 52 | 53 | ## Security issue notifications 54 | If you discover a potential security issue in this project we ask that you notify AWS/Amazon Security via our [vulnerability reporting page](http://aws.amazon.com/security/vulnerability-reporting/). Please do **not** create a public GitHub issue. 55 | 56 | 57 | ## Licensing 58 | 59 | See the [LICENSE](LICENSE) file for our project's licensing. We will ask you to confirm the licensing of your contribution. -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include requirements/*.txt 2 | include mmdet/VERSION 3 | include mmdet/model-index.yml 4 | include mmdet/demo/*/* 5 | recursive-include mmdet/configs *.py *.yml 6 | recursive-include mmdet/tools *.sh *.py 7 | -------------------------------------------------------------------------------- /NOTICE: -------------------------------------------------------------------------------- 1 | BigDetection 2 | Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | 4 | Our project is mainly based on the MMDetection codebase (https://github.com/open-mmlab/mmdetection). In order to train models on the large combined dataset, we modified the original distributed_sampler.py file in MMDetection (https://github.com/open-mmlab/mmdetection/blob/master/mmdet/datasets/samplers/distributed_sampler.py) into our distributed_classaware_sampler.py file (https://github.com/amazon-research/bigdetection/blob/main/mmdet/datasets/samplers/distributed_classaware_sampler.py). All other parts remain the same as in MMDetection. The files under the configs folder are simply configurations for training different models. 5 | -------------------------------------------------------------------------------- /configs/BigDetection/cbnetv2/htc_cbv2_swin_base_giou_4conv1f_adamw_20e_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = 'htc_cbv2_swin_base_adamw_20e_coco.py' 2 | 3 | model = dict( 4 | roi_head=dict( 5 | bbox_head=[ 6 | dict( 7 | type='ConvFCBBoxHead', 8 | num_shared_convs=4, 9 | num_shared_fcs=1, 10 | in_channels=256, 11 | conv_out_channels=256, 12 | fc_out_channels=1024, 13 | roi_feat_size=7, 14 | num_classes=80, 15 | bbox_coder=dict( 16 | type='DeltaXYWHBBoxCoder', 17 | target_means=[0., 0., 0., 0.], 18 | target_stds=[0.1, 0.1, 0.2, 0.2]), 19 | reg_class_agnostic=True, 20 | reg_decoded_bbox=True, 21 | norm_cfg=dict(type='SyncBN', requires_grad=True), 22 | loss_cls=dict( 23 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), 24 | loss_bbox=dict(type='GIoULoss', loss_weight=10.0)), 25 | dict( 26 | type='ConvFCBBoxHead', 27 | num_shared_convs=4, 28 | num_shared_fcs=1, 29 | in_channels=256, 30 | conv_out_channels=256, 31 | fc_out_channels=1024, 32 | roi_feat_size=7, 33 | num_classes=80, 34 | bbox_coder=dict( 35 | type='DeltaXYWHBBoxCoder', 36 | target_means=[0., 0., 0., 0.], 37 | target_stds=[0.05, 0.05, 0.1, 0.1]), 38 | reg_class_agnostic=True, 39 | reg_decoded_bbox=True, 40 | norm_cfg=dict(type='SyncBN', requires_grad=True), 41 | loss_cls=dict( 42 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), 43 | loss_bbox=dict(type='GIoULoss', loss_weight=10.0)), 44 | dict( 45 | type='ConvFCBBoxHead', 46 | num_shared_convs=4, 47 | num_shared_fcs=1, 48 | in_channels=256, 49 | conv_out_channels=256, 50 | fc_out_channels=1024, 51 | roi_feat_size=7, 52 | num_classes=80, 53 | bbox_coder=dict( 54 | type='DeltaXYWHBBoxCoder', 55 |
target_means=[0., 0., 0., 0.], 56 | target_stds=[0.033, 0.033, 0.067, 0.067]), 57 | reg_class_agnostic=True, 58 | reg_decoded_bbox=True, 59 | norm_cfg=dict(type='SyncBN', requires_grad=True), 60 | loss_cls=dict( 61 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), 62 | loss_bbox=dict(type='GIoULoss', loss_weight=10.0)) 63 | ] 64 | ) 65 | ) 66 | -------------------------------------------------------------------------------- /configs/BigDetection/cbnetv2/htc_cbv2_swin_base_giou_4conv1f_adamw_20e_coco_tta.py: -------------------------------------------------------------------------------- 1 | _base_ = 'htc_cbv2_swin_base_giou_4conv1f_adamw_20e_coco.py' 2 | 3 | # ------ Test time augmentation ------ 4 | img_norm_cfg = dict( 5 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 6 | 7 | test_pipeline = [ 8 | dict(type='LoadImageFromFile'), 9 | dict( 10 | type='MultiScaleFlipAug', 11 | img_scale=[(1600, 1000), (1600, 1400), (1800, 1200), (1800, 1600)], 12 | flip=True, 13 | transforms=[ 14 | dict(type='Resize', keep_ratio=True), 15 | dict(type='RandomFlip'), 16 | dict(type='Normalize', **img_norm_cfg), 17 | dict(type='Pad', size_divisor=32), 18 | dict(type='ImageToTensor', keys=['img']), 19 | dict(type='Collect', keys=['img']), 20 | ]) 21 | ] 22 | data = dict( 23 | val=dict(pipeline=test_pipeline), 24 | test=dict(pipeline=test_pipeline)) -------------------------------------------------------------------------------- /configs/_base_/datasets/coco_detection.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'CocoDataset' 3 | data_root = 'data/coco/' 4 | img_norm_cfg = dict( 5 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 6 | train_pipeline = [ 7 | dict(type='LoadImageFromFile'), 8 | dict(type='LoadAnnotations', with_bbox=True), 9 | dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), 10 | dict(type='RandomFlip', flip_ratio=0.5), 11 | dict(type='Normalize', **img_norm_cfg), 12 | dict(type='Pad', size_divisor=32), 13 | dict(type='DefaultFormatBundle'), 14 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), 15 | ] 16 | test_pipeline = [ 17 | dict(type='LoadImageFromFile'), 18 | dict( 19 | type='MultiScaleFlipAug', 20 | img_scale=(1333, 800), 21 | flip=False, 22 | transforms=[ 23 | dict(type='Resize', keep_ratio=True), 24 | dict(type='RandomFlip'), 25 | dict(type='Normalize', **img_norm_cfg), 26 | dict(type='Pad', size_divisor=32), 27 | dict(type='ImageToTensor', keys=['img']), 28 | dict(type='Collect', keys=['img']), 29 | ]) 30 | ] 31 | data = dict( 32 | samples_per_gpu=2, 33 | workers_per_gpu=2, 34 | train=dict( 35 | type=dataset_type, 36 | ann_file=data_root + 'annotations/instances_train2017.json', 37 | img_prefix=data_root + 'train2017/', 38 | pipeline=train_pipeline), 39 | val=dict( 40 | type=dataset_type, 41 | ann_file=data_root + 'annotations/instances_val2017.json', 42 | img_prefix=data_root + 'val2017/', 43 | pipeline=test_pipeline), 44 | test=dict( 45 | type=dataset_type, 46 | ann_file=data_root + 'annotations/instances_val2017.json', 47 | img_prefix=data_root + 'val2017/', 48 | pipeline=test_pipeline)) 49 | evaluation = dict(interval=1, metric='bbox') 50 | -------------------------------------------------------------------------------- /configs/_base_/datasets/coco_instance.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'CocoDataset' 3 | data_root = 
'data/coco/' 4 | img_norm_cfg = dict( 5 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 6 | train_pipeline = [ 7 | dict(type='LoadImageFromFile'), 8 | dict(type='LoadAnnotations', with_bbox=True, with_mask=True), 9 | dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), 10 | dict(type='RandomFlip', flip_ratio=0.5), 11 | dict(type='Normalize', **img_norm_cfg), 12 | dict(type='Pad', size_divisor=32), 13 | dict(type='DefaultFormatBundle'), 14 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), 15 | ] 16 | test_pipeline = [ 17 | dict(type='LoadImageFromFile'), 18 | dict( 19 | type='MultiScaleFlipAug', 20 | img_scale=(1333, 800), 21 | flip=False, 22 | transforms=[ 23 | dict(type='Resize', keep_ratio=True), 24 | dict(type='RandomFlip'), 25 | dict(type='Normalize', **img_norm_cfg), 26 | dict(type='Pad', size_divisor=32), 27 | dict(type='ImageToTensor', keys=['img']), 28 | dict(type='Collect', keys=['img']), 29 | ]) 30 | ] 31 | data = dict( 32 | samples_per_gpu=2, 33 | workers_per_gpu=2, 34 | train=dict( 35 | type=dataset_type, 36 | ann_file=data_root + 'annotations/instances_train2017.json', 37 | img_prefix=data_root + 'train2017/', 38 | pipeline=train_pipeline), 39 | val=dict( 40 | type=dataset_type, 41 | ann_file=data_root + 'annotations/instances_val2017.json', 42 | img_prefix=data_root + 'val2017/', 43 | pipeline=test_pipeline), 44 | test=dict( 45 | type=dataset_type, 46 | ann_file=data_root + 'annotations/instances_val2017.json', 47 | img_prefix=data_root + 'val2017/', 48 | pipeline=test_pipeline)) 49 | evaluation = dict(metric=['bbox', 'segm']) 50 | -------------------------------------------------------------------------------- /configs/_base_/default_runtime.py: -------------------------------------------------------------------------------- 1 | checkpoint_config = dict(interval=1) 2 | # yapf:disable 3 | log_config = dict( 4 | interval=50, 5 | hooks=[ 6 | dict(type='TextLoggerHook'), 7 | # dict(type='TensorboardLoggerHook') 8 | ]) 9 | # yapf:enable 10 | custom_hooks = [dict(type='NumClassCheckHook')] 11 | 12 | dist_params = dict(backend='nccl') 13 | log_level = 'INFO' 14 | load_from = None 15 | resume_from = None 16 | workflow = [('train', 1)] 17 | -------------------------------------------------------------------------------- /configs/_base_/schedules/schedule_1x.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) 3 | optimizer_config = dict(grad_clip=None) 4 | # learning policy 5 | lr_config = dict( 6 | policy='step', 7 | warmup='linear', 8 | warmup_iters=500, 9 | warmup_ratio=0.001, 10 | step=[8, 11]) 11 | runner = dict(type='EpochBasedRunner', max_epochs=12) 12 | -------------------------------------------------------------------------------- /configs/_base_/schedules/schedule_20e.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) 3 | optimizer_config = dict(grad_clip=None) 4 | # learning policy 5 | lr_config = dict( 6 | policy='step', 7 | warmup='linear', 8 | warmup_iters=500, 9 | warmup_ratio=0.001, 10 | step=[16, 19]) 11 | runner = dict(type='EpochBasedRunner', max_epochs=20) 12 | -------------------------------------------------------------------------------- /configs/_base_/schedules/schedule_2x.py: 
-------------------------------------------------------------------------------- 1 | # optimizer 2 | optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) 3 | optimizer_config = dict(grad_clip=None) 4 | # learning policy 5 | lr_config = dict( 6 | policy='step', 7 | warmup='linear', 8 | warmup_iters=500, 9 | warmup_ratio=0.001, 10 | step=[16, 22]) 11 | runner = dict(type='EpochBasedRunner', max_epochs=24) 12 | -------------------------------------------------------------------------------- /demo/README.md: -------------------------------------------------------------------------------- 1 | This is a BigDetection demo. 2 | 3 | ##### Steps: 4 | 5 | ###### Replace 'bigdetection/mmdet/apis/inference.py' in the original BigDetection codebase with the new inference.py. 6 | 7 | ###### Checkpoint file download links: 8 | 9 | [yolov3](https://download.openmmlab.com/mmdetection/v2.0/yolo/yolov3_d53_fp16_mstrain-608_273e_coco/yolov3_d53_fp16_mstrain-608_273e_coco_20210517_213542-4bc34944.pth) -- Offered by the official MMDetection contributors 10 | 11 | [yolov3](https://big-detection.s3.us-west-2.amazonaws.com/bigdet_cpts/mmdetection_cpts/yolov3_d53_bigdet_8x.pth) -- Offered by our BigDetection contributors 12 | 13 | [detr](https://big-detection.s3.us-west-2.amazonaws.com/bigdet_cpts/mmdetection_cpts/deformable_detr_bigdet_8x.pth) -- Offered by our BigDetection contributors 14 | 15 | [cbnetv2](https://big-detection.s3.us-west-2.amazonaws.com/bigdet_cpts/mmdetection_cpts/htc_cbv2_swin_base_giou_4conv1f_bigdet.pth) -- Offered by our BigDetection contributors 16 | 17 | 18 | 19 | ###### Put the checkpoint (.pth) file in the checkpoints directory 20 | 21 | bigdetection 22 | ├── **checkpoints** (put the .pth file here) 23 | ├── configs 24 | ├── demo 25 | └── ...... 26 | 27 | 28 | 29 | 30 | 31 | ###### Run the following commands in a terminal 32 | 33 | -- Using the official MMDetection checkpoint file: 34 | 35 | ``` 36 | python demo/image_demo.py demo/demo.jpg configs/BigDetection/yolov3/yolov3_d53_mstrain-608_1x_coco.py checkpoints/yolov3_d53_fp16_mstrain-608_273e_coco_20210517_213542-4bc34944.pth 37 | ``` 38 | 39 | -- Using our BigDetection checkpoint files: 40 | 41 | ``` 42 | python demo/image_demo.py demo/demo.jpg configs/BigDetection/yolov3/yolov3_d53_mstrain-608_8x_bigdet.py checkpoints/yolov3_d53_bigdet_8x.pth 43 | ``` 44 | 45 | ``` 46 | python demo/image_demo.py demo/demo.jpg configs/BigDetection/deformable_detr/deformable_detr_r50_16x2_8x_bigdet.py checkpoints/deformable_detr_bigdet_8x.pth 47 | ``` 48 | 49 | ``` 50 | python demo/image_demo.py demo/demo.jpg configs/BigDetection/cbnetv2/htc_cbv2_swin_base_giou_4conv1f_adamw_bigdet.py checkpoints/htc_cbv2_swin_base_giou_4conv1f_bigdet.pth 51 | ``` 52 | -------------------------------------------------------------------------------- /demo/cbnet_output.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amazon-science/bigdetection/5c8140523b359bf309913eb0a335a1976497e2a2/demo/cbnet_output.png -------------------------------------------------------------------------------- /demo/demo.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amazon-science/bigdetection/5c8140523b359bf309913eb0a335a1976497e2a2/demo/demo.jpg -------------------------------------------------------------------------------- /demo/image_demo.py: -------------------------------------------------------------------------------- 1 |
# BigDetection image demo 2 | import asyncio 3 | from argparse import ArgumentParser 4 | 5 | from mmdet.apis import (async_inference_detector, inference_detector, init_detector, show_result_pyplot) 6 | 7 | 8 | def parse_args(): 9 | parser = ArgumentParser() 10 | parser.add_argument('img', help='Image file') 11 | parser.add_argument('config', help='Config file') 12 | parser.add_argument('checkpoint', help='Checkpoint file') 13 | parser.add_argument('--out-file', default=None, help='Path to output file') 14 | parser.add_argument('--device', default='cuda:0', help='Device used for inference') 15 | parser.add_argument('--score-thr', type=float, default=0.3, help='bbox score threshold') 16 | parser.add_argument('--async-test', action='store_true', help='whether to set async options for async inference.') 17 | args = parser.parse_args() 18 | 19 | return args 20 | 21 | 22 | def main(args): 23 | # build the model from a config and a checkpoint file 24 | model = init_detector(args.config, args.checkpoint, device=args.device) 25 | # test a single image 26 | result = inference_detector(model, args.img) 27 | # show the results 28 | show_result_pyplot(model, args.img, result, score_thr=args.score_thr, out_file=args.out_file) 29 | 30 | 31 | async def async_main(args): 32 | # build the model from a config file and a checkpoint file 33 | model = init_detector(args.config, args.checkpoint, device=args.device) 34 | # test a single image 35 | tasks = asyncio.create_task(async_inference_detector(model, args.img)) 36 | result = await asyncio.gather(tasks) 37 | # show the results 38 | show_result_pyplot( 39 | model, 40 | args.img, 41 | result[0], 42 | score_thr=args.score_thr, 43 | out_file=args.out_file) 44 | 45 | 46 | if __name__ == '__main__': 47 | args = parse_args() 48 | if args.async_test: 49 | asyncio.run(async_main(args)) 50 | else: 51 | main(args) 52 | -------------------------------------------------------------------------------- /detectron2-projects/CenterNet2/README.md: -------------------------------------------------------------------------------- 1 | This directory provides partial configurations implemented in detectron2. Please follow the installation instructions of [CenterNet2](https://github.com/xingyizhou/CenterNet2) to reproduce all results.
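[Editor's note] The training commands below all go through CenterNet2's `train_net.py`. The same entry point can normally also evaluate a trained checkpoint; a minimal sketch, assuming `train_net.py` keeps detectron2's standard `--eval-only` flag, with `/path/to/checkpoint` as a placeholder:

```
python projects/CenterNet2/train_net.py \
    --config-file projects/CenterNet2/configs/centernet2_R_50_BigDet_8x.yaml \
    --num-gpus 8 --eval-only MODEL.WEIGHTS /path/to/checkpoint
```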
2 | 3 | ## Training 4 | 5 | ***Pre-training*** 6 | 7 | To pre-train CenterNet2 with a ResNet-50 backbone on BigDetection using 8 GPUs, run: 8 | ``` 9 | python projects/CenterNet2/train_net.py \ 10 | --config-file projects/CenterNet2/configs/centernet2_R_50_BigDet_8x.yaml \ 11 | --num-gpus 8 12 | ``` 13 | 14 | ***Data efficiency*** 15 | 16 | To fine-tune a BigDetection pre-trained Faster R-CNN on partial COCO, run: 17 | ``` 18 | # 1% COCO 19 | python projects/CenterNet2/train_net.py \ 20 | --config-file projects/CenterNet2/configs/faster_rcnn_R_50_FPN_COCO-1.yaml \ 21 | --num-gpus 8 \ 22 | MODEL.WEIGHTS /path/to/bigdet_pretrained_rcnn_checkpoint 23 | 24 | # 2% COCO 25 | python projects/CenterNet2/train_net.py \ 26 | --config-file projects/CenterNet2/configs/faster_rcnn_R_50_FPN_COCO-2.yaml \ 27 | --num-gpus 8 \ 28 | MODEL.WEIGHTS /path/to/bigdet_pretrained_rcnn_checkpoint 29 | 30 | # 5% COCO 31 | python projects/CenterNet2/train_net.py \ 32 | --config-file projects/CenterNet2/configs/faster_rcnn_R_50_FPN_COCO-5.yaml \ 33 | --num-gpus 8 \ 34 | MODEL.WEIGHTS /path/to/bigdet_pretrained_rcnn_checkpoint 35 | 36 | # 10% COCO 37 | python projects/CenterNet2/train_net.py \ 38 | --config-file projects/CenterNet2/configs/faster_rcnn_R_50_FPN_COCO-10.yaml \ 39 | --num-gpus 8 \ 40 | MODEL.WEIGHTS /path/to/bigdet_pretrained_rcnn_checkpoint 41 | ``` 42 | -------------------------------------------------------------------------------- /detectron2-projects/CenterNet2/configs/Base-CRCNN.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | BACKBONE: 4 | NAME: "build_p67_resnet_fpn_backbone" 5 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 6 | RESNETS: 7 | DEPTH: 50 8 | OUT_FEATURES: ["res3", "res4", "res5"] 9 | FPN: 10 | IN_FEATURES: ["res3", "res4", "res5"] 11 | ANCHOR_GENERATOR: 12 | SIZES: !!python/object/apply:eval ["[[x, x * 2**(1.0/3), x * 2**(2.0/3) ] for x in [32, 64, 128, 256, 512 ]]"] 13 | ASPECT_RATIOS: [[0.5, 1.0, 2.0]] 14 | RPN: 15 | IN_FEATURES: ["p3", "p4", "p5", "p6", "p7"] 16 | PRE_NMS_TOPK_TRAIN: 2000 17 | PRE_NMS_TOPK_TEST: 1000 18 | POST_NMS_TOPK_TRAIN: 2000 19 | POST_NMS_TOPK_TEST: 1000 20 | ROI_HEADS: 21 | NUM_CLASSES: 80 22 | NAME: CustomCascadeROIHeads 23 | IN_FEATURES: ["p3", "p4", "p5"] 24 | SCORE_THRESH_TEST: 0.0001 25 | ROI_BOX_HEAD: 26 | NAME: "FastRCNNConvFCHead" 27 | NUM_FC: 2 28 | POOLER_RESOLUTION: 7 29 | CLS_AGNOSTIC_BBOX_REG: True 30 | DATASETS: 31 | TRAIN: ("coco_2017_train",) 32 | TEST: ("coco_2017_val",) 33 | TEST: 34 | DETECTIONS_PER_IMAGE: 300 35 | SOLVER: 36 | IMS_PER_BATCH: 16 37 | BASE_LR: 0.01 38 | STEPS: (60000, 80000) 39 | MAX_ITER: 90000 40 | CHECKPOINT_PERIOD: 1000000 41 | WARMUP_ITERS: 4000 42 | WARMUP_FACTOR: 0.00025 43 | CLIP_GRADIENTS: 44 | ENABLED: True 45 | INPUT: 46 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 47 | VERSION: 2 48 | OUTPUT_DIR: "output/CRCNN/auto" 49 | -------------------------------------------------------------------------------- /detectron2-projects/CenterNet2/configs/Base-CenterNet2.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | PROPOSAL_GENERATOR: 4 | NAME: "CenterNet" 5 | BACKBONE: 6 | NAME: "build_p67_resnet_fpn_backbone" 7 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 8 | RESNETS: 9 | DEPTH: 50 10 | OUT_FEATURES: ["res3", "res4", "res5"] 11 | FPN: 12 | IN_FEATURES: ["res3", "res4", "res5"] 13 | ROI_HEADS: 14 | NAME: CustomCascadeROIHeads
15 | IN_FEATURES: ["p3", "p4", "p5", "p6", "p7"] 16 | IOU_THRESHOLDS: [0.6] 17 | NMS_THRESH_TEST: 0.7 18 | ROI_BOX_CASCADE_HEAD: 19 | IOUS: [0.6, 0.7, 0.8] 20 | ROI_BOX_HEAD: 21 | NAME: "FastRCNNConvFCHead" 22 | NUM_FC: 2 23 | POOLER_RESOLUTION: 7 24 | CLS_AGNOSTIC_BBOX_REG: True 25 | MULT_PROPOSAL_SCORE: True 26 | CENTERNET: 27 | REG_WEIGHT: 1. 28 | NOT_NORM_REG: True 29 | ONLY_PROPOSAL: True 30 | WITH_AGN_HM: True 31 | INFERENCE_TH: 0.0001 32 | PRE_NMS_TOPK_TRAIN: 4000 33 | POST_NMS_TOPK_TRAIN: 2000 34 | PRE_NMS_TOPK_TEST: 1000 35 | POST_NMS_TOPK_TEST: 256 36 | NMS_TH_TRAIN: 0.9 37 | NMS_TH_TEST: 0.9 38 | POS_WEIGHT: 0.5 39 | NEG_WEIGHT: 0.5 40 | IGNORE_HIGH_FP: 0.85 41 | DATASETS: 42 | TRAIN: ("coco_2017_train",) 43 | TEST: ("coco_2017_val",) 44 | SOLVER: 45 | IMS_PER_BATCH: 16 46 | BASE_LR: 0.02 47 | STEPS: (60000, 80000) 48 | MAX_ITER: 90000 49 | CHECKPOINT_PERIOD: 1000000000 50 | WARMUP_ITERS: 4000 51 | WARMUP_FACTOR: 0.00025 52 | CLIP_GRADIENTS: 53 | ENABLED: True 54 | INPUT: 55 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 56 | OUTPUT_DIR: "./output/CenterNet2/auto" 57 | -------------------------------------------------------------------------------- /detectron2-projects/CenterNet2/configs/Base-RCNN-C4.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 3 | META_ARCHITECTURE: "GeneralizedRCNN" 4 | RESNETS: 5 | DEPTH: 50 6 | RPN: 7 | PRE_NMS_TOPK_TEST: 6000 8 | POST_NMS_TOPK_TEST: 1000 9 | ROI_HEADS: 10 | NAME: "Res5ROIHeads" 11 | DATASETS: 12 | TRAIN: ("coco_2017_train",) 13 | TEST: ("coco_2017_val",) 14 | SOLVER: 15 | IMS_PER_BATCH: 16 16 | BASE_LR: 0.02 17 | STEPS: (60000, 80000) 18 | MAX_ITER: 90000 19 | INPUT: 20 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 21 | VERSION: 2 22 | OUTPUT_DIR: "output/RCNN-C4/auto" 23 | -------------------------------------------------------------------------------- /detectron2-projects/CenterNet2/configs/Base-RCNN-FPN.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 3 | META_ARCHITECTURE: "GeneralizedRCNN" 4 | BACKBONE: 5 | NAME: "build_resnet_fpn_backbone" 6 | RESNETS: 7 | DEPTH: 50 8 | OUT_FEATURES: ["res2", "res3", "res4", "res5"] 9 | FPN: 10 | IN_FEATURES: ["res2", "res3", "res4", "res5"] 11 | ANCHOR_GENERATOR: 12 | SIZES: [[32], [64], [128], [256], [512]] # One size for each in feature map 13 | ASPECT_RATIOS: [[0.5, 1.0, 2.0]] # Three aspect ratios (same for all in feature maps) 14 | RPN: 15 | IN_FEATURES: ["p2", "p3", "p4", "p5", "p6"] 16 | PRE_NMS_TOPK_TRAIN: 2000 # Per FPN level 17 | PRE_NMS_TOPK_TEST: 1000 # Per FPN level 18 | POST_NMS_TOPK_TRAIN: 1000 19 | POST_NMS_TOPK_TEST: 1000 20 | ROI_HEADS: 21 | NAME: "StandardROIHeads" 22 | IN_FEATURES: ["p2", "p3", "p4", "p5"] 23 | ROI_BOX_HEAD: 24 | NAME: "FastRCNNConvFCHead" 25 | NUM_FC: 2 26 | POOLER_RESOLUTION: 7 27 | ROI_MASK_HEAD: 28 | NAME: "MaskRCNNConvUpsampleHead" 29 | NUM_CONV: 4 30 | POOLER_RESOLUTION: 14 31 | DATASETS: 32 | TRAIN: ("coco_2017_train",) 33 | TEST: ("coco_2017_val",) 34 | SOLVER: 35 | IMS_PER_BATCH: 16 36 | BASE_LR: 0.02 37 | STEPS: (60000, 80000) 38 | MAX_ITER: 90000 39 | INPUT: 40 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 41 | VERSION: 2 42 | OUTPUT_DIR: "output/RCNN-FPN/auto" 43 | -------------------------------------------------------------------------------- /detectron2-projects/CenterNet2/configs/cascade_rcnn_R_50_BigDet_8x.yaml: 
-------------------------------------------------------------------------------- 1 | _BASE_: "Base-CRCNN.yaml" 2 | MODEL: 3 | ROI_HEADS: 4 | NUM_CLASSES: 600 5 | DATASETS: 6 | TRAIN: ("bigdet_obj_train", "bigdet_oid_train", "bigdet_lvis_train") 7 | TEST: ("bigdet_val",) 8 | DATALOADER: 9 | SAMPLER_TRAIN: "ClassAwareSampler" 10 | SOLVER: 11 | STEPS: (660000, 700000) 12 | MAX_ITER: 720000 13 | 14 | -------------------------------------------------------------------------------- /detectron2-projects/CenterNet2/configs/centernet2_R_50_BigDet_8x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-CenterNet2.yaml" 2 | MODEL: 3 | ROI_HEADS: 4 | NUM_CLASSES: 600 5 | CENTERNET: 6 | NUM_CLASSES: 600 7 | DATASETS: 8 | TRAIN: ("bigdet_obj_train", "bigdet_oid_train", "bigdet_lvis_train") 9 | TEST: ("bigdet_val",) 10 | DATALOADER: 11 | SAMPLER_TRAIN: "ClassAwareSampler" 12 | SOLVER: 13 | STEPS: (660000, 700000) 14 | MAX_ITER: 720000 15 | -------------------------------------------------------------------------------- /detectron2-projects/CenterNet2/configs/faster_rcnn_R_50_C4_BigDet_8x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-RCNN-C4.yaml" 2 | MODEL: 3 | ROI_HEADS: 4 | NUM_CLASSES: 600 5 | DATASETS: 6 | TRAIN: ("bigdet_obj_train", "bigdet_oid_train", "bigdet_lvis_train") 7 | TEST: ("bigdet_val",) 8 | DATALOADER: 9 | SAMPLER_TRAIN: "ClassAwareSampler" 10 | SOLVER: 11 | STEPS: (660000, 700000) 12 | MAX_ITER: 720000 13 | -------------------------------------------------------------------------------- /detectron2-projects/CenterNet2/configs/faster_rcnn_R_50_FPN_BigDet_8x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-RCNN-FPN.yaml" 2 | MODEL: 3 | ROI_HEADS: 4 | NUM_CLASSES: 600 5 | DATASETS: 6 | TRAIN: ("bigdet_obj_train", "bigdet_oid_train", "bigdet_lvis_train") 7 | TEST: ("bigdet_val",) 8 | DATALOADER: 9 | SAMPLER_TRAIN: "ClassAwareSampler" 10 | SOLVER: 11 | STEPS: (660000, 700000) 12 | MAX_ITER: 720000 13 | 14 | -------------------------------------------------------------------------------- /detectron2-projects/CenterNet2/configs/faster_rcnn_R_50_FPN_COCO-1.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "faster_rcnn_r50_fpn_bigdet_8x.pth" 4 | DATASETS: 5 | TRAIN: ("coco_2017_train_@1",) 6 | SOLVER: 7 | RESET_ITER: True 8 | IMS_PER_BATCH: 40 9 | BASE_LR: 0.001 10 | CHECKPOINT_PERIOD: 50 11 | WARMUP_ITERS: 0 12 | MAX_ITER: 5000 13 | OUTPUT_DIR: "./output/faster_rcnn_RPN_coco-1/" 14 | -------------------------------------------------------------------------------- /detectron2-projects/CenterNet2/configs/faster_rcnn_R_50_FPN_COCO-10.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "faster_rcnn_R_50_FPN_COCO-1.yaml" 2 | DATASETS: 3 | TRAIN: ("coco_2017_train_@10",) 4 | OUTPUT_DIR: "./output/faster_rcnn_RPN_coco-10/" 5 | -------------------------------------------------------------------------------- /detectron2-projects/CenterNet2/configs/faster_rcnn_R_50_FPN_COCO-2.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "faster_rcnn_R_50_FPN_COCO-1.yaml" 2 | DATASETS: 3 | TRAIN: ("coco_2017_train_@2",) 4 | OUTPUT_DIR: "./output/faster_rcnn_RPN_coco-2/" 5 | -------------------------------------------------------------------------------- 
/detectron2-projects/CenterNet2/configs/faster_rcnn_R_50_FPN_COCO-5.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "faster_rcnn_R_50_FPN_COCO-1.yaml" 2 | DATASETS: 3 | TRAIN: ("coco_2017_train_@5",) 4 | OUTPUT_DIR: "./output/faster_rcnn_RPN_coco-5/" 5 | -------------------------------------------------------------------------------- /mmcv_custom/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from .checkpoint import load_checkpoint 4 | 5 | __all__ = ['load_checkpoint'] 6 | -------------------------------------------------------------------------------- /mmcv_custom/runner/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Open-MMLab. All rights reserved. 2 | from .checkpoint import save_checkpoint 3 | from .epoch_based_runner import EpochBasedRunnerAmp 4 | from .iter_based_runner import IterBasedRunnerAmp 5 | 6 | 7 | __all__ = [ 8 | 'EpochBasedRunnerAmp', 'IterBasedRunnerAmp', 'save_checkpoint' 9 | ] 10 | -------------------------------------------------------------------------------- /mmcv_custom/runner/checkpoint.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Open-MMLab. All rights reserved. 2 | import os.path as osp 3 | import time 4 | from tempfile import TemporaryDirectory 5 | 6 | import torch 7 | from torch.optim import Optimizer 8 | 9 | import mmcv 10 | from mmcv.parallel import is_module_wrapper 11 | from mmcv.runner.checkpoint import weights_to_cpu, get_state_dict 12 | 13 | try: 14 | import apex 15 | except ImportError: 16 | print('apex is not installed') 17 | apex = None  # apex is optional; let save_checkpoint skip the amp state 18 | 19 | def save_checkpoint(model, filename, optimizer=None, meta=None): 20 | """Save checkpoint to file. 21 | 22 | The checkpoint will have four fields: ``meta``, ``state_dict``, 23 | ``optimizer`` and ``amp``. By default ``meta`` will contain version 24 | and time info. 25 | 26 | Args: 27 | model (Module): Module whose params are to be saved. 28 | filename (str): Checkpoint filename. 29 | optimizer (:obj:`Optimizer`, optional): Optimizer to be saved. 30 | meta (dict, optional): Metadata to be saved in checkpoint.
31 | """ 32 | if meta is None: 33 | meta = {} 34 | elif not isinstance(meta, dict): 35 | raise TypeError(f'meta must be a dict or None, but got {type(meta)}') 36 | meta.update(mmcv_version=mmcv.__version__, time=time.asctime()) 37 | 38 | if is_module_wrapper(model): 39 | model = model.module 40 | 41 | if hasattr(model, 'CLASSES') and model.CLASSES is not None: 42 | # save class name to the meta 43 | meta.update(CLASSES=model.CLASSES) 44 | 45 | checkpoint = { 46 | 'meta': meta, 47 | 'state_dict': weights_to_cpu(get_state_dict(model)) 48 | } 49 | # save optimizer state dict in the checkpoint 50 | if isinstance(optimizer, Optimizer): 51 | checkpoint['optimizer'] = optimizer.state_dict() 52 | elif isinstance(optimizer, dict): 53 | checkpoint['optimizer'] = {} 54 | for name, optim in optimizer.items(): 55 | checkpoint['optimizer'][name] = optim.state_dict() 56 | 57 | # save amp state dict in the checkpoint 58 | checkpoint['amp'] = apex.amp.state_dict() 59 | 60 | if filename.startswith('pavi://'): 61 | try: 62 | from pavi import modelcloud 63 | from pavi.exception import NodeNotFoundError 64 | except ImportError: 65 | raise ImportError( 66 | 'Please install pavi to load checkpoint from modelcloud.') 67 | model_path = filename[7:] 68 | root = modelcloud.Folder() 69 | model_dir, model_name = osp.split(model_path) 70 | try: 71 | model = modelcloud.get(model_dir) 72 | except NodeNotFoundError: 73 | model = root.create_training_model(model_dir) 74 | with TemporaryDirectory() as tmp_dir: 75 | checkpoint_file = osp.join(tmp_dir, model_name) 76 | with open(checkpoint_file, 'wb') as f: 77 | torch.save(checkpoint, f) 78 | f.flush() 79 | model.create_file(checkpoint_file, name=model_name) 80 | else: 81 | mmcv.mkdir_or_exist(osp.dirname(filename)) 82 | # immediately flush buffer 83 | with open(filename, 'wb') as f: 84 | torch.save(checkpoint, f) 85 | f.flush() 86 | -------------------------------------------------------------------------------- /mmdet/__init__.py: -------------------------------------------------------------------------------- 1 | import mmcv 2 | 3 | from .version import __version__, short_version 4 | 5 | 6 | def digit_version(version_str): 7 | digit_version = [] 8 | for x in version_str.split('.'): 9 | if x.isdigit(): 10 | digit_version.append(int(x)) 11 | elif x.find('rc') != -1: 12 | patch_version = x.split('rc') 13 | digit_version.append(int(patch_version[0]) - 1) 14 | digit_version.append(int(patch_version[1])) 15 | return digit_version 16 | 17 | 18 | mmcv_minimum_version = '1.3.8' 19 | mmcv_maximum_version = '1.4.0' 20 | mmcv_version = digit_version(mmcv.__version__) 21 | 22 | 23 | assert (mmcv_version >= digit_version(mmcv_minimum_version) 24 | and mmcv_version <= digit_version(mmcv_maximum_version)), \ 25 | f'MMCV=={mmcv.__version__} is used but incompatible. ' \ 26 | f'Please install mmcv>={mmcv_minimum_version}, <={mmcv_maximum_version}.' 
27 | 28 | __all__ = ['__version__', 'short_version'] 29 | -------------------------------------------------------------------------------- /mmdet/apis/__init__.py: -------------------------------------------------------------------------------- 1 | from .inference import (async_inference_detector, inference_detector, 2 | init_detector, show_result_pyplot) 3 | from .test import multi_gpu_test, single_gpu_test 4 | from .train import get_root_logger, set_random_seed, train_detector 5 | 6 | __all__ = [ 7 | 'get_root_logger', 'set_random_seed', 'train_detector', 'init_detector', 8 | 'async_inference_detector', 'inference_detector', 'show_result_pyplot', 9 | 'multi_gpu_test', 'single_gpu_test' 10 | ] 11 | -------------------------------------------------------------------------------- /mmdet/core/__init__.py: -------------------------------------------------------------------------------- 1 | from .anchor import * # noqa: F401, F403 2 | from .bbox import * # noqa: F401, F403 3 | from .evaluation import * # noqa: F401, F403 4 | from .mask import * # noqa: F401, F403 5 | from .post_processing import * # noqa: F401, F403 6 | from .utils import * # noqa: F401, F403 7 | -------------------------------------------------------------------------------- /mmdet/core/anchor/__init__.py: -------------------------------------------------------------------------------- 1 | from .anchor_generator import (AnchorGenerator, LegacyAnchorGenerator, 2 | YOLOAnchorGenerator) 3 | from .builder import (ANCHOR_GENERATORS, PRIOR_GENERATORS, 4 | build_anchor_generator, build_prior_generator) 5 | from .point_generator import MlvlPointGenerator, PointGenerator 6 | from .utils import anchor_inside_flags, calc_region, images_to_levels 7 | 8 | __all__ = [ 9 | 'AnchorGenerator', 'LegacyAnchorGenerator', 'anchor_inside_flags', 10 | 'PointGenerator', 'images_to_levels', 'calc_region', 11 | 'build_anchor_generator', 'ANCHOR_GENERATORS', 'YOLOAnchorGenerator', 12 | 'build_prior_generator', 'PRIOR_GENERATORS', 'MlvlPointGenerator' 13 | ] 14 | -------------------------------------------------------------------------------- /mmdet/core/anchor/builder.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | 3 | from mmcv.utils import Registry, build_from_cfg 4 | 5 | PRIOR_GENERATORS = Registry('Generator for anchors and points') 6 | 7 | ANCHOR_GENERATORS = PRIOR_GENERATORS 8 | 9 | 10 | def build_prior_generator(cfg, default_args=None): 11 | return build_from_cfg(cfg, PRIOR_GENERATORS, default_args) 12 | 13 | 14 | def build_anchor_generator(cfg, default_args=None): 15 | warnings.warn( 16 | '``build_anchor_generator`` would be deprecated soon, please use ' 17 | '``build_prior_generator`` ') 18 | return build_prior_generator(cfg, default_args=default_args) 19 | -------------------------------------------------------------------------------- /mmdet/core/anchor/utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | def images_to_levels(target, num_levels): 5 | """Convert targets by image to targets by feature level. 6 | 7 | [target_img0, target_img1] -> [target_level0, target_level1, ...] 
8 | """ 9 | target = torch.stack(target, 0) 10 | level_targets = [] 11 | start = 0 12 | for n in num_levels: 13 | end = start + n 14 | # level_targets.append(target[:, start:end].squeeze(0)) 15 | level_targets.append(target[:, start:end]) 16 | start = end 17 | return level_targets 18 | 19 | 20 | def anchor_inside_flags(flat_anchors, 21 | valid_flags, 22 | img_shape, 23 | allowed_border=0): 24 | """Check whether the anchors are inside the border. 25 | 26 | Args: 27 | flat_anchors (torch.Tensor): Flatten anchors, shape (n, 4). 28 | valid_flags (torch.Tensor): An existing valid flags of anchors. 29 | img_shape (tuple(int)): Shape of current image. 30 | allowed_border (int, optional): The border to allow the valid anchor. 31 | Defaults to 0. 32 | 33 | Returns: 34 | torch.Tensor: Flags indicating whether the anchors are inside a \ 35 | valid range. 36 | """ 37 | img_h, img_w = img_shape[:2] 38 | if allowed_border >= 0: 39 | inside_flags = valid_flags & \ 40 | (flat_anchors[:, 0] >= -allowed_border) & \ 41 | (flat_anchors[:, 1] >= -allowed_border) & \ 42 | (flat_anchors[:, 2] < img_w + allowed_border) & \ 43 | (flat_anchors[:, 3] < img_h + allowed_border) 44 | else: 45 | inside_flags = valid_flags 46 | return inside_flags 47 | 48 | 49 | def calc_region(bbox, ratio, featmap_size=None): 50 | """Calculate a proportional bbox region. 51 | 52 | The bbox center are fixed and the new h' and w' is h * ratio and w * ratio. 53 | 54 | Args: 55 | bbox (Tensor): Bboxes to calculate regions, shape (n, 4). 56 | ratio (float): Ratio of the output region. 57 | featmap_size (tuple): Feature map size used for clipping the boundary. 58 | 59 | Returns: 60 | tuple: x1, y1, x2, y2 61 | """ 62 | x1 = torch.round((1 - ratio) * bbox[0] + ratio * bbox[2]).long() 63 | y1 = torch.round((1 - ratio) * bbox[1] + ratio * bbox[3]).long() 64 | x2 = torch.round(ratio * bbox[0] + (1 - ratio) * bbox[2]).long() 65 | y2 = torch.round(ratio * bbox[1] + (1 - ratio) * bbox[3]).long() 66 | if featmap_size is not None: 67 | x1 = x1.clamp(min=0, max=featmap_size[1]) 68 | y1 = y1.clamp(min=0, max=featmap_size[0]) 69 | x2 = x2.clamp(min=0, max=featmap_size[1]) 70 | y2 = y2.clamp(min=0, max=featmap_size[0]) 71 | return (x1, y1, x2, y2) 72 | -------------------------------------------------------------------------------- /mmdet/core/bbox/__init__.py: -------------------------------------------------------------------------------- 1 | from .assigners import (AssignResult, BaseAssigner, CenterRegionAssigner, 2 | MaxIoUAssigner, RegionAssigner) 3 | from .builder import build_assigner, build_bbox_coder, build_sampler 4 | from .coder import (BaseBBoxCoder, DeltaXYWHBBoxCoder, PseudoBBoxCoder, 5 | TBLRBBoxCoder) 6 | from .iou_calculators import BboxOverlaps2D, bbox_overlaps 7 | from .samplers import (BaseSampler, CombinedSampler, 8 | InstanceBalancedPosSampler, IoUBalancedNegSampler, 9 | OHEMSampler, PseudoSampler, RandomSampler, 10 | SamplingResult, ScoreHLRSampler) 11 | from .transforms import (bbox2distance, bbox2result, bbox2roi, 12 | bbox_cxcywh_to_xyxy, bbox_flip, bbox_mapping, 13 | bbox_mapping_back, bbox_rescale, bbox_xyxy_to_cxcywh, 14 | distance2bbox, roi2bbox) 15 | 16 | __all__ = [ 17 | 'bbox_overlaps', 'BboxOverlaps2D', 'BaseAssigner', 'MaxIoUAssigner', 18 | 'AssignResult', 'BaseSampler', 'PseudoSampler', 'RandomSampler', 19 | 'InstanceBalancedPosSampler', 'IoUBalancedNegSampler', 'CombinedSampler', 20 | 'OHEMSampler', 'SamplingResult', 'ScoreHLRSampler', 'build_assigner', 21 | 'build_sampler', 'bbox_flip', 'bbox_mapping', 
'bbox_mapping_back', 22 | 'bbox2roi', 'roi2bbox', 'bbox2result', 'distance2bbox', 'bbox2distance', 23 | 'build_bbox_coder', 'BaseBBoxCoder', 'PseudoBBoxCoder', 24 | 'DeltaXYWHBBoxCoder', 'TBLRBBoxCoder', 'CenterRegionAssigner', 25 | 'bbox_rescale', 'bbox_cxcywh_to_xyxy', 'bbox_xyxy_to_cxcywh', 26 | 'RegionAssigner' 27 | ] 28 | -------------------------------------------------------------------------------- /mmdet/core/bbox/assigners/__init__.py: -------------------------------------------------------------------------------- 1 | from .approx_max_iou_assigner import ApproxMaxIoUAssigner 2 | from .assign_result import AssignResult 3 | from .atss_assigner import ATSSAssigner 4 | from .base_assigner import BaseAssigner 5 | from .center_region_assigner import CenterRegionAssigner 6 | from .grid_assigner import GridAssigner 7 | from .hungarian_assigner import HungarianAssigner 8 | from .max_iou_assigner import MaxIoUAssigner 9 | from .point_assigner import PointAssigner 10 | from .region_assigner import RegionAssigner 11 | from .uniform_assigner import UniformAssigner 12 | 13 | __all__ = [ 14 | 'BaseAssigner', 'MaxIoUAssigner', 'ApproxMaxIoUAssigner', 'AssignResult', 15 | 'PointAssigner', 'ATSSAssigner', 'CenterRegionAssigner', 'GridAssigner', 16 | 'HungarianAssigner', 'RegionAssigner', 'UniformAssigner' 17 | ] 18 | -------------------------------------------------------------------------------- /mmdet/core/bbox/assigners/base_assigner.py: -------------------------------------------------------------------------------- 1 | from abc import ABCMeta, abstractmethod 2 | 3 | 4 | class BaseAssigner(metaclass=ABCMeta): 5 | """Base assigner that assigns boxes to ground truth boxes.""" 6 | 7 | @abstractmethod 8 | def assign(self, bboxes, gt_bboxes, gt_bboxes_ignore=None, gt_labels=None): 9 | """Assign boxes to either a ground truth boxes or a negative boxes.""" 10 | -------------------------------------------------------------------------------- /mmdet/core/bbox/builder.py: -------------------------------------------------------------------------------- 1 | from mmcv.utils import Registry, build_from_cfg 2 | 3 | BBOX_ASSIGNERS = Registry('bbox_assigner') 4 | BBOX_SAMPLERS = Registry('bbox_sampler') 5 | BBOX_CODERS = Registry('bbox_coder') 6 | 7 | 8 | def build_assigner(cfg, **default_args): 9 | """Builder of box assigner.""" 10 | return build_from_cfg(cfg, BBOX_ASSIGNERS, default_args) 11 | 12 | 13 | def build_sampler(cfg, **default_args): 14 | """Builder of box sampler.""" 15 | return build_from_cfg(cfg, BBOX_SAMPLERS, default_args) 16 | 17 | 18 | def build_bbox_coder(cfg, **default_args): 19 | """Builder of box coder.""" 20 | return build_from_cfg(cfg, BBOX_CODERS, default_args) 21 | -------------------------------------------------------------------------------- /mmdet/core/bbox/coder/__init__.py: -------------------------------------------------------------------------------- 1 | from .base_bbox_coder import BaseBBoxCoder 2 | from .bucketing_bbox_coder import BucketingBBoxCoder 3 | from .delta_xywh_bbox_coder import DeltaXYWHBBoxCoder 4 | from .legacy_delta_xywh_bbox_coder import LegacyDeltaXYWHBBoxCoder 5 | from .pseudo_bbox_coder import PseudoBBoxCoder 6 | from .tblr_bbox_coder import TBLRBBoxCoder 7 | from .yolo_bbox_coder import YOLOBBoxCoder 8 | 9 | __all__ = [ 10 | 'BaseBBoxCoder', 'PseudoBBoxCoder', 'DeltaXYWHBBoxCoder', 11 | 'LegacyDeltaXYWHBBoxCoder', 'TBLRBBoxCoder', 'YOLOBBoxCoder', 12 | 'BucketingBBoxCoder' 13 | ] 14 | 
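[Editor's note] To make the registry pattern above concrete, here is a minimal usage sketch (not part of the repository; it assumes `mmdet` is importable and uses illustrative box values) that builds a coder from a config dict and round-trips boxes through it:

```python
import torch
from mmdet.core.bbox import build_bbox_coder

# Build a coder from a config dict via the BBOX_CODERS registry.
coder = build_bbox_coder(dict(
    type='DeltaXYWHBBoxCoder',
    target_means=[0., 0., 0., 0.],
    target_stds=[0.1, 0.1, 0.2, 0.2]))

rois = torch.tensor([[0., 0., 10., 10.]])  # proposals in (x1, y1, x2, y2)
gts = torch.tensor([[1., 1., 11., 11.]])   # matched ground-truth boxes

deltas = coder.encode(rois, gts)      # normalized regression targets, (1, 4)
decoded = coder.decode(rois, deltas)  # recovers gts from rois + deltas
```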
-------------------------------------------------------------------------------- /mmdet/core/bbox/coder/base_bbox_coder.py: -------------------------------------------------------------------------------- 1 | from abc import ABCMeta, abstractmethod 2 | 3 | 4 | class BaseBBoxCoder(metaclass=ABCMeta): 5 | """Base bounding box coder.""" 6 | 7 | def __init__(self, **kwargs): 8 | pass 9 | 10 | @abstractmethod 11 | def encode(self, bboxes, gt_bboxes): 12 | """Encode deltas between bboxes and ground truth boxes.""" 13 | 14 | @abstractmethod 15 | def decode(self, bboxes, bboxes_pred): 16 | """Decode the predicted bboxes according to prediction and base 17 | boxes.""" 18 | -------------------------------------------------------------------------------- /mmdet/core/bbox/coder/pseudo_bbox_coder.py: -------------------------------------------------------------------------------- 1 | from ..builder import BBOX_CODERS 2 | from .base_bbox_coder import BaseBBoxCoder 3 | 4 | 5 | @BBOX_CODERS.register_module() 6 | class PseudoBBoxCoder(BaseBBoxCoder): 7 | """Pseudo bounding box coder.""" 8 | 9 | def __init__(self, **kwargs): 10 | super(PseudoBBoxCoder, self).__init__(**kwargs) 11 | 12 | def encode(self, bboxes, gt_bboxes): 13 | """torch.Tensor: return the given ``gt_bboxes``""" 14 | return gt_bboxes 15 | 16 | def decode(self, bboxes, pred_bboxes): 17 | """torch.Tensor: return the given ``pred_bboxes``""" 18 | return pred_bboxes 19 | -------------------------------------------------------------------------------- /mmdet/core/bbox/demodata.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | from mmdet.utils.util_random import ensure_rng 5 | 6 | 7 | def random_boxes(num=1, scale=1, rng=None): 8 | """Simple version of ``kwimage.Boxes.random`` 9 | 10 | Returns: 11 | Tensor: shape (n, 4) in x1, y1, x2, y2 format.
12 | 13 | References: 14 | https://gitlab.kitware.com/computer-vision/kwimage/blob/master/kwimage/structs/boxes.py#L1390 15 | 16 | Example: 17 | >>> num = 3 18 | >>> scale = 512 19 | >>> rng = 0 20 | >>> boxes = random_boxes(num, scale, rng) 21 | >>> print(boxes) 22 | tensor([[280.9925, 278.9802, 308.6148, 366.1769], 23 | [216.9113, 330.6978, 224.0446, 456.5878], 24 | [405.3632, 196.3221, 493.3953, 270.7942]]) 25 | """ 26 | rng = ensure_rng(rng) 27 | 28 | tlbr = rng.rand(num, 4).astype(np.float32) 29 | 30 | tl_x = np.minimum(tlbr[:, 0], tlbr[:, 2]) 31 | tl_y = np.minimum(tlbr[:, 1], tlbr[:, 3]) 32 | br_x = np.maximum(tlbr[:, 0], tlbr[:, 2]) 33 | br_y = np.maximum(tlbr[:, 1], tlbr[:, 3]) 34 | 35 | tlbr[:, 0] = tl_x * scale 36 | tlbr[:, 1] = tl_y * scale 37 | tlbr[:, 2] = br_x * scale 38 | tlbr[:, 3] = br_y * scale 39 | 40 | boxes = torch.from_numpy(tlbr) 41 | return boxes 42 | -------------------------------------------------------------------------------- /mmdet/core/bbox/iou_calculators/__init__.py: -------------------------------------------------------------------------------- 1 | from .builder import build_iou_calculator 2 | from .iou2d_calculator import BboxOverlaps2D, bbox_overlaps 3 | 4 | __all__ = ['build_iou_calculator', 'BboxOverlaps2D', 'bbox_overlaps'] 5 | -------------------------------------------------------------------------------- /mmdet/core/bbox/iou_calculators/builder.py: -------------------------------------------------------------------------------- 1 | from mmcv.utils import Registry, build_from_cfg 2 | 3 | IOU_CALCULATORS = Registry('IoU calculator') 4 | 5 | 6 | def build_iou_calculator(cfg, default_args=None): 7 | """Builder of IoU calculator.""" 8 | return build_from_cfg(cfg, IOU_CALCULATORS, default_args) 9 | -------------------------------------------------------------------------------- /mmdet/core/bbox/match_costs/__init__.py: -------------------------------------------------------------------------------- 1 | from .builder import build_match_cost 2 | from .match_cost import BBoxL1Cost, ClassificationCost, FocalLossCost, IoUCost 3 | 4 | __all__ = [ 5 | 'build_match_cost', 'ClassificationCost', 'BBoxL1Cost', 'IoUCost', 6 | 'FocalLossCost' 7 | ] 8 | -------------------------------------------------------------------------------- /mmdet/core/bbox/match_costs/builder.py: -------------------------------------------------------------------------------- 1 | from mmcv.utils import Registry, build_from_cfg 2 | 3 | MATCH_COST = Registry('Match Cost') 4 | 5 | 6 | def build_match_cost(cfg, default_args=None): 7 | """Builder of IoU calculator.""" 8 | return build_from_cfg(cfg, MATCH_COST, default_args) 9 | -------------------------------------------------------------------------------- /mmdet/core/bbox/samplers/__init__.py: -------------------------------------------------------------------------------- 1 | from .base_sampler import BaseSampler 2 | from .combined_sampler import CombinedSampler 3 | from .instance_balanced_pos_sampler import InstanceBalancedPosSampler 4 | from .iou_balanced_neg_sampler import IoUBalancedNegSampler 5 | from .ohem_sampler import OHEMSampler 6 | from .pseudo_sampler import PseudoSampler 7 | from .random_sampler import RandomSampler 8 | from .sampling_result import SamplingResult 9 | from .score_hlr_sampler import ScoreHLRSampler 10 | 11 | __all__ = [ 12 | 'BaseSampler', 'PseudoSampler', 'RandomSampler', 13 | 'InstanceBalancedPosSampler', 'IoUBalancedNegSampler', 'CombinedSampler', 14 | 'OHEMSampler', 'SamplingResult', 'ScoreHLRSampler' 15 | ] 
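Taken together, the assigner and sampler registries compose into the standard train-time flow: assign each candidate box to a ground-truth (or to background), then subsample a fixed budget. A minimal sketch using the ``random_boxes`` helper from ``demodata`` above (illustrative thresholds and sizes, not part of the repository):

```python
import torch

from mmdet.core.bbox import build_assigner, build_sampler
from mmdet.core.bbox.demodata import random_boxes

assigner = build_assigner(
    dict(type='MaxIoUAssigner', pos_iou_thr=0.5, neg_iou_thr=0.4))
sampler = build_sampler(
    dict(type='RandomSampler', num=16, pos_fraction=0.25))

bboxes = random_boxes(num=64, scale=256, rng=0)    # candidate proposals
gt_bboxes = random_boxes(num=4, scale=256, rng=1)  # fake ground truth
gt_labels = torch.zeros(4, dtype=torch.long)

# Step 1: assign each proposal to a gt box (or background).
assign_result = assigner.assign(bboxes, gt_bboxes, gt_labels=gt_labels)
# Step 2: subsample a fixed budget of positives and negatives.
sampling_result = sampler.sample(
    assign_result, bboxes, gt_bboxes, gt_labels=gt_labels)
print(sampling_result.pos_inds.numel(), sampling_result.neg_inds.numel())
```

Random boxes may produce few or no positives at these thresholds; the sampler then simply fills the remaining budget with negatives.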
16 | -------------------------------------------------------------------------------- /mmdet/core/bbox/samplers/combined_sampler.py: -------------------------------------------------------------------------------- 1 | from ..builder import BBOX_SAMPLERS, build_sampler 2 | from .base_sampler import BaseSampler 3 | 4 | 5 | @BBOX_SAMPLERS.register_module() 6 | class CombinedSampler(BaseSampler): 7 | """A sampler that combines positive sampler and negative sampler.""" 8 | 9 | def __init__(self, pos_sampler, neg_sampler, **kwargs): 10 | super(CombinedSampler, self).__init__(**kwargs) 11 | self.pos_sampler = build_sampler(pos_sampler, **kwargs) 12 | self.neg_sampler = build_sampler(neg_sampler, **kwargs) 13 | 14 | def _sample_pos(self, **kwargs): 15 | """Sample positive samples.""" 16 | raise NotImplementedError 17 | 18 | def _sample_neg(self, **kwargs): 19 | """Sample negative samples.""" 20 | raise NotImplementedError 21 | -------------------------------------------------------------------------------- /mmdet/core/bbox/samplers/instance_balanced_pos_sampler.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | from ..builder import BBOX_SAMPLERS 5 | from .random_sampler import RandomSampler 6 | 7 | 8 | @BBOX_SAMPLERS.register_module() 9 | class InstanceBalancedPosSampler(RandomSampler): 10 | """Instance balanced sampler that samples equal number of positive samples 11 | for each instance.""" 12 | 13 | def _sample_pos(self, assign_result, num_expected, **kwargs): 14 | """Sample positive boxes. 15 | 16 | Args: 17 | assign_result (:obj:`AssignResult`): The assigned results of boxes. 18 | num_expected (int): The number of expected positive samples 19 | 20 | Returns: 21 | Tensor or ndarray: sampled indices. 
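Example (illustrative sketch; ``AssignResult.random`` is the random-result helper mmdet ships for tests, and the exact indices depend on the rng): >>> from mmdet.core.bbox import AssignResult >>> from mmdet.core.bbox.samplers import InstanceBalancedPosSampler >>> self = InstanceBalancedPosSampler(num=32, pos_fraction=0.5) >>> assign_result = AssignResult.random(rng=0) >>> inds = self._sample_pos(assign_result, 8)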
22 | """ 23 | pos_inds = torch.nonzero(assign_result.gt_inds > 0, as_tuple=False) 24 | if pos_inds.numel() != 0: 25 | pos_inds = pos_inds.squeeze(1) 26 | if pos_inds.numel() <= num_expected: 27 | return pos_inds 28 | else: 29 | unique_gt_inds = assign_result.gt_inds[pos_inds].unique() 30 | num_gts = len(unique_gt_inds) 31 | num_per_gt = int(round(num_expected / float(num_gts)) + 1) 32 | sampled_inds = [] 33 | for i in unique_gt_inds: 34 | inds = torch.nonzero( 35 | assign_result.gt_inds == i.item(), as_tuple=False) 36 | if inds.numel() != 0: 37 | inds = inds.squeeze(1) 38 | else: 39 | continue 40 | if len(inds) > num_per_gt: 41 | inds = self.random_choice(inds, num_per_gt) 42 | sampled_inds.append(inds) 43 | sampled_inds = torch.cat(sampled_inds) 44 | if len(sampled_inds) < num_expected: 45 | num_extra = num_expected - len(sampled_inds) 46 | extra_inds = np.array( 47 | list(set(pos_inds.cpu()) - set(sampled_inds.cpu()))) 48 | if len(extra_inds) > num_extra: 49 | extra_inds = self.random_choice(extra_inds, num_extra) 50 | extra_inds = torch.from_numpy(extra_inds).to( 51 | assign_result.gt_inds.device).long() 52 | sampled_inds = torch.cat([sampled_inds, extra_inds]) 53 | elif len(sampled_inds) > num_expected: 54 | sampled_inds = self.random_choice(sampled_inds, num_expected) 55 | return sampled_inds 56 | -------------------------------------------------------------------------------- /mmdet/core/bbox/samplers/pseudo_sampler.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from ..builder import BBOX_SAMPLERS 4 | from .base_sampler import BaseSampler 5 | from .sampling_result import SamplingResult 6 | 7 | 8 | @BBOX_SAMPLERS.register_module() 9 | class PseudoSampler(BaseSampler): 10 | """A pseudo sampler that does not do sampling actually.""" 11 | 12 | def __init__(self, **kwargs): 13 | pass 14 | 15 | def _sample_pos(self, **kwargs): 16 | """Sample positive samples.""" 17 | raise NotImplementedError 18 | 19 | def _sample_neg(self, **kwargs): 20 | """Sample negative samples.""" 21 | raise NotImplementedError 22 | 23 | def sample(self, assign_result, bboxes, gt_bboxes, **kwargs): 24 | """Directly returns the positive and negative indices of samples. 25 | 26 | Args: 27 | assign_result (:obj:`AssignResult`): Assigned results 28 | bboxes (torch.Tensor): Bounding boxes 29 | gt_bboxes (torch.Tensor): Ground truth boxes 30 | 31 | Returns: 32 | :obj:`SamplingResult`: sampler results 33 | """ 34 | pos_inds = torch.nonzero( 35 | assign_result.gt_inds > 0, as_tuple=False).squeeze(-1).unique() 36 | neg_inds = torch.nonzero( 37 | assign_result.gt_inds == 0, as_tuple=False).squeeze(-1).unique() 38 | gt_flags = bboxes.new_zeros(bboxes.shape[0], dtype=torch.uint8) 39 | sampling_result = SamplingResult(pos_inds, neg_inds, bboxes, gt_bboxes, 40 | assign_result, gt_flags) 41 | return sampling_result 42 | -------------------------------------------------------------------------------- /mmdet/core/bbox/samplers/random_sampler.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from ..builder import BBOX_SAMPLERS 4 | from .base_sampler import BaseSampler 5 | 6 | 7 | @BBOX_SAMPLERS.register_module() 8 | class RandomSampler(BaseSampler): 9 | """Random sampler. 10 | 11 | Args: 12 | num (int): Number of samples 13 | pos_fraction (float): Fraction of positive samples 14 | neg_pos_up (int, optional): Upper bound number of negative and 15 | positive samples. Defaults to -1. 
16 | add_gt_as_proposals (bool, optional): Whether to add ground truth 17 | boxes as proposals. Defaults to True. 18 | """ 19 | 20 | def __init__(self, 21 | num, 22 | pos_fraction, 23 | neg_pos_ub=-1, 24 | add_gt_as_proposals=True, 25 | **kwargs): 26 | from mmdet.core.bbox import demodata 27 | super(RandomSampler, self).__init__(num, pos_fraction, neg_pos_ub, 28 | add_gt_as_proposals) 29 | self.rng = demodata.ensure_rng(kwargs.get('rng', None)) 30 | 31 | def random_choice(self, gallery, num): 32 | """Random select some elements from the gallery. 33 | 34 | If `gallery` is a Tensor, the returned indices will be a Tensor; 35 | If `gallery` is a ndarray or list, the returned indices will be a 36 | ndarray. 37 | 38 | Args: 39 | gallery (Tensor | ndarray | list): indices pool. 40 | num (int): expected sample num. 41 | 42 | Returns: 43 | Tensor or ndarray: sampled indices. 44 | """ 45 | assert len(gallery) >= num 46 | 47 | is_tensor = isinstance(gallery, torch.Tensor) 48 | if not is_tensor: 49 | if torch.cuda.is_available(): 50 | device = torch.cuda.current_device() 51 | else: 52 | device = 'cpu' 53 | gallery = torch.tensor(gallery, dtype=torch.long, device=device) 54 | # This is a temporary fix. We can revert the following code 55 | # when PyTorch fixes the abnormal return of torch.randperm. 56 | # See: https://github.com/open-mmlab/mmdetection/pull/5014 57 | perm = torch.randperm(gallery.numel())[:num].to(device=gallery.device) 58 | rand_inds = gallery[perm] 59 | if not is_tensor: 60 | rand_inds = rand_inds.cpu().numpy() 61 | return rand_inds 62 | 63 | def _sample_pos(self, assign_result, num_expected, **kwargs): 64 | """Randomly sample some positive samples.""" 65 | pos_inds = torch.nonzero(assign_result.gt_inds > 0, as_tuple=False) 66 | if pos_inds.numel() != 0: 67 | pos_inds = pos_inds.squeeze(1) 68 | if pos_inds.numel() <= num_expected: 69 | return pos_inds 70 | else: 71 | return self.random_choice(pos_inds, num_expected) 72 | 73 | def _sample_neg(self, assign_result, num_expected, **kwargs): 74 | """Randomly sample some negative samples.""" 75 | neg_inds = torch.nonzero(assign_result.gt_inds == 0, as_tuple=False) 76 | if neg_inds.numel() != 0: 77 | neg_inds = neg_inds.squeeze(1) 78 | if len(neg_inds) <= num_expected: 79 | return neg_inds 80 | else: 81 | return self.random_choice(neg_inds, num_expected) 82 | -------------------------------------------------------------------------------- /mmdet/core/evaluation/__init__.py: -------------------------------------------------------------------------------- 1 | from .class_names import (cityscapes_classes, coco_classes, dataset_aliases, 2 | get_classes, imagenet_det_classes, 3 | imagenet_vid_classes, voc_classes) 4 | from .eval_hooks import DistEvalHook, EvalHook 5 | from .mean_ap import average_precision, eval_map, print_map_summary 6 | from .recall import (eval_recalls, plot_iou_recall, plot_num_recall, 7 | print_recall_summary) 8 | 9 | __all__ = [ 10 | 'voc_classes', 'imagenet_det_classes', 'imagenet_vid_classes', 11 | 'coco_classes', 'cityscapes_classes', 'dataset_aliases', 'get_classes', 12 | 'DistEvalHook', 'EvalHook', 'average_precision', 'eval_map', 13 | 'print_map_summary', 'eval_recalls', 'print_recall_summary', 14 | 'plot_num_recall', 'plot_iou_recall' 15 | ] 16 | -------------------------------------------------------------------------------- /mmdet/core/evaluation/bbox_overlaps.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def bbox_overlaps(bboxes1, bboxes2, 
mode='iou', eps=1e-6): 5 | """Calculate the ious between each bbox of bboxes1 and bboxes2. 6 | 7 | Args: 8 | bboxes1(ndarray): shape (n, 4) 9 | bboxes2(ndarray): shape (k, 4) 10 | mode(str): iou (intersection over union) or iof (intersection 11 | over foreground) 12 | 13 | Returns: 14 | ious(ndarray): shape (n, k) 15 | """ 16 | 17 | assert mode in ['iou', 'iof'] 18 | 19 | bboxes1 = bboxes1.astype(np.float32) 20 | bboxes2 = bboxes2.astype(np.float32) 21 | rows = bboxes1.shape[0] 22 | cols = bboxes2.shape[0] 23 | ious = np.zeros((rows, cols), dtype=np.float32) 24 | if rows * cols == 0: 25 | return ious 26 | exchange = False 27 | if bboxes1.shape[0] > bboxes2.shape[0]: 28 | bboxes1, bboxes2 = bboxes2, bboxes1 29 | ious = np.zeros((cols, rows), dtype=np.float32) 30 | exchange = True 31 | area1 = (bboxes1[:, 2] - bboxes1[:, 0]) * (bboxes1[:, 3] - bboxes1[:, 1]) 32 | area2 = (bboxes2[:, 2] - bboxes2[:, 0]) * (bboxes2[:, 3] - bboxes2[:, 1]) 33 | for i in range(bboxes1.shape[0]): 34 | x_start = np.maximum(bboxes1[i, 0], bboxes2[:, 0]) 35 | y_start = np.maximum(bboxes1[i, 1], bboxes2[:, 1]) 36 | x_end = np.minimum(bboxes1[i, 2], bboxes2[:, 2]) 37 | y_end = np.minimum(bboxes1[i, 3], bboxes2[:, 3]) 38 | overlap = np.maximum(x_end - x_start, 0) * np.maximum( 39 | y_end - y_start, 0) 40 | if mode == 'iou': 41 | union = area1[i] + area2 - overlap 42 | else: 43 | union = area1[i] if not exchange else area2 44 | union = np.maximum(union, eps) 45 | ious[i, :] = overlap / union 46 | if exchange: 47 | ious = ious.T 48 | return ious 49 | -------------------------------------------------------------------------------- /mmdet/core/evaluation/eval_hooks.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | 3 | import torch.distributed as dist 4 | from mmcv.runner import DistEvalHook as BaseDistEvalHook 5 | from mmcv.runner import EvalHook as BaseEvalHook 6 | from torch.nn.modules.batchnorm import _BatchNorm 7 | 8 | 9 | class EvalHook(BaseEvalHook): 10 | 11 | def _do_evaluate(self, runner): 12 | """perform evaluation and save ckpt.""" 13 | if not self._should_evaluate(runner): 14 | return 15 | 16 | from mmdet.apis import single_gpu_test 17 | results = single_gpu_test(runner.model, self.dataloader, show=False) 18 | runner.log_buffer.output['eval_iter_num'] = len(self.dataloader) 19 | key_score = self.evaluate(runner, results) 20 | if self.save_best: 21 | self._save_ckpt(runner, key_score) 22 | 23 | 24 | class DistEvalHook(BaseDistEvalHook): 25 | 26 | def _do_evaluate(self, runner): 27 | """perform evaluation and save ckpt.""" 28 | # Synchronization of BatchNorm's buffer (running_mean 29 | # and running_var) is not supported in the DDP of pytorch, 30 | # which may cause the inconsistent performance of models in 31 | # different ranks, so we broadcast BatchNorm's buffers 32 | # of rank 0 to other ranks to avoid this. 
33 | if self.broadcast_bn_buffer: 34 | model = runner.model 35 | for name, module in model.named_modules(): 36 | if isinstance(module, 37 | _BatchNorm) and module.track_running_stats: 38 | dist.broadcast(module.running_var, 0) 39 | dist.broadcast(module.running_mean, 0) 40 | 41 | if not self._should_evaluate(runner): 42 | return 43 | 44 | tmpdir = self.tmpdir 45 | if tmpdir is None: 46 | tmpdir = osp.join(runner.work_dir, '.eval_hook') 47 | 48 | from mmdet.apis import multi_gpu_test 49 | results = multi_gpu_test( 50 | runner.model, 51 | self.dataloader, 52 | tmpdir=tmpdir, 53 | gpu_collect=self.gpu_collect) 54 | if runner.rank == 0: 55 | print('\n') 56 | runner.log_buffer.output['eval_iter_num'] = len(self.dataloader) 57 | key_score = self.evaluate(runner, results) 58 | 59 | if self.save_best: 60 | self._save_ckpt(runner, key_score) 61 | -------------------------------------------------------------------------------- /mmdet/core/export/__init__.py: -------------------------------------------------------------------------------- 1 | from .onnx_helper import (add_dummy_nms_for_onnx, dynamic_clip_for_onnx, 2 | get_k_for_topk) 3 | from .pytorch2onnx import (build_model_from_cfg, 4 | generate_inputs_and_wrap_model, 5 | preprocess_example_input) 6 | 7 | __all__ = [ 8 | 'build_model_from_cfg', 'generate_inputs_and_wrap_model', 9 | 'preprocess_example_input', 'get_k_for_topk', 'add_dummy_nms_for_onnx', 10 | 'dynamic_clip_for_onnx' 11 | ] 12 | -------------------------------------------------------------------------------- /mmdet/core/mask/__init__.py: -------------------------------------------------------------------------------- 1 | from .mask_target import mask_target 2 | from .structures import BaseInstanceMasks, BitmapMasks, PolygonMasks 3 | from .utils import encode_mask_results, split_combined_polys 4 | 5 | __all__ = [ 6 | 'split_combined_polys', 'mask_target', 'BaseInstanceMasks', 'BitmapMasks', 7 | 'PolygonMasks', 'encode_mask_results' 8 | ] 9 | -------------------------------------------------------------------------------- /mmdet/core/mask/utils.py: -------------------------------------------------------------------------------- 1 | import mmcv 2 | import numpy as np 3 | import pycocotools.mask as mask_util 4 | 5 | 6 | def split_combined_polys(polys, poly_lens, polys_per_mask): 7 | """Split the combined 1-D polys into masks. 8 | 9 | A mask is represented as a list of polys, and a poly is represented as 10 | a 1-D array. In dataset, all masks are concatenated into a single 1-D 11 | tensor. Here we need to split the tensor into original representations. 12 | 13 | Args: 14 | polys (list): a list (length = image num) of 1-D tensors 15 | poly_lens (list): a list (length = image num) of poly length 16 | polys_per_mask (list): a list (length = image num) of poly number 17 | of each mask 18 | 19 | Returns: 20 | list: a list (length = image num) of list (length = mask num) of \ 21 | list (length = poly num) of numpy array. 22 | """ 23 | mask_polys_list = [] 24 | for img_id in range(len(polys)): 25 | polys_single = polys[img_id] 26 | polys_lens_single = poly_lens[img_id].tolist() 27 | polys_per_mask_single = polys_per_mask[img_id].tolist() 28 | 29 | split_polys = mmcv.slice_list(polys_single, polys_lens_single) 30 | mask_polys = mmcv.slice_list(split_polys, polys_per_mask_single) 31 | mask_polys_list.append(mask_polys) 32 | return mask_polys_list 33 | 34 | 35 | # TODO: move this function to more proper place 36 | def encode_mask_results(mask_results): 37 | """Encode bitmap mask to RLE code. 
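Each binary mask is compressed per class with ``pycocotools.mask.encode``, which expects a Fortran-ordered uint8 array; the RLE form keeps results compact enough to dump to json.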
38 | 39 | Args: 40 | mask_results (list | tuple[list]): bitmap mask results. 41 | In mask scoring rcnn, mask_results is a tuple of (segm_results, 42 | segm_cls_score). 43 | 44 | Returns: 45 | list | tuple: RLE encoded mask. 46 | """ 47 | if isinstance(mask_results, tuple): # mask scoring 48 | cls_segms, cls_mask_scores = mask_results 49 | else: 50 | cls_segms = mask_results 51 | num_classes = len(cls_segms) 52 | encoded_mask_results = [[] for _ in range(num_classes)] 53 | for i in range(len(cls_segms)): 54 | for cls_segm in cls_segms[i]: 55 | encoded_mask_results[i].append( 56 | mask_util.encode( 57 | np.array( 58 | cls_segm[:, :, np.newaxis], order='F', 59 | dtype='uint8'))[0]) # encoded with RLE 60 | if isinstance(mask_results, tuple): 61 | return encoded_mask_results, cls_mask_scores 62 | else: 63 | return encoded_mask_results 64 | -------------------------------------------------------------------------------- /mmdet/core/post_processing/__init__.py: -------------------------------------------------------------------------------- 1 | from .bbox_nms import fast_nms, multiclass_nms 2 | from .merge_augs import (merge_aug_bboxes, merge_aug_masks, 3 | merge_aug_proposals, merge_aug_scores) 4 | 5 | __all__ = [ 6 | 'multiclass_nms', 'merge_aug_proposals', 'merge_aug_bboxes', 7 | 'merge_aug_scores', 'merge_aug_masks', 'fast_nms' 8 | ] 9 | -------------------------------------------------------------------------------- /mmdet/core/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .dist_utils import DistOptimizerHook, allreduce_grads, reduce_mean 2 | from .misc import flip_tensor, mask2ndarray, multi_apply, unmap 3 | 4 | __all__ = [ 5 | 'allreduce_grads', 'DistOptimizerHook', 'reduce_mean', 'multi_apply', 6 | 'unmap', 'mask2ndarray', 'flip_tensor' 7 | ] 8 | -------------------------------------------------------------------------------- /mmdet/core/utils/dist_utils.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | from collections import OrderedDict 3 | 4 | import torch.distributed as dist 5 | from mmcv.runner import OptimizerHook 6 | from torch._utils import (_flatten_dense_tensors, _take_tensors, 7 | _unflatten_dense_tensors) 8 | 9 | 10 | def _allreduce_coalesced(tensors, world_size, bucket_size_mb=-1): 11 | if bucket_size_mb > 0: 12 | bucket_size_bytes = bucket_size_mb * 1024 * 1024 13 | buckets = _take_tensors(tensors, bucket_size_bytes) 14 | else: 15 | buckets = OrderedDict() 16 | for tensor in tensors: 17 | tp = tensor.type() 18 | if tp not in buckets: 19 | buckets[tp] = [] 20 | buckets[tp].append(tensor) 21 | buckets = buckets.values() 22 | 23 | for bucket in buckets: 24 | flat_tensors = _flatten_dense_tensors(bucket) 25 | dist.all_reduce(flat_tensors) 26 | flat_tensors.div_(world_size) 27 | for tensor, synced in zip( 28 | bucket, _unflatten_dense_tensors(flat_tensors, bucket)): 29 | tensor.copy_(synced) 30 | 31 | 32 | def allreduce_grads(params, coalesce=True, bucket_size_mb=-1): 33 | """Allreduce gradients. 34 | 35 | Args: 36 | params (list[torch.Parameters]): List of parameters of a model 37 | coalesce (bool, optional): Whether allreduce parameters as a whole. 38 | Defaults to True. 39 | bucket_size_mb (int, optional): Size of bucket, the unit is MB. 40 | Defaults to -1. 
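Example (illustrative; assumes a model on the current rank and an initialized process group): >>> allreduce_grads(list(model.parameters()), coalesce=True)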
41 | """ 42 | grads = [ 43 | param.grad.data for param in params 44 | if param.requires_grad and param.grad is not None 45 | ] 46 | world_size = dist.get_world_size() 47 | if coalesce: 48 | _allreduce_coalesced(grads, world_size, bucket_size_mb) 49 | else: 50 | for tensor in grads: 51 | dist.all_reduce(tensor.div_(world_size)) 52 | 53 | 54 | class DistOptimizerHook(OptimizerHook): 55 | """Deprecated optimizer hook for distributed training.""" 56 | 57 | def __init__(self, *args, **kwargs): 58 | warnings.warn('"DistOptimizerHook" is deprecated, please switch to' 59 | '"mmcv.runner.OptimizerHook".') 60 | super().__init__(*args, **kwargs) 61 | 62 | 63 | def reduce_mean(tensor): 64 | """"Obtain the mean of tensor on different GPUs.""" 65 | if not (dist.is_available() and dist.is_initialized()): 66 | return tensor 67 | tensor = tensor.clone() 68 | dist.all_reduce(tensor.div_(dist.get_world_size()), op=dist.ReduceOp.SUM) 69 | return tensor 70 | -------------------------------------------------------------------------------- /mmdet/core/utils/misc.py: -------------------------------------------------------------------------------- 1 | from functools import partial 2 | 3 | import numpy as np 4 | import torch 5 | from six.moves import map, zip 6 | 7 | from ..mask.structures import BitmapMasks, PolygonMasks 8 | 9 | 10 | def multi_apply(func, *args, **kwargs): 11 | """Apply function to a list of arguments. 12 | 13 | Note: 14 | This function applies the ``func`` to multiple inputs and 15 | map the multiple outputs of the ``func`` into different 16 | list. Each list contains the same type of outputs corresponding 17 | to different inputs. 18 | 19 | Args: 20 | func (Function): A function that will be applied to a list of 21 | arguments 22 | 23 | Returns: 24 | tuple(list): A tuple containing multiple list, each list contains \ 25 | a kind of returned results by the function 26 | """ 27 | pfunc = partial(func, **kwargs) if kwargs else func 28 | map_results = map(pfunc, *args) 29 | return tuple(map(list, zip(*map_results))) 30 | 31 | 32 | def unmap(data, count, inds, fill=0): 33 | """Unmap a subset of item (data) back to the original set of items (of size 34 | count)""" 35 | if data.dim() == 1: 36 | ret = data.new_full((count, ), fill) 37 | ret[inds.type(torch.bool)] = data 38 | else: 39 | new_size = (count, ) + data.size()[1:] 40 | ret = data.new_full(new_size, fill) 41 | ret[inds.type(torch.bool), :] = data 42 | return ret 43 | 44 | 45 | def mask2ndarray(mask): 46 | """Convert Mask to ndarray.. 47 | 48 | Args: 49 | mask (:obj:`BitmapMasks` or :obj:`PolygonMasks` or 50 | torch.Tensor or np.ndarray): The mask to be converted. 51 | 52 | Returns: 53 | np.ndarray: Ndarray mask of shape (n, h, w) that has been converted 54 | """ 55 | if isinstance(mask, (BitmapMasks, PolygonMasks)): 56 | mask = mask.to_ndarray() 57 | elif isinstance(mask, torch.Tensor): 58 | mask = mask.detach().cpu().numpy() 59 | elif not isinstance(mask, np.ndarray): 60 | raise TypeError(f'Unsupported {type(mask)} data type') 61 | return mask 62 | 63 | 64 | def flip_tensor(src_tensor, flip_direction): 65 | """flip tensor base on flip_direction. 66 | 67 | Args: 68 | src_tensor (Tensor): input feature map, shape (B, C, H, W). 69 | flip_direction (str): The flipping direction. Options are 70 | 'horizontal', 'vertical', 'diagonal'. 71 | 72 | Returns: 73 | out_tensor (Tensor): Flipped tensor. 
74 | """ 75 | assert src_tensor.ndim == 4 76 | valid_directions = ['horizontal', 'vertical', 'diagonal'] 77 | assert flip_direction in valid_directions 78 | if flip_direction == 'horizontal': 79 | out_tensor = torch.flip(src_tensor, [3]) 80 | elif flip_direction == 'vertical': 81 | out_tensor = torch.flip(src_tensor, [2]) 82 | else: 83 | out_tensor = torch.flip(src_tensor, [2, 3]) 84 | return out_tensor 85 | -------------------------------------------------------------------------------- /mmdet/core/visualization/__init__.py: -------------------------------------------------------------------------------- 1 | from .image import (color_val_matplotlib, imshow_det_bboxes, 2 | imshow_gt_det_bboxes) 3 | 4 | __all__ = ['imshow_det_bboxes', 'imshow_gt_det_bboxes', 'color_val_matplotlib'] 5 | -------------------------------------------------------------------------------- /mmdet/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | from .builder import DATASETS, PIPELINES, build_dataloader, build_dataset 2 | from .cityscapes import CityscapesDataset 3 | from .coco import CocoDataset 4 | from .custom import CustomDataset 5 | from .dataset_wrappers import (ClassBalancedDataset, ConcatDataset, 6 | RepeatDataset) 7 | from .deepfashion import DeepFashionDataset 8 | from .lvis import LVISDataset, LVISV1Dataset, LVISV05Dataset 9 | from .samplers import DistributedGroupSampler, DistributedSampler, GroupSampler 10 | from .utils import (NumClassCheckHook, get_loading_pipeline, 11 | replace_ImageToTensor) 12 | from .voc import VOCDataset 13 | from .wider_face import WIDERFaceDataset 14 | from .xml_style import XMLDataset 15 | 16 | __all__ = [ 17 | 'CustomDataset', 'XMLDataset', 'CocoDataset', 'DeepFashionDataset', 18 | 'VOCDataset', 'CityscapesDataset', 'LVISDataset', 'LVISV05Dataset', 19 | 'LVISV1Dataset', 'GroupSampler', 'DistributedGroupSampler', 20 | 'DistributedSampler', 'build_dataloader', 'ConcatDataset', 'RepeatDataset', 21 | 'ClassBalancedDataset', 'WIDERFaceDataset', 'DATASETS', 'PIPELINES', 22 | 'build_dataset', 'replace_ImageToTensor', 'get_loading_pipeline', 23 | 'NumClassCheckHook' 24 | ] 25 | -------------------------------------------------------------------------------- /mmdet/datasets/api_wrappers/__init__.py: -------------------------------------------------------------------------------- 1 | from .coco_api import COCO, COCOeval 2 | 3 | __all__ = ['COCO', 'COCOeval'] 4 | -------------------------------------------------------------------------------- /mmdet/datasets/api_wrappers/coco_api.py: -------------------------------------------------------------------------------- 1 | # This file add snake case alias for coco api 2 | 3 | import warnings 4 | 5 | import pycocotools 6 | from pycocotools.coco import COCO as _COCO 7 | from pycocotools.cocoeval import COCOeval as _COCOeval 8 | 9 | 10 | class COCO(_COCO): 11 | """This class is almost the same as official pycocotools package. 12 | 13 | It implements some snake case function aliases. So that the COCO class has 14 | the same interface as LVIS class. 15 | """ 16 | 17 | def __init__(self, annotation_file=None): 18 | if getattr(pycocotools, '__version__', '0') >= '12.0.2': 19 | warnings.warn( 20 | 'mmpycocotools is deprecated. 
Please install official pycocotools by "pip install pycocotools"', # noqa: E501 21 | UserWarning) 22 | super().__init__(annotation_file=annotation_file) 23 | self.img_ann_map = self.imgToAnns 24 | self.cat_img_map = self.catToImgs 25 | 26 | def get_ann_ids(self, img_ids=[], cat_ids=[], area_rng=[], iscrowd=None): 27 | return self.getAnnIds(img_ids, cat_ids, area_rng, iscrowd) 28 | 29 | def get_cat_ids(self, cat_names=[], sup_names=[], cat_ids=[]): 30 | return self.getCatIds(cat_names, sup_names, cat_ids) 31 | 32 | def get_img_ids(self, img_ids=[], cat_ids=[]): 33 | return self.getImgIds(img_ids, cat_ids) 34 | 35 | def load_anns(self, ids): 36 | return self.loadAnns(ids) 37 | 38 | def load_cats(self, ids): 39 | return self.loadCats(ids) 40 | 41 | def load_imgs(self, ids): 42 | return self.loadImgs(ids) 43 | 44 | 45 | # just for the ease of import 46 | COCOeval = _COCOeval 47 | -------------------------------------------------------------------------------- /mmdet/datasets/deepfashion.py: -------------------------------------------------------------------------------- 1 | from .builder import DATASETS 2 | from .coco import CocoDataset 3 | 4 | 5 | @DATASETS.register_module() 6 | class DeepFashionDataset(CocoDataset): 7 | 8 | CLASSES = ('top', 'skirt', 'leggings', 'dress', 'outer', 'pants', 'bag', 9 | 'neckwear', 'headwear', 'eyeglass', 'belt', 'footwear', 'hair', 10 | 'skin', 'face') 11 | -------------------------------------------------------------------------------- /mmdet/datasets/pipelines/__init__.py: -------------------------------------------------------------------------------- 1 | from .auto_augment import (AutoAugment, BrightnessTransform, ColorTransform, 2 | ContrastTransform, EqualizeTransform, Rotate, Shear, 3 | Translate) 4 | from .compose import Compose 5 | from .formating import (Collect, DefaultFormatBundle, ImageToTensor, 6 | ToDataContainer, ToTensor, Transpose, to_tensor) 7 | from .instaboost import InstaBoost 8 | from .loading import (LoadAnnotations, LoadImageFromFile, LoadImageFromWebcam, 9 | LoadMultiChannelImageFromFiles, LoadProposals) 10 | from .test_time_aug import MultiScaleFlipAug 11 | from .transforms import (Albu, CutOut, Expand, MinIoURandomCrop, Normalize, 12 | Pad, PhotoMetricDistortion, RandomCenterCropPad, 13 | RandomCrop, RandomFlip, RandomShift, Resize, 14 | SegRescale) 15 | 16 | __all__ = [ 17 | 'Compose', 'to_tensor', 'ToTensor', 'ImageToTensor', 'ToDataContainer', 18 | 'Transpose', 'Collect', 'DefaultFormatBundle', 'LoadAnnotations', 19 | 'LoadImageFromFile', 'LoadImageFromWebcam', 20 | 'LoadMultiChannelImageFromFiles', 'LoadProposals', 'MultiScaleFlipAug', 21 | 'Resize', 'RandomFlip', 'Pad', 'RandomCrop', 'Normalize', 'SegRescale', 22 | 'MinIoURandomCrop', 'Expand', 'PhotoMetricDistortion', 'Albu', 23 | 'InstaBoost', 'RandomCenterCropPad', 'AutoAugment', 'CutOut', 'Shear', 24 | 'Rotate', 'ColorTransform', 'EqualizeTransform', 'BrightnessTransform', 25 | 'ContrastTransform', 'Translate', 'RandomShift' 26 | ] 27 | -------------------------------------------------------------------------------- /mmdet/datasets/pipelines/compose.py: -------------------------------------------------------------------------------- 1 | import collections 2 | 3 | from mmcv.utils import build_from_cfg 4 | 5 | from ..builder import PIPELINES 6 | 7 | 8 | @PIPELINES.register_module() 9 | class Compose: 10 | """Compose multiple transforms sequentially. 11 | 12 | Args: 13 | transforms (Sequence[dict | callable]): Sequence of transform object or 14 | config dict to be composed. 
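Example (illustrative; the listed transforms are all registered in ``PIPELINES``): >>> pipeline = Compose([ ... dict(type='LoadImageFromFile'), ... dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), ... dict(type='RandomFlip', flip_ratio=0.5), ... ]) >>> # data = pipeline(results) then applies each transform in order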
15 | """ 16 | 17 | def __init__(self, transforms): 18 | assert isinstance(transforms, collections.abc.Sequence) 19 | self.transforms = [] 20 | for transform in transforms: 21 | if isinstance(transform, dict): 22 | transform = build_from_cfg(transform, PIPELINES) 23 | self.transforms.append(transform) 24 | elif callable(transform): 25 | self.transforms.append(transform) 26 | else: 27 | raise TypeError('transform must be callable or a dict') 28 | 29 | def __call__(self, data): 30 | """Call function to apply transforms sequentially. 31 | 32 | Args: 33 | data (dict): A result dict contains the data to transform. 34 | 35 | Returns: 36 | dict: Transformed data. 37 | """ 38 | 39 | for t in self.transforms: 40 | data = t(data) 41 | if data is None: 42 | return None 43 | return data 44 | 45 | def __repr__(self): 46 | format_string = self.__class__.__name__ + '(' 47 | for t in self.transforms: 48 | format_string += '\n' 49 | format_string += f' {t}' 50 | format_string += '\n)' 51 | return format_string 52 | -------------------------------------------------------------------------------- /mmdet/datasets/samplers/__init__.py: -------------------------------------------------------------------------------- 1 | from .distributed_sampler import DistributedSampler 2 | from .group_sampler import DistributedGroupSampler, GroupSampler 3 | from .distributed_classaware_sampler import DistributedClassAwareSampler 4 | 5 | __all__ = ['DistributedSampler', 'DistributedGroupSampler', 'DistributedClassAwareSampler', 'GroupSampler'] 6 | -------------------------------------------------------------------------------- /mmdet/datasets/samplers/distributed_classaware_sampler.py: -------------------------------------------------------------------------------- 1 | import math 2 | import json 3 | 4 | import numpy as np 5 | import torch 6 | from mmcv.runner import get_dist_info 7 | from torch.utils.data import Sampler 8 | 9 | 10 | class DistributedClassAwareSampler(Sampler): 11 | def __init__(self, dataset, samples_per_gpu=1, num_replicas=None, rank=None, seed=1, sample_weight_path=None): 12 | _rank, _num_replicas = get_dist_info() 13 | if num_replicas is None: 14 | num_replicas = _num_replicas 15 | if rank is None: 16 | rank = _rank 17 | self.dataset = dataset 18 | self.samples_per_gpu = samples_per_gpu 19 | self.num_replicas = num_replicas 20 | self.rank = rank 21 | self.epoch = 0 22 | self.seed = seed if seed is not None else 1 23 | 24 | assert hasattr(self.dataset, 'flag') 25 | assert len(self.dataset.flag) == len(self.dataset) 26 | self.num_samples = math.ceil(len(self.dataset) / self.num_replicas) 27 | 28 | with open(sample_weight_path, "r") as f: 29 | sample_weight = json.load(f) 30 | self.sample_weights = torch.tensor( 31 | list(sample_weight.values()), dtype=torch.float) 32 | 33 | self.indices = None 34 | self.set_epoch(-1) 35 | 36 | def __iter__(self): 37 | return iter(self.indices) 38 | 39 | def __len__(self): 40 | return self.num_samples 41 | 42 | def set_epoch(self, epoch): 43 | self.epoch = epoch 44 | 45 | g = torch.Generator() 46 | g.manual_seed(self.seed + self.epoch) 47 | 48 | indices = torch.multinomial( 49 | self.sample_weights, len(self.dataset), generator=g, replacement=True 50 | ).numpy() 51 | 52 | self.flag = self.dataset.flag[indices] 53 | 54 | self.group_sizes = np.bincount(self.flag) 55 | self.num_samples = 0 56 | for i, j in enumerate(self.group_sizes): 57 | self.num_samples += math.ceil(self.group_sizes[i] / self.samples_per_gpu / 58 | self.num_replicas) * self.samples_per_gpu 59 | self.total_size 
= self.num_samples * self.num_replicas 60 | 61 | indices_group = [] 62 | for i, size in enumerate(self.group_sizes): 63 | if size > 0: 64 | flag_i_indice = np.where(self.flag == i)[0] 65 | assert len(flag_i_indice) == size 66 | 67 | indice = indices[flag_i_indice].tolist() 68 | extra = math.ceil( 69 | size / self.samples_per_gpu / self.num_replicas 70 | ) * self.samples_per_gpu * self.num_replicas - len(indice) 71 | 72 | tmp = indice.copy() 73 | for _ in range(extra // size): 74 | indice.extend(tmp) 75 | indice.extend(tmp[:extra % size]) 76 | indices_group.extend(indice) 77 | 78 | assert len(indices_group) == self.total_size 79 | indices_group = [ 80 | indices_group[j] for i in list( 81 | torch.randperm( 82 | len(indices_group) // self.samples_per_gpu, generator=g)) 83 | for j in range(i * self.samples_per_gpu, (i + 1) * 84 | self.samples_per_gpu) 85 | ] 86 | 87 | offset = self.num_samples * self.rank 88 | indices_group = indices_group[offset:offset + self.num_samples] 89 | assert len(indices_group) == self.num_samples 90 | 91 | self.indices = indices_group 92 | 93 | 94 | -------------------------------------------------------------------------------- /mmdet/datasets/samplers/distributed_sampler.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | import torch 4 | from torch.utils.data import DistributedSampler as _DistributedSampler 5 | 6 | 7 | class DistributedSampler(_DistributedSampler): 8 | 9 | def __init__(self, 10 | dataset, 11 | num_replicas=None, 12 | rank=None, 13 | shuffle=True, 14 | seed=0): 15 | super().__init__( 16 | dataset, num_replicas=num_replicas, rank=rank, shuffle=shuffle) 17 | # for the compatibility from PyTorch 1.3+ 18 | self.seed = seed if seed is not None else 0 19 | 20 | def __iter__(self): 21 | # deterministically shuffle based on epoch 22 | if self.shuffle: 23 | g = torch.Generator() 24 | g.manual_seed(self.epoch + self.seed) 25 | indices = torch.randperm(len(self.dataset), generator=g).tolist() 26 | else: 27 | indices = torch.arange(len(self.dataset)).tolist() 28 | 29 | # add extra samples to make it evenly divisible 30 | # in case that indices is shorter than half of total_size 31 | indices = (indices * 32 | math.ceil(self.total_size / len(indices)))[:self.total_size] 33 | assert len(indices) == self.total_size 34 | 35 | # subsample 36 | indices = indices[self.rank:self.total_size:self.num_replicas] 37 | assert len(indices) == self.num_samples 38 | 39 | return iter(indices) 40 | -------------------------------------------------------------------------------- /mmdet/datasets/wider_face.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | import xml.etree.ElementTree as ET 3 | 4 | import mmcv 5 | 6 | from .builder import DATASETS 7 | from .xml_style import XMLDataset 8 | 9 | 10 | @DATASETS.register_module() 11 | class WIDERFaceDataset(XMLDataset): 12 | """Reader for the WIDER Face dataset in PASCAL VOC format. 13 | 14 | Conversion scripts can be found in 15 | https://github.com/sovrasov/wider-face-pascal-voc-annotations 16 | """ 17 | CLASSES = ('face', ) 18 | 19 | def __init__(self, **kwargs): 20 | super(WIDERFaceDataset, self).__init__(**kwargs) 21 | 22 | def load_annotations(self, ann_file): 23 | """Load annotation from WIDERFace XML style annotation file. 24 | 25 | Args: 26 | ann_file (str): Path of XML file. 27 | 28 | Returns: 29 | list[dict]: Annotation info from XML file. 
30 | """ 31 | 32 | data_infos = [] 33 | img_ids = mmcv.list_from_file(ann_file) 34 | for img_id in img_ids: 35 | filename = f'{img_id}.jpg' 36 | xml_path = osp.join(self.img_prefix, 'Annotations', 37 | f'{img_id}.xml') 38 | tree = ET.parse(xml_path) 39 | root = tree.getroot() 40 | size = root.find('size') 41 | width = int(size.find('width').text) 42 | height = int(size.find('height').text) 43 | folder = root.find('folder').text 44 | data_infos.append( 45 | dict( 46 | id=img_id, 47 | filename=osp.join(folder, filename), 48 | width=width, 49 | height=height)) 50 | 51 | return data_infos 52 | -------------------------------------------------------------------------------- /mmdet/models/__init__.py: -------------------------------------------------------------------------------- 1 | from .backbones import * # noqa: F401,F403 2 | from .builder import (BACKBONES, DETECTORS, HEADS, LOSSES, NECKS, 3 | ROI_EXTRACTORS, SHARED_HEADS, build_backbone, 4 | build_detector, build_head, build_loss, build_neck, 5 | build_roi_extractor, build_shared_head) 6 | from .dense_heads import * # noqa: F401,F403 7 | from .detectors import * # noqa: F401,F403 8 | from .losses import * # noqa: F401,F403 9 | from .necks import * # noqa: F401,F403 10 | from .roi_heads import * # noqa: F401,F403 11 | 12 | __all__ = [ 13 | 'BACKBONES', 'NECKS', 'ROI_EXTRACTORS', 'SHARED_HEADS', 'HEADS', 'LOSSES', 14 | 'DETECTORS', 'build_backbone', 'build_neck', 'build_roi_extractor', 15 | 'build_shared_head', 'build_head', 'build_loss', 'build_detector' 16 | ] 17 | -------------------------------------------------------------------------------- /mmdet/models/backbones/__init__.py: -------------------------------------------------------------------------------- 1 | from .darknet import Darknet 2 | from .detectors_resnet import DetectoRS_ResNet 3 | from .detectors_resnext import DetectoRS_ResNeXt 4 | from .hourglass import HourglassNet 5 | from .hrnet import HRNet 6 | from .mobilenet_v2 import MobileNetV2 7 | from .regnet import RegNet 8 | from .res2net import Res2Net 9 | from .resnest import ResNeSt 10 | from .resnet import ResNet, ResNetV1d 11 | from .resnext import ResNeXt 12 | from .ssd_vgg import SSDVGG 13 | from .trident_resnet import TridentResNet 14 | from .swin_transformer import SwinTransformer 15 | from .cbnet import CBResNet, CBRes2Net, CBSwinTransformer 16 | 17 | __all__ = [ 18 | 'RegNet', 'ResNet', 'ResNetV1d', 'ResNeXt', 'SSDVGG', 'HRNet', 'Res2Net', 19 | 'HourglassNet', 'DetectoRS_ResNet', 'DetectoRS_ResNeXt', 'Darknet', 20 | 'ResNeSt', 'TridentResNet', 'SwinTransformer', 'CBResNet', 'CBRes2Net', 'CBSwinTransformer' 21 | ] 22 | -------------------------------------------------------------------------------- /mmdet/models/builder.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | 3 | from mmcv.cnn import MODELS as MMCV_MODELS 4 | from mmcv.utils import Registry 5 | 6 | MODELS = Registry('models', parent=MMCV_MODELS) 7 | 8 | BACKBONES = MODELS 9 | NECKS = MODELS 10 | ROI_EXTRACTORS = MODELS 11 | SHARED_HEADS = MODELS 12 | HEADS = MODELS 13 | LOSSES = MODELS 14 | DETECTORS = MODELS 15 | 16 | 17 | def build_backbone(cfg): 18 | """Build backbone.""" 19 | return BACKBONES.build(cfg) 20 | 21 | 22 | def build_neck(cfg): 23 | """Build neck.""" 24 | return NECKS.build(cfg) 25 | 26 | 27 | def build_roi_extractor(cfg): 28 | """Build roi extractor.""" 29 | return ROI_EXTRACTORS.build(cfg) 30 | 31 | 32 | def build_shared_head(cfg): 33 | """Build shared head.""" 34 | return 
SHARED_HEADS.build(cfg) 35 | 36 | 37 | def build_head(cfg): 38 | """Build head.""" 39 | return HEADS.build(cfg) 40 | 41 | 42 | def build_loss(cfg): 43 | """Build loss.""" 44 | return LOSSES.build(cfg) 45 | 46 | 47 | def build_detector(cfg, train_cfg=None, test_cfg=None): 48 | """Build detector.""" 49 | if train_cfg is not None or test_cfg is not None: 50 | warnings.warn( 51 | 'train_cfg and test_cfg is deprecated, ' 52 | 'please specify them in model', UserWarning) 53 | assert cfg.get('train_cfg') is None or train_cfg is None, \ 54 | 'train_cfg specified in both outer field and model field ' 55 | assert cfg.get('test_cfg') is None or test_cfg is None, \ 56 | 'test_cfg specified in both outer field and model field ' 57 | return DETECTORS.build( 58 | cfg, default_args=dict(train_cfg=train_cfg, test_cfg=test_cfg)) 59 | -------------------------------------------------------------------------------- /mmdet/models/dense_heads/__init__.py: -------------------------------------------------------------------------------- 1 | from .anchor_free_head import AnchorFreeHead 2 | from .anchor_head import AnchorHead 3 | from .atss_head import ATSSHead 4 | from .autoassign_head import AutoAssignHead 5 | from .cascade_rpn_head import CascadeRPNHead, StageCascadeRPNHead 6 | from .centernet_head import CenterNetHead 7 | from .centripetal_head import CentripetalHead 8 | from .corner_head import CornerHead 9 | from .deformable_detr_head import DeformableDETRHead 10 | from .detr_head import DETRHead 11 | from .embedding_rpn_head import EmbeddingRPNHead 12 | from .fcos_head import FCOSHead 13 | from .fovea_head import FoveaHead 14 | from .free_anchor_retina_head import FreeAnchorRetinaHead 15 | from .fsaf_head import FSAFHead 16 | from .ga_retina_head import GARetinaHead 17 | from .ga_rpn_head import GARPNHead 18 | from .gfl_head import GFLHead 19 | from .guided_anchor_head import FeatureAdaption, GuidedAnchorHead 20 | from .ld_head import LDHead 21 | from .nasfcos_head import NASFCOSHead 22 | from .paa_head import PAAHead 23 | from .pisa_retinanet_head import PISARetinaHead 24 | from .pisa_ssd_head import PISASSDHead 25 | from .reppoints_head import RepPointsHead 26 | from .retina_head import RetinaHead 27 | from .retina_sepbn_head import RetinaSepBNHead 28 | from .rpn_head import RPNHead 29 | from .sabl_retina_head import SABLRetinaHead 30 | from .ssd_head import SSDHead 31 | from .vfnet_head import VFNetHead 32 | from .yolact_head import YOLACTHead, YOLACTProtonet, YOLACTSegmHead 33 | from .yolo_head import YOLOV3Head 34 | from .yolof_head import YOLOFHead 35 | 36 | __all__ = [ 37 | 'AnchorFreeHead', 'AnchorHead', 'GuidedAnchorHead', 'FeatureAdaption', 38 | 'RPNHead', 'GARPNHead', 'RetinaHead', 'RetinaSepBNHead', 'GARetinaHead', 39 | 'SSDHead', 'FCOSHead', 'RepPointsHead', 'FoveaHead', 40 | 'FreeAnchorRetinaHead', 'ATSSHead', 'FSAFHead', 'NASFCOSHead', 41 | 'PISARetinaHead', 'PISASSDHead', 'GFLHead', 'CornerHead', 'YOLACTHead', 42 | 'YOLACTSegmHead', 'YOLACTProtonet', 'YOLOV3Head', 'PAAHead', 43 | 'SABLRetinaHead', 'CentripetalHead', 'VFNetHead', 'StageCascadeRPNHead', 44 | 'CascadeRPNHead', 'EmbeddingRPNHead', 'LDHead', 'CascadeRPNHead', 45 | 'AutoAssignHead', 'DETRHead', 'YOLOFHead', 'DeformableDETRHead', 46 | 'CenterNetHead' 47 | ] 48 | -------------------------------------------------------------------------------- /mmdet/models/dense_heads/base_dense_head.py: -------------------------------------------------------------------------------- 1 | from abc import ABCMeta, abstractmethod 2 | 3 | from 
mmcv.runner import BaseModule 4 | 5 | 6 | class BaseDenseHead(BaseModule, metaclass=ABCMeta): 7 | """Base class for DenseHeads.""" 8 | 9 | def __init__(self, init_cfg=None): 10 | super(BaseDenseHead, self).__init__(init_cfg) 11 | 12 | @abstractmethod 13 | def loss(self, **kwargs): 14 | """Compute losses of the head.""" 15 | pass 16 | 17 | @abstractmethod 18 | def get_bboxes(self, **kwargs): 19 | """Transform network output for a batch into bbox predictions.""" 20 | pass 21 | 22 | def forward_train(self, 23 | x, 24 | img_metas, 25 | gt_bboxes, 26 | gt_labels=None, 27 | gt_bboxes_ignore=None, 28 | proposal_cfg=None, 29 | **kwargs): 30 | """ 31 | Args: 32 | x (list[Tensor]): Features from FPN. 33 | img_metas (list[dict]): Meta information of each image, e.g., 34 | image size, scaling factor, etc. 35 | gt_bboxes (Tensor): Ground truth bboxes of the image, 36 | shape (num_gts, 4). 37 | gt_labels (Tensor): Ground truth labels of each box, 38 | shape (num_gts,). 39 | gt_bboxes_ignore (Tensor): Ground truth bboxes to be 40 | ignored, shape (num_ignored_gts, 4). 41 | proposal_cfg (mmcv.Config): Test / postprocessing configuration, 42 | if None, test_cfg would be used 43 | 44 | Returns: 45 | tuple: 46 | losses: (dict[str, Tensor]): A dictionary of loss components. 47 | proposal_list (list[Tensor]): Proposals of each image. 48 | """ 49 | outs = self(x) 50 | if gt_labels is None: 51 | loss_inputs = outs + (gt_bboxes, img_metas) 52 | else: 53 | loss_inputs = outs + (gt_bboxes, gt_labels, img_metas) 54 | losses = self.loss(*loss_inputs, gt_bboxes_ignore=gt_bboxes_ignore) 55 | if proposal_cfg is None: 56 | return losses 57 | else: 58 | proposal_list = self.get_bboxes(*outs, img_metas, cfg=proposal_cfg) 59 | return losses, proposal_list 60 | 61 | def simple_test(self, feats, img_metas, rescale=False): 62 | """Test function without test-time augmentation. 63 | 64 | Args: 65 | feats (tuple[torch.Tensor]): Multi-level features from the 66 | upstream network, each is a 4D-tensor. 67 | img_metas (list[dict]): List of image information. 68 | rescale (bool, optional): Whether to rescale the results. 69 | Defaults to False. 70 | 71 | Returns: 72 | list[tuple[Tensor, Tensor]]: Each item in result_list is 2-tuple. 73 | The first item is ``bboxes`` with shape (n, 5), 74 | where 5 represent (tl_x, tl_y, br_x, br_y, score). 75 | The shape of the second tensor in the tuple is ``labels`` 76 | with shape (n,) 77 | """ 78 | return self.simple_test_bboxes(feats, img_metas, rescale=rescale) 79 | -------------------------------------------------------------------------------- /mmdet/models/dense_heads/nasfcos_head.py: -------------------------------------------------------------------------------- 1 | import copy 2 | 3 | import torch.nn as nn 4 | from mmcv.cnn import ConvModule, Scale 5 | 6 | from mmdet.models.dense_heads.fcos_head import FCOSHead 7 | from ..builder import HEADS 8 | 9 | 10 | @HEADS.register_module() 11 | class NASFCOSHead(FCOSHead): 12 | """Anchor-free head used in `NASFCOS `_. 13 | 14 | It is quite similar with FCOS head, except for the searched structure of 15 | classification branch and bbox regression branch, where a structure of 16 | "dconv3x3, conv3x3, dconv3x3, conv1x1" is utilized instead. 
17 | """ 18 | 19 | def __init__(self, *args, init_cfg=None, **kwargs): 20 | if init_cfg is None: 21 | init_cfg = [ 22 | dict(type='Caffe2Xavier', layer=['ConvModule', 'Conv2d']), 23 | dict( 24 | type='Normal', 25 | std=0.01, 26 | override=[ 27 | dict(name='conv_reg'), 28 | dict(name='conv_centerness'), 29 | dict( 30 | name='conv_cls', 31 | type='Normal', 32 | std=0.01, 33 | bias_prob=0.01) 34 | ]), 35 | ] 36 | super(NASFCOSHead, self).__init__(*args, init_cfg=init_cfg, **kwargs) 37 | 38 | def _init_layers(self): 39 | """Initialize layers of the head.""" 40 | dconv3x3_config = dict( 41 | type='DCNv2', 42 | kernel_size=3, 43 | use_bias=True, 44 | deform_groups=2, 45 | padding=1) 46 | conv3x3_config = dict(type='Conv', kernel_size=3, padding=1) 47 | conv1x1_config = dict(type='Conv', kernel_size=1) 48 | 49 | self.arch_config = [ 50 | dconv3x3_config, conv3x3_config, dconv3x3_config, conv1x1_config 51 | ] 52 | self.cls_convs = nn.ModuleList() 53 | self.reg_convs = nn.ModuleList() 54 | for i, op_ in enumerate(self.arch_config): 55 | op = copy.deepcopy(op_) 56 | chn = self.in_channels if i == 0 else self.feat_channels 57 | assert isinstance(op, dict) 58 | use_bias = op.pop('use_bias', False) 59 | padding = op.pop('padding', 0) 60 | kernel_size = op.pop('kernel_size') 61 | module = ConvModule( 62 | chn, 63 | self.feat_channels, 64 | kernel_size, 65 | stride=1, 66 | padding=padding, 67 | norm_cfg=self.norm_cfg, 68 | bias=use_bias, 69 | conv_cfg=op) 70 | 71 | self.cls_convs.append(copy.deepcopy(module)) 72 | self.reg_convs.append(copy.deepcopy(module)) 73 | 74 | self.conv_cls = nn.Conv2d( 75 | self.feat_channels, self.cls_out_channels, 3, padding=1) 76 | self.conv_reg = nn.Conv2d(self.feat_channels, 4, 3, padding=1) 77 | self.conv_centerness = nn.Conv2d(self.feat_channels, 1, 3, padding=1) 78 | 79 | self.scales = nn.ModuleList([Scale(1.0) for _ in self.strides]) 80 | -------------------------------------------------------------------------------- /mmdet/models/detectors/__init__.py: -------------------------------------------------------------------------------- 1 | from .atss import ATSS 2 | from .autoassign import AutoAssign 3 | from .base import BaseDetector 4 | from .cascade_rcnn import CascadeRCNN 5 | from .centernet import CenterNet 6 | from .cornernet import CornerNet 7 | from .deformable_detr import DeformableDETR 8 | from .detr import DETR 9 | from .fast_rcnn import FastRCNN 10 | from .faster_rcnn import FasterRCNN 11 | from .fcos import FCOS 12 | from .fovea import FOVEA 13 | from .fsaf import FSAF 14 | from .gfl import GFL 15 | from .grid_rcnn import GridRCNN 16 | from .htc import HybridTaskCascade 17 | from .kd_one_stage import KnowledgeDistillationSingleStageDetector 18 | from .mask_rcnn import MaskRCNN 19 | from .mask_scoring_rcnn import MaskScoringRCNN 20 | from .nasfcos import NASFCOS 21 | from .paa import PAA 22 | from .point_rend import PointRend 23 | from .reppoints_detector import RepPointsDetector 24 | from .retinanet import RetinaNet 25 | from .rpn import RPN 26 | from .scnet import SCNet 27 | from .single_stage import SingleStageDetector 28 | from .sparse_rcnn import SparseRCNN 29 | from .trident_faster_rcnn import TridentFasterRCNN 30 | from .two_stage import TwoStageDetector 31 | from .vfnet import VFNet 32 | from .yolact import YOLACT 33 | from .yolo import YOLOV3 34 | from .yolof import YOLOF 35 | 36 | __all__ = [ 37 | 'ATSS', 'BaseDetector', 'SingleStageDetector', 'TwoStageDetector', 'RPN', 38 | 'KnowledgeDistillationSingleStageDetector', 'FastRCNN', 'FasterRCNN', 39 | 
'MaskRCNN', 'CascadeRCNN', 'HybridTaskCascade', 'RetinaNet', 'FCOS', 40 | 'GridRCNN', 'MaskScoringRCNN', 'RepPointsDetector', 'FOVEA', 'FSAF', 41 | 'NASFCOS', 'PointRend', 'GFL', 'CornerNet', 'PAA', 'YOLOV3', 'YOLACT', 42 | 'VFNet', 'DETR', 'TridentFasterRCNN', 'SparseRCNN', 'SCNet', 43 | 'DeformableDETR', 'AutoAssign', 'YOLOF', 'CenterNet' 44 | ] 45 | -------------------------------------------------------------------------------- /mmdet/models/detectors/atss.py: -------------------------------------------------------------------------------- 1 | from ..builder import DETECTORS 2 | from .single_stage import SingleStageDetector 3 | 4 | 5 | @DETECTORS.register_module() 6 | class ATSS(SingleStageDetector): 7 | """Implementation of `ATSS `_.""" 8 | 9 | def __init__(self, 10 | backbone, 11 | neck, 12 | bbox_head, 13 | train_cfg=None, 14 | test_cfg=None, 15 | pretrained=None, 16 | init_cfg=None): 17 | super(ATSS, self).__init__(backbone, neck, bbox_head, train_cfg, 18 | test_cfg, pretrained, init_cfg) 19 | -------------------------------------------------------------------------------- /mmdet/models/detectors/autoassign.py: -------------------------------------------------------------------------------- 1 | from ..builder import DETECTORS 2 | from .single_stage import SingleStageDetector 3 | 4 | 5 | @DETECTORS.register_module() 6 | class AutoAssign(SingleStageDetector): 7 | """Implementation of `AutoAssign: Differentiable Label Assignment for Dense 8 | Object Detection `_.""" 9 | 10 | def __init__(self, 11 | backbone, 12 | neck, 13 | bbox_head, 14 | train_cfg=None, 15 | test_cfg=None, 16 | pretrained=None): 17 | super(AutoAssign, self).__init__(backbone, neck, bbox_head, train_cfg, 18 | test_cfg, pretrained) 19 | -------------------------------------------------------------------------------- /mmdet/models/detectors/cascade_rcnn.py: -------------------------------------------------------------------------------- 1 | from ..builder import DETECTORS 2 | from .two_stage import TwoStageDetector 3 | 4 | 5 | @DETECTORS.register_module() 6 | class CascadeRCNN(TwoStageDetector): 7 | r"""Implementation of `Cascade R-CNN: Delving into High Quality Object 8 | Detection `_""" 9 | 10 | def __init__(self, 11 | backbone, 12 | neck=None, 13 | rpn_head=None, 14 | roi_head=None, 15 | train_cfg=None, 16 | test_cfg=None, 17 | pretrained=None, 18 | init_cfg=None): 19 | super(CascadeRCNN, self).__init__( 20 | backbone=backbone, 21 | neck=neck, 22 | rpn_head=rpn_head, 23 | roi_head=roi_head, 24 | train_cfg=train_cfg, 25 | test_cfg=test_cfg, 26 | pretrained=pretrained, 27 | init_cfg=init_cfg) 28 | 29 | def show_result(self, data, result, **kwargs): 30 | """Show prediction results of the detector. 31 | 32 | Args: 33 | data (str or np.ndarray): Image filename or loaded image. 34 | result (Tensor or tuple): The results to draw over `img` 35 | bbox_result or (bbox_result, segm_result). 36 | 37 | Returns: 38 | np.ndarray: The image with bboxes drawn on it. 
39 | """
40 | if self.with_mask:
41 | ms_bbox_result, ms_segm_result = result
42 | if isinstance(ms_bbox_result, dict):
43 | result = (ms_bbox_result['ensemble'],
44 | ms_segm_result['ensemble'])
45 | else:
46 | if isinstance(result, dict):
47 | result = result['ensemble']
48 | return super(CascadeRCNN, self).show_result(data, result, **kwargs)
49 |
-------------------------------------------------------------------------------- /mmdet/models/detectors/deformable_detr.py: --------------------------------------------------------------------------------
1 | from ..builder import DETECTORS
2 | from .detr import DETR
3 |
4 |
5 | @DETECTORS.register_module()
6 | class DeformableDETR(DETR):
7 |
8 | def __init__(self, *args, **kwargs):
9 | super(DETR, self).__init__(*args, **kwargs)  # intentionally skips DETR.__init__, which hard-codes neck=None
10 |
-------------------------------------------------------------------------------- /mmdet/models/detectors/detr.py: --------------------------------------------------------------------------------
1 | import torch
2 |
3 | from ..builder import DETECTORS
4 | from .single_stage import SingleStageDetector
5 |
6 |
7 | @DETECTORS.register_module()
8 | class DETR(SingleStageDetector):
9 | r"""Implementation of `DETR: End-to-End Object Detection with
10 | Transformers <https://arxiv.org/abs/2005.12872>`_"""
11 |
12 | def __init__(self,
13 | backbone,
14 | bbox_head,
15 | train_cfg=None,
16 | test_cfg=None,
17 | pretrained=None,
18 | init_cfg=None):
19 | super(DETR, self).__init__(backbone, None, bbox_head, train_cfg,
20 | test_cfg, pretrained, init_cfg)
21 |
22 | # over-write `onnx_export` because:
23 | # (1) the forward of bbox_head requires img_metas
24 | # (2) the different behavior (e.g. construction of `masks`) between
25 | # torch and ONNX model, during the forward of bbox_head
26 | def onnx_export(self, img, img_metas):
27 | """Test function for exporting to ONNX, without test time augmentation.
28 |
29 | Args:
30 | img (torch.Tensor): input images.
31 | img_metas (list[dict]): List of image information.
32 |
33 | Returns:
34 | tuple[Tensor, Tensor]: dets of shape [N, num_det, 5]
35 | and class labels of shape [N, num_det].
36 | """
37 | x = self.extract_feat(img)
38 | # forward of this head requires img_metas
39 | outs = self.bbox_head.forward_onnx(x, img_metas)
40 | # get shape as tensor
41 | img_shape = torch._shape_as_tensor(img)[2:]
42 | img_metas[0]['img_shape_for_onnx'] = img_shape
43 |
44 | det_bboxes, det_labels = self.bbox_head.onnx_export(*outs, img_metas)
45 |
46 | return det_bboxes, det_labels
47 |
-------------------------------------------------------------------------------- /mmdet/models/detectors/fast_rcnn.py: --------------------------------------------------------------------------------
1 | from ..builder import DETECTORS
2 | from .two_stage import TwoStageDetector
3 |
4 |
5 | @DETECTORS.register_module()
6 | class FastRCNN(TwoStageDetector):
7 | """Implementation of `Fast R-CNN <https://arxiv.org/abs/1504.08083>`_"""
8 |
9 | def __init__(self,
10 | backbone,
11 | roi_head,
12 | train_cfg,
13 | test_cfg,
14 | neck=None,
15 | pretrained=None,
16 | init_cfg=None):
17 | super(FastRCNN, self).__init__(
18 | backbone=backbone,
19 | neck=neck,
20 | roi_head=roi_head,
21 | train_cfg=train_cfg,
22 | test_cfg=test_cfg,
23 | pretrained=pretrained,
24 | init_cfg=init_cfg)
25 |
26 | def forward_test(self, imgs, img_metas, proposals, **kwargs):
27 | """
28 | Args:
29 | imgs (List[Tensor]): the outer list indicates test-time
30 | augmentations and inner Tensor should have a shape NxCxHxW,
31 | which contains all images in the batch.
32 | img_metas (List[List[dict]]): the outer list indicates test-time
33 | augs (multiscale, flip, etc.) and the inner list indicates
34 | images in a batch.
35 | proposals (List[List[Tensor]]): the outer list indicates test-time
36 | augs (multiscale, flip, etc.) and the inner list indicates
37 | images in a batch. The Tensor should have a shape Px4, where
38 | P is the number of proposals.
39 | """
40 | for var, name in [(imgs, 'imgs'), (img_metas, 'img_metas')]:
41 | if not isinstance(var, list):
42 | raise TypeError(f'{name} must be a list, but got {type(var)}')
43 |
44 | num_augs = len(imgs)
45 | if num_augs != len(img_metas):
46 | raise ValueError(f'num of augmentations ({len(imgs)}) '
47 | f'!= num of image meta ({len(img_metas)})')
48 |
49 | if num_augs == 1:
50 | return self.simple_test(imgs[0], img_metas[0], proposals[0],
51 | **kwargs)
52 | else:
53 | # TODO: support test-time augmentation
54 | raise NotImplementedError
55 |
-------------------------------------------------------------------------------- /mmdet/models/detectors/faster_rcnn.py: --------------------------------------------------------------------------------
1 | from ..builder import DETECTORS
2 | from .two_stage import TwoStageDetector
3 |
4 |
5 | @DETECTORS.register_module()
6 | class FasterRCNN(TwoStageDetector):
7 | """Implementation of `Faster R-CNN <https://arxiv.org/abs/1506.01497>`_"""
8 |
9 | def __init__(self,
10 | backbone,
11 | rpn_head,
12 | roi_head,
13 | train_cfg,
14 | test_cfg,
15 | neck=None,
16 | pretrained=None,
17 | init_cfg=None):
18 | super(FasterRCNN, self).__init__(
19 | backbone=backbone,
20 | neck=neck,
21 | rpn_head=rpn_head,
22 | roi_head=roi_head,
23 | train_cfg=train_cfg,
24 | test_cfg=test_cfg,
25 | pretrained=pretrained,
26 | init_cfg=init_cfg)
27 |
-------------------------------------------------------------------------------- /mmdet/models/detectors/fcos.py: --------------------------------------------------------------------------------
1 | from ..builder import DETECTORS
2 | from .single_stage import SingleStageDetector
3 |
4 |
5 | @DETECTORS.register_module()
6 | class FCOS(SingleStageDetector):
7 | """Implementation of `FCOS <https://arxiv.org/abs/1904.01355>`_"""
8 |
9 | def __init__(self,
10 | backbone,
11 | neck,
12 | bbox_head,
13 | train_cfg=None,
14 | test_cfg=None,
15 | pretrained=None,
16 | init_cfg=None):
17 | super(FCOS, self).__init__(backbone, neck, bbox_head, train_cfg,
18 | test_cfg, pretrained, init_cfg)
19 |
-------------------------------------------------------------------------------- /mmdet/models/detectors/fovea.py: --------------------------------------------------------------------------------
1 | from ..builder import DETECTORS
2 | from .single_stage import SingleStageDetector
3 |
4 |
5 | @DETECTORS.register_module()
6 | class FOVEA(SingleStageDetector):
7 | """Implementation of `FoveaBox <https://arxiv.org/abs/1904.03797>`_"""
8 |
9 | def __init__(self,
10 | backbone,
11 | neck,
12 | bbox_head,
13 | train_cfg=None,
14 | test_cfg=None,
15 | pretrained=None,
16 | init_cfg=None):
17 | super(FOVEA, self).__init__(backbone, neck, bbox_head, train_cfg,
18 | test_cfg, pretrained, init_cfg)
19 |
-------------------------------------------------------------------------------- /mmdet/models/detectors/fsaf.py: --------------------------------------------------------------------------------
1 | from ..builder import DETECTORS
2 | from .single_stage import SingleStageDetector
3 |
4 |
5 | @DETECTORS.register_module()
6 | class FSAF(SingleStageDetector):
7 | """Implementation of `FSAF <https://arxiv.org/abs/1903.00621>`_"""
8 |
9 | def __init__(self,
10 | backbone,
11 | neck,
12 | bbox_head,
13 |
train_cfg=None,
14 | test_cfg=None,
15 | pretrained=None,
16 | init_cfg=None):
17 | super(FSAF, self).__init__(backbone, neck, bbox_head, train_cfg,
18 | test_cfg, pretrained, init_cfg)
19 |
-------------------------------------------------------------------------------- /mmdet/models/detectors/gfl.py: --------------------------------------------------------------------------------
1 | from ..builder import DETECTORS
2 | from .single_stage import SingleStageDetector
3 |
4 |
5 | @DETECTORS.register_module()
6 | class GFL(SingleStageDetector):
7 |
8 | def __init__(self,
9 | backbone,
10 | neck,
11 | bbox_head,
12 | train_cfg=None,
13 | test_cfg=None,
14 | pretrained=None,
15 | init_cfg=None):
16 | super(GFL, self).__init__(backbone, neck, bbox_head, train_cfg,
17 | test_cfg, pretrained, init_cfg)
18 |
-------------------------------------------------------------------------------- /mmdet/models/detectors/grid_rcnn.py: --------------------------------------------------------------------------------
1 | from ..builder import DETECTORS
2 | from .two_stage import TwoStageDetector
3 |
4 |
5 | @DETECTORS.register_module()
6 | class GridRCNN(TwoStageDetector):
7 | """Grid R-CNN.
8 |
9 | This detector is the implementation of:
10 | - Grid R-CNN (https://arxiv.org/abs/1811.12030)
11 | - Grid R-CNN Plus: Faster and Better (https://arxiv.org/abs/1906.05688)
12 | """
13 |
14 | def __init__(self,
15 | backbone,
16 | rpn_head,
17 | roi_head,
18 | train_cfg,
19 | test_cfg,
20 | neck=None,
21 | pretrained=None,
22 | init_cfg=None):
23 | super(GridRCNN, self).__init__(
24 | backbone=backbone,
25 | neck=neck,
26 | rpn_head=rpn_head,
27 | roi_head=roi_head,
28 | train_cfg=train_cfg,
29 | test_cfg=test_cfg,
30 | pretrained=pretrained,
31 | init_cfg=init_cfg)
32 |
-------------------------------------------------------------------------------- /mmdet/models/detectors/htc.py: --------------------------------------------------------------------------------
1 | from ..builder import DETECTORS
2 | from .cascade_rcnn import CascadeRCNN
3 |
4 |
5 | @DETECTORS.register_module()
6 | class HybridTaskCascade(CascadeRCNN):
7 | """Implementation of `HTC <https://arxiv.org/abs/1901.07518>`_"""
8 |
9 | def __init__(self, **kwargs):
10 | super(HybridTaskCascade, self).__init__(**kwargs)
11 |
12 | @property
13 | def with_semantic(self):
14 | """bool: whether the detector has a semantic head"""
15 | return self.roi_head.with_semantic
16 |
-------------------------------------------------------------------------------- /mmdet/models/detectors/mask_rcnn.py: --------------------------------------------------------------------------------
1 | from ..builder import DETECTORS
2 | from .two_stage import TwoStageDetector
3 |
4 |
5 | @DETECTORS.register_module()
6 | class MaskRCNN(TwoStageDetector):
7 | """Implementation of `Mask R-CNN <https://arxiv.org/abs/1703.06870>`_"""
8 |
9 | def __init__(self,
10 | backbone,
11 | rpn_head,
12 | roi_head,
13 | train_cfg,
14 | test_cfg,
15 | neck=None,
16 | pretrained=None,
17 | init_cfg=None):
18 | super(MaskRCNN, self).__init__(
19 | backbone=backbone,
20 | neck=neck,
21 | rpn_head=rpn_head,
22 | roi_head=roi_head,
23 | train_cfg=train_cfg,
24 | test_cfg=test_cfg,
25 | pretrained=pretrained,
26 | init_cfg=init_cfg)
27 |
-------------------------------------------------------------------------------- /mmdet/models/detectors/mask_scoring_rcnn.py: --------------------------------------------------------------------------------
1 | from ..builder import DETECTORS
2 | from .two_stage import TwoStageDetector
3 |
4 |
5 | @DETECTORS.register_module()
6 | class
MaskScoringRCNN(TwoStageDetector):
7 | """Mask Scoring RCNN.
8 |
9 | https://arxiv.org/abs/1903.00241
10 | """
11 |
12 | def __init__(self,
13 | backbone,
14 | rpn_head,
15 | roi_head,
16 | train_cfg,
17 | test_cfg,
18 | neck=None,
19 | pretrained=None,
20 | init_cfg=None):
21 | super(MaskScoringRCNN, self).__init__(
22 | backbone=backbone,
23 | neck=neck,
24 | rpn_head=rpn_head,
25 | roi_head=roi_head,
26 | train_cfg=train_cfg,
27 | test_cfg=test_cfg,
28 | pretrained=pretrained,
29 | init_cfg=init_cfg)
30 |
-------------------------------------------------------------------------------- /mmdet/models/detectors/nasfcos.py: --------------------------------------------------------------------------------
1 | from ..builder import DETECTORS
2 | from .single_stage import SingleStageDetector
3 |
4 |
5 | @DETECTORS.register_module()
6 | class NASFCOS(SingleStageDetector):
7 | """NAS-FCOS: Fast Neural Architecture Search for Object Detection.
8 |
9 | https://arxiv.org/abs/1906.04423
10 | """
11 |
12 | def __init__(self,
13 | backbone,
14 | neck,
15 | bbox_head,
16 | train_cfg=None,
17 | test_cfg=None,
18 | pretrained=None,
19 | init_cfg=None):
20 | super(NASFCOS, self).__init__(backbone, neck, bbox_head, train_cfg,
21 | test_cfg, pretrained, init_cfg)
22 |
-------------------------------------------------------------------------------- /mmdet/models/detectors/paa.py: --------------------------------------------------------------------------------
1 | from ..builder import DETECTORS
2 | from .single_stage import SingleStageDetector
3 |
4 |
5 | @DETECTORS.register_module()
6 | class PAA(SingleStageDetector):
7 | """Implementation of `PAA <https://arxiv.org/abs/2007.08103>`_."""
8 |
9 | def __init__(self,
10 | backbone,
11 | neck,
12 | bbox_head,
13 | train_cfg=None,
14 | test_cfg=None,
15 | pretrained=None,
16 | init_cfg=None):
17 | super(PAA, self).__init__(backbone, neck, bbox_head, train_cfg,
18 | test_cfg, pretrained, init_cfg)
19 |
-------------------------------------------------------------------------------- /mmdet/models/detectors/point_rend.py: --------------------------------------------------------------------------------
1 | from ..builder import DETECTORS
2 | from .two_stage import TwoStageDetector
3 |
4 |
5 | @DETECTORS.register_module()
6 | class PointRend(TwoStageDetector):
7 | """PointRend: Image Segmentation as Rendering
8 |
9 | This detector is the implementation of
10 | `PointRend <https://arxiv.org/abs/1912.08193>`_.
11 |
12 | """
13 |
14 | def __init__(self,
15 | backbone,
16 | rpn_head,
17 | roi_head,
18 | train_cfg,
19 | test_cfg,
20 | neck=None,
21 | pretrained=None,
22 | init_cfg=None):
23 | super(PointRend, self).__init__(
24 | backbone=backbone,
25 | neck=neck,
26 | rpn_head=rpn_head,
27 | roi_head=roi_head,
28 | train_cfg=train_cfg,
29 | test_cfg=test_cfg,
30 | pretrained=pretrained,
31 | init_cfg=init_cfg)
32 |
-------------------------------------------------------------------------------- /mmdet/models/detectors/reppoints_detector.py: --------------------------------------------------------------------------------
1 | from ..builder import DETECTORS
2 | from .single_stage import SingleStageDetector
3 |
4 |
5 | @DETECTORS.register_module()
6 | class RepPointsDetector(SingleStageDetector):
7 | """RepPoints: Point Set Representation for Object Detection.
8 |
9 | This detector is the implementation of:
10 | - RepPoints detector (https://arxiv.org/pdf/1904.11490)
11 | """
12 |
13 | def __init__(self,
14 | backbone,
15 | neck,
16 | bbox_head,
17 | train_cfg=None,
18 | test_cfg=None,
19 | pretrained=None,
20 | init_cfg=None):
21 | super(RepPointsDetector,
22 | self).__init__(backbone, neck, bbox_head, train_cfg, test_cfg,
23 | pretrained, init_cfg)
24 |
-------------------------------------------------------------------------------- /mmdet/models/detectors/retinanet.py: --------------------------------------------------------------------------------
1 | from ..builder import DETECTORS
2 | from .single_stage import SingleStageDetector
3 |
4 |
5 | @DETECTORS.register_module()
6 | class RetinaNet(SingleStageDetector):
7 | """Implementation of `RetinaNet <https://arxiv.org/abs/1708.02002>`_"""
8 |
9 | def __init__(self,
10 | backbone,
11 | neck,
12 | bbox_head,
13 | train_cfg=None,
14 | test_cfg=None,
15 | pretrained=None,
16 | init_cfg=None):
17 | super(RetinaNet, self).__init__(backbone, neck, bbox_head, train_cfg,
18 | test_cfg, pretrained, init_cfg)
19 |
-------------------------------------------------------------------------------- /mmdet/models/detectors/scnet.py: --------------------------------------------------------------------------------
1 | from ..builder import DETECTORS
2 | from .cascade_rcnn import CascadeRCNN
3 |
4 |
5 | @DETECTORS.register_module()
6 | class SCNet(CascadeRCNN):
7 | """Implementation of `SCNet <https://arxiv.org/abs/2012.10150>`_"""
8 |
9 | def __init__(self, **kwargs):
10 | super(SCNet, self).__init__(**kwargs)
11 |
-------------------------------------------------------------------------------- /mmdet/models/detectors/trident_faster_rcnn.py: --------------------------------------------------------------------------------
1 | from ..builder import DETECTORS
2 | from .faster_rcnn import FasterRCNN
3 |
4 |
5 | @DETECTORS.register_module()
6 | class TridentFasterRCNN(FasterRCNN):
7 | """Implementation of `TridentNet <https://arxiv.org/abs/1901.01892>`_"""
8 |
9 | def __init__(self,
10 | backbone,
11 | rpn_head,
12 | roi_head,
13 | train_cfg,
14 | test_cfg,
15 | neck=None,
16 | pretrained=None,
17 | init_cfg=None):
18 |
19 | super(TridentFasterRCNN, self).__init__(
20 | backbone=backbone,
21 | neck=neck,
22 | rpn_head=rpn_head,
23 | roi_head=roi_head,
24 | train_cfg=train_cfg,
25 | test_cfg=test_cfg,
26 | pretrained=pretrained,
27 | init_cfg=init_cfg)
28 | assert self.backbone.num_branch == self.roi_head.num_branch
29 | assert self.backbone.test_branch_idx == self.roi_head.test_branch_idx
30 | self.num_branch = self.backbone.num_branch
31 | self.test_branch_idx = self.backbone.test_branch_idx
32 |
33 | def simple_test(self, img, img_metas, proposals=None, rescale=False):
34 | """Test without augmentation."""
35 | assert self.with_bbox, 'Bbox head must be implemented.'
36 | x = self.extract_feat(img)
37 | # `trident_img_metas` is needed by the RoI head in both branches below
38 | num_branch = (self.num_branch if self.test_branch_idx == -1 else 1)
39 | trident_img_metas = img_metas * num_branch
40 | if proposals is None:
41 | proposal_list = self.rpn_head.simple_test_rpn(x, trident_img_metas)
42 | else:
43 | proposal_list = proposals
44 | return self.roi_head.simple_test(
45 | x, proposal_list, trident_img_metas, rescale=rescale)
46 |
47 | def aug_test(self, imgs, img_metas, rescale=False):
48 | """Test with augmentations.
49 |
50 | If rescale is False, then returned bboxes and masks will fit the scale
51 | of imgs[0].
52 | """
53 | x = self.extract_feats(imgs)
54 | num_branch = (self.num_branch if self.test_branch_idx == -1 else 1)
55 | trident_img_metas = [meta * num_branch for meta in img_metas]  # avoid shadowing `img_metas`
56 | proposal_list = self.rpn_head.aug_test_rpn(x, trident_img_metas)
57 | return self.roi_head.aug_test(
58 | x, proposal_list, img_metas, rescale=rescale)
59 |
60 | def forward_train(self, img, img_metas, gt_bboxes, gt_labels, **kwargs):
61 | """Make copies of img and gts to fit multi-branch."""
62 | trident_gt_bboxes = tuple(gt_bboxes * self.num_branch)
63 | trident_gt_labels = tuple(gt_labels * self.num_branch)
64 | trident_img_metas = tuple(img_metas * self.num_branch)
65 |
66 | return super(TridentFasterRCNN,
67 | self).forward_train(img, trident_img_metas,
68 | trident_gt_bboxes, trident_gt_labels)
69 |
-------------------------------------------------------------------------------- /mmdet/models/detectors/vfnet.py: --------------------------------------------------------------------------------
1 | from ..builder import DETECTORS
2 | from .single_stage import SingleStageDetector
3 |
4 |
5 | @DETECTORS.register_module()
6 | class VFNet(SingleStageDetector):
7 | """Implementation of `VarifocalNet
8 | (VFNet) <https://arxiv.org/abs/2008.13367>`_"""
9 |
10 | def __init__(self,
11 | backbone,
12 | neck,
13 | bbox_head,
14 | train_cfg=None,
15 | test_cfg=None,
16 | pretrained=None,
17 | init_cfg=None):
18 | super(VFNet, self).__init__(backbone, neck, bbox_head, train_cfg,
19 | test_cfg, pretrained, init_cfg)
20 |
-------------------------------------------------------------------------------- /mmdet/models/detectors/yolo.py: --------------------------------------------------------------------------------
1 | # Copyright (c) 2019 Western Digital Corporation or its affiliates.
2 |
3 | from ..builder import DETECTORS
4 | from .single_stage import SingleStageDetector
5 |
6 |
7 | @DETECTORS.register_module()
8 | class YOLOV3(SingleStageDetector):
9 |
10 | def __init__(self,
11 | backbone,
12 | neck,
13 | bbox_head,
14 | train_cfg=None,
15 | test_cfg=None,
16 | pretrained=None,
17 | init_cfg=None):
18 | super(YOLOV3, self).__init__(backbone, neck, bbox_head, train_cfg,
19 | test_cfg, pretrained, init_cfg)
20 |
-------------------------------------------------------------------------------- /mmdet/models/detectors/yolof.py: --------------------------------------------------------------------------------
1 | from ..builder import DETECTORS
2 | from .single_stage import SingleStageDetector
3 |
4 |
5 | @DETECTORS.register_module()
6 | class YOLOF(SingleStageDetector):
7 | r"""Implementation of `You Only Look One-level Feature
8 | <https://arxiv.org/abs/2103.09460>`_"""
9 |
10 | def __init__(self,
11 | backbone,
12 | neck,
13 | bbox_head,
14 | train_cfg=None,
15 | test_cfg=None,
16 | pretrained=None):
17 | super(YOLOF, self).__init__(backbone, neck, bbox_head, train_cfg,
18 | test_cfg, pretrained)
19 |
-------------------------------------------------------------------------------- /mmdet/models/losses/__init__.py: --------------------------------------------------------------------------------
1 | from .accuracy import Accuracy, accuracy
2 | from .ae_loss import AssociativeEmbeddingLoss
3 | from .balanced_l1_loss import BalancedL1Loss, balanced_l1_loss
4 | from .cross_entropy_loss import (CrossEntropyLoss, binary_cross_entropy,
5 | cross_entropy, mask_cross_entropy)
6 | from .focal_loss import FocalLoss, sigmoid_focal_loss
7 | from .gaussian_focal_loss import GaussianFocalLoss
8 | from .gfocal_loss import DistributionFocalLoss, QualityFocalLoss
9 | from .ghm_loss import
GHMC, GHMR 10 | from .iou_loss import (BoundedIoULoss, CIoULoss, DIoULoss, GIoULoss, IoULoss, 11 | bounded_iou_loss, iou_loss) 12 | from .kd_loss import KnowledgeDistillationKLDivLoss 13 | from .mse_loss import MSELoss, mse_loss 14 | from .pisa_loss import carl_loss, isr_p 15 | from .seesaw_loss import SeesawLoss 16 | from .smooth_l1_loss import L1Loss, SmoothL1Loss, l1_loss, smooth_l1_loss 17 | from .utils import reduce_loss, weight_reduce_loss, weighted_loss 18 | from .varifocal_loss import VarifocalLoss 19 | 20 | __all__ = [ 21 | 'accuracy', 'Accuracy', 'cross_entropy', 'binary_cross_entropy', 22 | 'mask_cross_entropy', 'CrossEntropyLoss', 'sigmoid_focal_loss', 23 | 'FocalLoss', 'smooth_l1_loss', 'SmoothL1Loss', 'balanced_l1_loss', 24 | 'BalancedL1Loss', 'mse_loss', 'MSELoss', 'iou_loss', 'bounded_iou_loss', 25 | 'IoULoss', 'BoundedIoULoss', 'GIoULoss', 'DIoULoss', 'CIoULoss', 'GHMC', 26 | 'GHMR', 'reduce_loss', 'weight_reduce_loss', 'weighted_loss', 'L1Loss', 27 | 'l1_loss', 'isr_p', 'carl_loss', 'AssociativeEmbeddingLoss', 28 | 'GaussianFocalLoss', 'QualityFocalLoss', 'DistributionFocalLoss', 29 | 'VarifocalLoss', 'KnowledgeDistillationKLDivLoss', 'SeesawLoss' 30 | ] 31 | -------------------------------------------------------------------------------- /mmdet/models/losses/accuracy.py: -------------------------------------------------------------------------------- 1 | import mmcv 2 | import torch.nn as nn 3 | 4 | 5 | @mmcv.jit(coderize=True) 6 | def accuracy(pred, target, topk=1, thresh=None): 7 | """Calculate accuracy according to the prediction and target. 8 | 9 | Args: 10 | pred (torch.Tensor): The model prediction, shape (N, num_class) 11 | target (torch.Tensor): The target of each prediction, shape (N, ) 12 | topk (int | tuple[int], optional): If the predictions in ``topk`` 13 | matches the target, the predictions will be regarded as 14 | correct ones. Defaults to 1. 15 | thresh (float, optional): If not None, predictions with scores under 16 | this threshold are considered incorrect. Default to None. 17 | 18 | Returns: 19 | float | tuple[float]: If the input ``topk`` is a single integer, 20 | the function will return a single float as accuracy. If 21 | ``topk`` is a tuple containing multiple integers, the 22 | function will return a tuple containing accuracies of 23 | each ``topk`` number. 24 | """ 25 | assert isinstance(topk, (int, tuple)) 26 | if isinstance(topk, int): 27 | topk = (topk, ) 28 | return_single = True 29 | else: 30 | return_single = False 31 | 32 | maxk = max(topk) 33 | if pred.size(0) == 0: 34 | accu = [pred.new_tensor(0.) for i in range(len(topk))] 35 | return accu[0] if return_single else accu 36 | assert pred.ndim == 2 and target.ndim == 1 37 | assert pred.size(0) == target.size(0) 38 | assert maxk <= pred.size(1), \ 39 | f'maxk {maxk} exceeds pred dimension {pred.size(1)}' 40 | pred_value, pred_label = pred.topk(maxk, dim=1) 41 | pred_label = pred_label.t() # transpose to shape (maxk, N) 42 | correct = pred_label.eq(target.view(1, -1).expand_as(pred_label)) 43 | if thresh is not None: 44 | # Only prediction values larger than thresh are counted as correct 45 | correct = correct & (pred_value > thresh).t() 46 | res = [] 47 | for k in topk: 48 | correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True) 49 | res.append(correct_k.mul_(100.0 / pred.size(0))) 50 | return res[0] if return_single else res 51 | 52 | 53 | class Accuracy(nn.Module): 54 | 55 | def __init__(self, topk=(1, ), thresh=None): 56 | """Module to calculate the accuracy. 
57 |
58 | Args:
59 | topk (tuple, optional): The criterion used to calculate the
60 | accuracy. Defaults to (1,).
61 | thresh (float, optional): If not None, predictions with scores
62 | under this threshold are considered incorrect. Default to None.
63 | """
64 | super().__init__()
65 | self.topk = topk
66 | self.thresh = thresh
67 |
68 | def forward(self, pred, target):
69 | """Forward function to calculate accuracy.
70 |
71 | Args:
72 | pred (torch.Tensor): Prediction of models.
73 | target (torch.Tensor): Target for each prediction.
74 |
75 | Returns:
76 | tuple[float]: The accuracies under different topk criterions.
77 | """
78 | return accuracy(pred, target, self.topk, self.thresh)
79 |
-------------------------------------------------------------------------------- /mmdet/models/losses/gaussian_focal_loss.py: --------------------------------------------------------------------------------
1 | import mmcv
2 | import torch.nn as nn
3 |
4 | from ..builder import LOSSES
5 | from .utils import weighted_loss
6 |
7 |
8 | @mmcv.jit(derivate=True, coderize=True)
9 | @weighted_loss
10 | def gaussian_focal_loss(pred, gaussian_target, alpha=2.0, gamma=4.0):
11 | """`Focal Loss <https://arxiv.org/abs/1708.02002>`_ for targets in gaussian
12 | distribution.
13 |
14 | Args:
15 | pred (torch.Tensor): The prediction.
16 | gaussian_target (torch.Tensor): The learning target of the prediction
17 | in gaussian distribution.
18 | alpha (float, optional): A balanced form for Focal Loss.
19 | Defaults to 2.0.
20 | gamma (float, optional): The gamma for calculating the modulating
21 | factor. Defaults to 4.0.
22 | """
23 | eps = 1e-12
24 | pos_weights = gaussian_target.eq(1)
25 | neg_weights = (1 - gaussian_target).pow(gamma)
26 | pos_loss = -(pred + eps).log() * (1 - pred).pow(alpha) * pos_weights
27 | neg_loss = -(1 - pred + eps).log() * pred.pow(alpha) * neg_weights
28 | return pos_loss + neg_loss
29 |
30 |
31 | @LOSSES.register_module()
32 | class GaussianFocalLoss(nn.Module):
33 | """GaussianFocalLoss is a variant of focal loss.
34 |
35 | More details can be found in the `paper
36 | <https://arxiv.org/abs/1808.01244>`_
37 | Code is modified from `kp_utils.py
38 | <https://github.com/princeton-vl/CornerNet/blob/master/models/py_utils/kp_utils.py>`_ # noqa: E501
39 | Please notice that the target in GaussianFocalLoss is a gaussian heatmap,
40 | not a 0/1 binary target.
41 |
42 | Args:
43 | alpha (float): Power of prediction.
44 | gamma (float): Power of target for negative samples.
45 | reduction (str): Options are "none", "mean" and "sum".
46 | loss_weight (float): Loss weight of current loss.
47 | """
48 |
49 | def __init__(self,
50 | alpha=2.0,
51 | gamma=4.0,
52 | reduction='mean',
53 | loss_weight=1.0):
54 | super(GaussianFocalLoss, self).__init__()
55 | self.alpha = alpha
56 | self.gamma = gamma
57 | self.reduction = reduction
58 | self.loss_weight = loss_weight
59 |
60 | def forward(self,
61 | pred,
62 | target,
63 | weight=None,
64 | avg_factor=None,
65 | reduction_override=None):
66 | """Forward function.
67 |
68 | Args:
69 | pred (torch.Tensor): The prediction.
70 | target (torch.Tensor): The learning target of the prediction
71 | in gaussian distribution.
72 | weight (torch.Tensor, optional): The weight of loss for each
73 | prediction. Defaults to None.
74 | avg_factor (int, optional): Average factor that is used to average
75 | the loss. Defaults to None.
76 | reduction_override (str, optional): The reduction method used to
77 | override the original reduction method of the loss.
78 | Defaults to None.
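
        Returns:
            torch.Tensor: The calculated loss.

        Example (a minimal sketch, not from the original docstring; it
        assumes predictions already lie in (0, 1), e.g. after a sigmoid,
        and that the target is a gaussian heatmap of the same shape)::

            >>> import torch
            >>> loss_fn = GaussianFocalLoss()
            >>> pred = torch.rand(2, 80, 32, 32).clamp(1e-4, 1 - 1e-4)
            >>> target = torch.rand(2, 80, 32, 32)
            >>> loss = loss_fn(pred, target)  # scalar ('mean' reduction)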
79 | """
80 | assert reduction_override in (None, 'none', 'mean', 'sum')
81 | reduction = (
82 | reduction_override if reduction_override else self.reduction)
83 | loss_reg = self.loss_weight * gaussian_focal_loss(
84 | pred,
85 | target,
86 | weight,
87 | alpha=self.alpha,
88 | gamma=self.gamma,
89 | reduction=reduction,
90 | avg_factor=avg_factor)
91 | return loss_reg
92 |
-------------------------------------------------------------------------------- /mmdet/models/losses/kd_loss.py: --------------------------------------------------------------------------------
1 | import mmcv
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 |
5 | from ..builder import LOSSES
6 | from .utils import weighted_loss
7 |
8 |
9 | @mmcv.jit(derivate=True, coderize=True)
10 | @weighted_loss
11 | def knowledge_distillation_kl_div_loss(pred,
12 | soft_label,
13 | T,
14 | detach_target=True):
15 | r"""Loss function for knowledge distilling using KL divergence.
16 |
17 | Args:
18 | pred (Tensor): Predicted logits with shape (N, n + 1).
19 | soft_label (Tensor): Target logits with shape (N, n + 1).
20 | T (int): Temperature for distillation.
21 | detach_target (bool): Remove soft_label from automatic differentiation.
22 |
23 | Returns:
24 | torch.Tensor: Loss tensor with shape (N,).
25 | """
26 | assert pred.size() == soft_label.size()
27 | target = F.softmax(soft_label / T, dim=1)
28 | if detach_target:
29 | target = target.detach()
30 |
31 | kd_loss = F.kl_div(
32 | F.log_softmax(pred / T, dim=1), target, reduction='none').mean(1) * (
33 | T * T)
34 |
35 | return kd_loss
36 |
37 |
38 | @LOSSES.register_module()
39 | class KnowledgeDistillationKLDivLoss(nn.Module):
40 | """Loss function for knowledge distilling using KL divergence.
41 |
42 | Args:
43 | reduction (str): Options are `'none'`, `'mean'` and `'sum'`.
44 | loss_weight (float): Loss weight of current loss.
45 | T (int): Temperature for distillation.
46 | """
47 |
48 | def __init__(self, reduction='mean', loss_weight=1.0, T=10):
49 | super(KnowledgeDistillationKLDivLoss, self).__init__()
50 | assert T >= 1
51 | self.reduction = reduction
52 | self.loss_weight = loss_weight
53 | self.T = T
54 |
55 | def forward(self,
56 | pred,
57 | soft_label,
58 | weight=None,
59 | avg_factor=None,
60 | reduction_override=None):
61 | """Forward function.
62 |
63 | Args:
64 | pred (Tensor): Predicted logits with shape (N, n + 1).
65 | soft_label (Tensor): Target logits with shape (N, n + 1).
66 | weight (torch.Tensor, optional): The weight of loss for each
67 | prediction. Defaults to None.
68 | avg_factor (int, optional): Average factor that is used to average
69 | the loss. Defaults to None.
70 | reduction_override (str, optional): The reduction method used to
71 | override the original reduction method of the loss.
72 | Defaults to None.
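
        Returns:
            torch.Tensor: The calculated loss.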
73 | """
74 | assert reduction_override in (None, 'none', 'mean', 'sum')
75 |
76 | reduction = (
77 | reduction_override if reduction_override else self.reduction)
78 |
79 | loss_kd = self.loss_weight * knowledge_distillation_kl_div_loss(
80 | pred,
81 | soft_label,
82 | weight,
83 | reduction=reduction,
84 | avg_factor=avg_factor,
85 | T=self.T)
86 |
87 | return loss_kd
88 |
-------------------------------------------------------------------------------- /mmdet/models/losses/mse_loss.py: --------------------------------------------------------------------------------
1 | import torch.nn as nn
2 | import torch.nn.functional as F
3 |
4 | from ..builder import LOSSES
5 | from .utils import weighted_loss
6 |
7 |
8 | @weighted_loss
9 | def mse_loss(pred, target):
10 | """Wrapper of MSE loss."""
11 | return F.mse_loss(pred, target, reduction='none')
12 |
13 |
14 | @LOSSES.register_module()
15 | class MSELoss(nn.Module):
16 | """MSELoss.
17 |
18 | Args:
19 | reduction (str, optional): The method that reduces the loss to a
20 | scalar. Options are "none", "mean" and "sum".
21 | loss_weight (float, optional): The weight of the loss. Defaults to 1.0
22 | """
23 |
24 | def __init__(self, reduction='mean', loss_weight=1.0):
25 | super().__init__()
26 | self.reduction = reduction
27 | self.loss_weight = loss_weight
28 |
29 | def forward(self,
30 | pred,
31 | target,
32 | weight=None,
33 | avg_factor=None,
34 | reduction_override=None):
35 | """Forward function of loss.
36 |
37 | Args:
38 | pred (torch.Tensor): The prediction.
39 | target (torch.Tensor): The learning target of the prediction.
40 | weight (torch.Tensor, optional): Weight of the loss for each
41 | prediction. Defaults to None.
42 | avg_factor (int, optional): Average factor that is used to average
43 | the loss. Defaults to None.
44 | reduction_override (str, optional): The reduction method used to
45 | override the original reduction method of the loss.
46 | Defaults to None.
47 |
48 | Returns:
49 | torch.Tensor: The calculated loss.
50 | """
51 | assert reduction_override in (None, 'none', 'mean', 'sum')
52 | reduction = (
53 | reduction_override if reduction_override else self.reduction)
54 | loss = self.loss_weight * mse_loss(
55 | pred, target, weight, reduction=reduction, avg_factor=avg_factor)
56 | return loss
57 |
-------------------------------------------------------------------------------- /mmdet/models/losses/utils.py: --------------------------------------------------------------------------------
1 | import functools
2 |
3 | import mmcv
4 | import torch.nn.functional as F
5 |
6 |
7 | def reduce_loss(loss, reduction):
8 | """Reduce loss as specified.
9 |
10 | Args:
11 | loss (Tensor): Elementwise loss tensor.
12 | reduction (str): Options are "none", "mean" and "sum".
13 |
14 | Returns:
15 | Tensor: Reduced loss tensor.
16 | """
17 | reduction_enum = F._Reduction.get_enum(reduction)
18 | # none: 0, elementwise_mean:1, sum: 2
19 | if reduction_enum == 0:
20 | return loss
21 | elif reduction_enum == 1:
22 | return loss.mean()
23 | elif reduction_enum == 2:
24 | return loss.sum()
25 |
26 |
27 | @mmcv.jit(derivate=True, coderize=True)
28 | def weight_reduce_loss(loss, weight=None, reduction='mean', avg_factor=None):
29 | """Apply element-wise weight and reduce loss.
30 |
31 | Args:
32 | loss (Tensor): Element-wise loss.
33 | weight (Tensor): Element-wise weights.
34 | reduction (str): Same as built-in losses of PyTorch.
35 | avg_factor (float): Average factor when computing the mean of losses.
36 | 37 | Returns: 38 | Tensor: Processed loss values. 39 | """ 40 | # if weight is specified, apply element-wise weight 41 | if weight is not None: 42 | loss = loss * weight 43 | 44 | # if avg_factor is not specified, just reduce the loss 45 | if avg_factor is None: 46 | loss = reduce_loss(loss, reduction) 47 | else: 48 | # if reduction is mean, then average the loss by avg_factor 49 | if reduction == 'mean': 50 | loss = loss.sum() / avg_factor 51 | # if reduction is 'none', then do nothing, otherwise raise an error 52 | elif reduction != 'none': 53 | raise ValueError('avg_factor can not be used with reduction="sum"') 54 | return loss 55 | 56 | 57 | def weighted_loss(loss_func): 58 | """Create a weighted version of a given loss function. 59 | 60 | To use this decorator, the loss function must have the signature like 61 | `loss_func(pred, target, **kwargs)`. The function only needs to compute 62 | element-wise loss without any reduction. This decorator will add weight 63 | and reduction arguments to the function. The decorated function will have 64 | the signature like `loss_func(pred, target, weight=None, reduction='mean', 65 | avg_factor=None, **kwargs)`. 66 | 67 | :Example: 68 | 69 | >>> import torch 70 | >>> @weighted_loss 71 | >>> def l1_loss(pred, target): 72 | >>> return (pred - target).abs() 73 | 74 | >>> pred = torch.Tensor([0, 2, 3]) 75 | >>> target = torch.Tensor([1, 1, 1]) 76 | >>> weight = torch.Tensor([1, 0, 1]) 77 | 78 | >>> l1_loss(pred, target) 79 | tensor(1.3333) 80 | >>> l1_loss(pred, target, weight) 81 | tensor(1.) 82 | >>> l1_loss(pred, target, reduction='none') 83 | tensor([1., 1., 2.]) 84 | >>> l1_loss(pred, target, weight, avg_factor=2) 85 | tensor(1.5000) 86 | """ 87 | 88 | @functools.wraps(loss_func) 89 | def wrapper(pred, 90 | target, 91 | weight=None, 92 | reduction='mean', 93 | avg_factor=None, 94 | **kwargs): 95 | # get element-wise loss 96 | loss = loss_func(pred, target, **kwargs) 97 | loss = weight_reduce_loss(loss, weight, reduction, avg_factor) 98 | return loss 99 | 100 | return wrapper 101 | -------------------------------------------------------------------------------- /mmdet/models/necks/__init__.py: -------------------------------------------------------------------------------- 1 | from .bfp import BFP 2 | from .channel_mapper import ChannelMapper 3 | from .ct_resnet_neck import CTResNetNeck 4 | from .dilated_encoder import DilatedEncoder 5 | from .fpg import FPG 6 | from .fpn import FPN 7 | from .fpn_carafe import FPN_CARAFE 8 | from .hrfpn import HRFPN 9 | from .nas_fpn import NASFPN 10 | from .nasfcos_fpn import NASFCOS_FPN 11 | from .pafpn import PAFPN 12 | from .rfp import RFP 13 | from .ssd_neck import SSDNeck 14 | from .yolo_neck import YOLOV3Neck 15 | from .cbnet_fpn import CBFPN 16 | 17 | __all__ = [ 18 | 'FPN', 'BFP', 'ChannelMapper', 'HRFPN', 'NASFPN', 'FPN_CARAFE', 'PAFPN', 19 | 'NASFCOS_FPN', 'RFP', 'YOLOV3Neck', 'FPG', 'DilatedEncoder', 20 | 'CTResNetNeck', 'SSDNeck', 'CBFPN' 21 | ] 22 | -------------------------------------------------------------------------------- /mmdet/models/necks/cbnet_fpn.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch.nn.functional as F 3 | from mmcv.cnn import xavier_init 4 | 5 | from ..builder import NECKS 6 | from .fpn import FPN 7 | from .. 
import builder
8 |
9 | @NECKS.register_module()
10 | class CBFPN(FPN):
11 | '''
12 | FPN with weight sharing,
13 | which supports multiple outputs from CBNet.
14 | '''
15 | def forward(self, inputs):
16 | if not isinstance(inputs[0], (list, tuple)):
17 | inputs = [inputs]
18 |
19 | if self.training:
20 | outs = []
21 | for x in inputs:
22 | out = super().forward(x)
23 | outs.append(out)
24 | return outs
25 | else:
26 | out = super().forward(inputs[-1])
27 | return out
28 |
-------------------------------------------------------------------------------- /mmdet/models/roi_heads/__init__.py: --------------------------------------------------------------------------------
1 | from .base_roi_head import BaseRoIHead
2 | from .bbox_heads import (BBoxHead, ConvFCBBoxHead, DIIHead,
3 | DoubleConvFCBBoxHead, SABLHead, SCNetBBoxHead,
4 | Shared2FCBBoxHead, Shared4Conv1FCBBoxHead)
5 | from .cascade_roi_head import CascadeRoIHead
6 | from .double_roi_head import DoubleHeadRoIHead
7 | from .dynamic_roi_head import DynamicRoIHead
8 | from .grid_roi_head import GridRoIHead
9 | from .htc_roi_head import HybridTaskCascadeRoIHead
10 | from .mask_heads import (CoarseMaskHead, FCNMaskHead, FeatureRelayHead,
11 | FusedSemanticHead, GlobalContextHead, GridHead,
12 | HTCMaskHead, MaskIoUHead, MaskPointHead,
13 | SCNetMaskHead, SCNetSemanticHead)
14 | from .mask_scoring_roi_head import MaskScoringRoIHead
15 | from .pisa_roi_head import PISARoIHead
16 | from .point_rend_roi_head import PointRendRoIHead
17 | from .roi_extractors import (BaseRoIExtractor, GenericRoIExtractor,
18 | SingleRoIExtractor)
19 | from .scnet_roi_head import SCNetRoIHead
20 | from .shared_heads import ResLayer
21 | from .sparse_roi_head import SparseRoIHead
22 | from .standard_roi_head import StandardRoIHead
23 | from .trident_roi_head import TridentRoIHead
24 |
25 | __all__ = [
26 | 'BaseRoIHead', 'CascadeRoIHead', 'DoubleHeadRoIHead', 'MaskScoringRoIHead',
27 | 'HybridTaskCascadeRoIHead', 'GridRoIHead', 'ResLayer', 'BBoxHead',
28 | 'ConvFCBBoxHead', 'DIIHead', 'SABLHead', 'Shared2FCBBoxHead',
29 | 'StandardRoIHead', 'Shared4Conv1FCBBoxHead', 'DoubleConvFCBBoxHead',
30 | 'FCNMaskHead', 'HTCMaskHead', 'FusedSemanticHead', 'GridHead',
31 | 'MaskIoUHead', 'BaseRoIExtractor', 'GenericRoIExtractor',
32 | 'SingleRoIExtractor', 'PISARoIHead', 'PointRendRoIHead', 'MaskPointHead',
33 | 'CoarseMaskHead', 'DynamicRoIHead', 'SparseRoIHead', 'TridentRoIHead',
34 | 'SCNetRoIHead', 'SCNetMaskHead', 'SCNetSemanticHead', 'SCNetBBoxHead',
35 | 'FeatureRelayHead', 'GlobalContextHead'
36 | ]
37 |
-------------------------------------------------------------------------------- /mmdet/models/roi_heads/bbox_heads/__init__.py: --------------------------------------------------------------------------------
1 | from .bbox_head import BBoxHead
2 | from .convfc_bbox_head import (ConvFCBBoxHead, Shared2FCBBoxHead,
3 | Shared4Conv1FCBBoxHead)
4 | from .dii_head import DIIHead
5 | from .double_bbox_head import DoubleConvFCBBoxHead
6 | from .sabl_head import SABLHead
7 | from .scnet_bbox_head import SCNetBBoxHead
8 |
9 | __all__ = [
10 | 'BBoxHead', 'ConvFCBBoxHead', 'Shared2FCBBoxHead',
11 | 'Shared4Conv1FCBBoxHead', 'DoubleConvFCBBoxHead', 'SABLHead', 'DIIHead',
12 | 'SCNetBBoxHead'
13 | ]
14 |
-------------------------------------------------------------------------------- /mmdet/models/roi_heads/bbox_heads/scnet_bbox_head.py: --------------------------------------------------------------------------------
1 | from mmdet.models.builder import HEADS
2 | from
.convfc_bbox_head import ConvFCBBoxHead
3 |
4 |
5 | @HEADS.register_module()
6 | class SCNetBBoxHead(ConvFCBBoxHead):
7 | """BBox head for `SCNet <https://arxiv.org/abs/2012.10150>`_.
8 |
9 | This inherits ``ConvFCBBoxHead`` with a modified forward() function,
10 | allowing us to get the intermediate shared feature.
11 | """
12 |
13 | def _forward_shared(self, x):
14 | """Forward function for shared part."""
15 | if self.num_shared_convs > 0:
16 | for conv in self.shared_convs:
17 | x = conv(x)
18 |
19 | if self.num_shared_fcs > 0:
20 | if self.with_avg_pool:
21 | x = self.avg_pool(x)
22 |
23 | x = x.flatten(1)
24 |
25 | for fc in self.shared_fcs:
26 | x = self.relu(fc(x))
27 |
28 | return x
29 |
30 | def _forward_cls_reg(self, x):
31 | """Forward function for classification and regression parts."""
32 | x_cls = x
33 | x_reg = x
34 |
35 | for conv in self.cls_convs:
36 | x_cls = conv(x_cls)
37 | if x_cls.dim() > 2:
38 | if self.with_avg_pool:
39 | x_cls = self.avg_pool(x_cls)
40 | x_cls = x_cls.flatten(1)
41 | for fc in self.cls_fcs:
42 | x_cls = self.relu(fc(x_cls))
43 |
44 | for conv in self.reg_convs:
45 | x_reg = conv(x_reg)
46 | if x_reg.dim() > 2:
47 | if self.with_avg_pool:
48 | x_reg = self.avg_pool(x_reg)
49 | x_reg = x_reg.flatten(1)
50 | for fc in self.reg_fcs:
51 | x_reg = self.relu(fc(x_reg))
52 |
53 | cls_score = self.fc_cls(x_cls) if self.with_cls else None
54 | bbox_pred = self.fc_reg(x_reg) if self.with_reg else None
55 |
56 | return cls_score, bbox_pred
57 |
58 | def forward(self, x, return_shared_feat=False):
59 | """Forward function.
60 |
61 | Args:
62 | x (Tensor): input features
63 | return_shared_feat (bool): If True, return cls-reg-shared feature.
64 |
65 | Returns:
66 | out (tuple[Tensor]): contain ``cls_score`` and ``bbox_pred``,
67 | if ``return_shared_feat`` is True, append ``x_shared`` to the
68 | returned tuple.
69 | """
70 | x_shared = self._forward_shared(x)
71 | out = self._forward_cls_reg(x_shared)
72 |
73 | if return_shared_feat:
74 | out += (x_shared, )
75 |
76 | return out
77 |
-------------------------------------------------------------------------------- /mmdet/models/roi_heads/double_roi_head.py: --------------------------------------------------------------------------------
1 | from ..builder import HEADS
2 | from .standard_roi_head import StandardRoIHead
3 |
4 |
5 | @HEADS.register_module()
6 | class DoubleHeadRoIHead(StandardRoIHead):
7 | """RoI head for Double Head RCNN.
8 |
9 | https://arxiv.org/abs/1904.06493
10 | """
11 |
12 | def __init__(self, reg_roi_scale_factor, **kwargs):
13 | super(DoubleHeadRoIHead, self).__init__(**kwargs)
14 | self.reg_roi_scale_factor = reg_roi_scale_factor
15 |
16 | def _bbox_forward(self, x, rois):
17 | """Box head forward function used in both training and testing time."""
18 | bbox_cls_feats = self.bbox_roi_extractor(
19 | x[:self.bbox_roi_extractor.num_inputs], rois)
20 | bbox_reg_feats = self.bbox_roi_extractor(
21 | x[:self.bbox_roi_extractor.num_inputs],
22 | rois,
23 | roi_scale_factor=self.reg_roi_scale_factor)
24 | if self.with_shared_head:
25 | bbox_cls_feats = self.shared_head(bbox_cls_feats)
26 | bbox_reg_feats = self.shared_head(bbox_reg_feats)
27 | cls_score, bbox_pred = self.bbox_head(bbox_cls_feats, bbox_reg_feats)
28 |
29 | bbox_results = dict(
30 | cls_score=cls_score,
31 | bbox_pred=bbox_pred,
32 | bbox_feats=bbox_cls_feats)
33 | return bbox_results
34 |
-------------------------------------------------------------------------------- /mmdet/models/roi_heads/mask_heads/__init__.py: --------------------------------------------------------------------------------
1 | from .coarse_mask_head import CoarseMaskHead
2 | from .fcn_mask_head import FCNMaskHead
3 | from .feature_relay_head import FeatureRelayHead
4 | from .fused_semantic_head import FusedSemanticHead
5 | from .global_context_head import GlobalContextHead
6 | from .grid_head import GridHead
7 | from .htc_mask_head import HTCMaskHead
8 | from .mask_point_head import MaskPointHead
9 | from .maskiou_head import MaskIoUHead
10 | from .scnet_mask_head import SCNetMaskHead
11 | from .scnet_semantic_head import SCNetSemanticHead
12 |
13 | __all__ = [
14 | 'FCNMaskHead', 'HTCMaskHead', 'FusedSemanticHead', 'GridHead',
15 | 'MaskIoUHead', 'CoarseMaskHead', 'MaskPointHead', 'SCNetMaskHead',
16 | 'SCNetSemanticHead', 'GlobalContextHead', 'FeatureRelayHead'
17 | ]
18 |
-------------------------------------------------------------------------------- /mmdet/models/roi_heads/mask_heads/feature_relay_head.py: --------------------------------------------------------------------------------
1 | import torch.nn as nn
2 | from mmcv.runner import BaseModule, auto_fp16
3 |
4 | from mmdet.models.builder import HEADS
5 |
6 |
7 | @HEADS.register_module()
8 | class FeatureRelayHead(BaseModule):
9 | """Feature Relay Head used in `SCNet <https://arxiv.org/abs/2012.10150>`_.
10 |
11 | Args:
12 | in_channels (int, optional): number of input channels. Default: 1024.
13 | out_conv_channels (int, optional): number of output channels before
14 | classification layer. Default: 256.
15 | roi_feat_size (int, optional): roi feat size at box head. Default: 7.
16 | scale_factor (int, optional): scale factor to match roi feat size
17 | at mask head. Default: 2.
18 | init_cfg (dict or list[dict], optional): Initialization config dict.
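            Default: dict(type='Kaiming', layer='Linear').

    Example (a minimal sketch, not part of the original docs; with the
    defaults below, flat RoI features from the bbox head are projected
    back into a 2x-upsampled spatial map for the mask head)::

        >>> import torch
        >>> head = FeatureRelayHead()  # in_channels=1024, out_conv_channels=256
        >>> x = torch.rand(4, 1024)
        >>> out = head(x)  # shape (4, 256, 14, 14)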
19 | """
20 |
21 | def __init__(self,
22 | in_channels=1024,
23 | out_conv_channels=256,
24 | roi_feat_size=7,
25 | scale_factor=2,
26 | init_cfg=dict(type='Kaiming', layer='Linear')):
27 | super(FeatureRelayHead, self).__init__(init_cfg)
28 | assert isinstance(roi_feat_size, int)
29 |
30 | self.in_channels = in_channels
31 | self.out_conv_channels = out_conv_channels
32 | self.roi_feat_size = roi_feat_size
33 | self.out_channels = (roi_feat_size**2) * out_conv_channels
34 | self.scale_factor = scale_factor
35 | self.fp16_enabled = False
36 |
37 | self.fc = nn.Linear(self.in_channels, self.out_channels)
38 | self.upsample = nn.Upsample(
39 | scale_factor=scale_factor, mode='bilinear', align_corners=True)
40 |
41 | @auto_fp16()
42 | def forward(self, x):
43 | """Forward function."""
44 | N, in_C = x.shape
45 | if N > 0:
46 | out_C = self.out_conv_channels
47 | out_HW = self.roi_feat_size
48 | x = self.fc(x)
49 | x = x.reshape(N, out_C, out_HW, out_HW)
50 | x = self.upsample(x)
51 | return x
52 | return None
53 |
-------------------------------------------------------------------------------- /mmdet/models/roi_heads/mask_heads/htc_mask_head.py: --------------------------------------------------------------------------------
1 | from mmcv.cnn import ConvModule
2 |
3 | from mmdet.models.builder import HEADS
4 | from .fcn_mask_head import FCNMaskHead
5 |
6 |
7 | @HEADS.register_module()
8 | class HTCMaskHead(FCNMaskHead):
9 |
10 | def __init__(self, with_conv_res=True, *args, **kwargs):
11 | super(HTCMaskHead, self).__init__(*args, **kwargs)
12 | self.with_conv_res = with_conv_res
13 | if self.with_conv_res:
14 | self.conv_res = ConvModule(
15 | self.conv_out_channels,
16 | self.conv_out_channels,
17 | 1,
18 | conv_cfg=self.conv_cfg,
19 | norm_cfg=self.norm_cfg)
20 |
21 | def forward(self, x, res_feat=None, return_logits=True, return_feat=True):
22 | if res_feat is not None:
23 | assert self.with_conv_res
24 | res_feat = self.conv_res(res_feat)
25 | x = x + res_feat
26 | for conv in self.convs:
27 | x = conv(x)
28 | res_feat = x
29 | outs = []
30 | if return_logits:
31 | x = self.upsample(x)
32 | if self.upsample_method == 'deconv':
33 | x = self.relu(x)
34 | mask_pred = self.conv_logits(x)
35 | outs.append(mask_pred)
36 | if return_feat:
37 | outs.append(res_feat)
38 | return outs if len(outs) > 1 else outs[0]
39 |
-------------------------------------------------------------------------------- /mmdet/models/roi_heads/mask_heads/scnet_mask_head.py: --------------------------------------------------------------------------------
1 | from mmdet.models.builder import HEADS
2 | from mmdet.models.utils import ResLayer, SimplifiedBasicBlock
3 | from .fcn_mask_head import FCNMaskHead
4 |
5 |
6 | @HEADS.register_module()
7 | class SCNetMaskHead(FCNMaskHead):
8 | """Mask head for `SCNet <https://arxiv.org/abs/2012.10150>`_.
9 |
10 | Args:
11 | conv_to_res (bool, optional): if True, change the conv layers to
12 | ``SimplifiedBasicBlock``.
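            Default: True.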
13 | """
14 |
15 | def __init__(self, conv_to_res=True, **kwargs):
16 | super(SCNetMaskHead, self).__init__(**kwargs)
17 | self.conv_to_res = conv_to_res
18 | if conv_to_res:
19 | assert self.conv_kernel_size == 3
20 | self.num_res_blocks = self.num_convs // 2
21 | self.convs = ResLayer(
22 | SimplifiedBasicBlock,
23 | self.in_channels,
24 | self.conv_out_channels,
25 | self.num_res_blocks,
26 | conv_cfg=self.conv_cfg,
27 | norm_cfg=self.norm_cfg)
28 |
-------------------------------------------------------------------------------- /mmdet/models/roi_heads/mask_heads/scnet_semantic_head.py: --------------------------------------------------------------------------------
1 | from mmdet.models.builder import HEADS
2 | from mmdet.models.utils import ResLayer, SimplifiedBasicBlock
3 | from .fused_semantic_head import FusedSemanticHead
4 |
5 |
6 | @HEADS.register_module()
7 | class SCNetSemanticHead(FusedSemanticHead):
8 | """Semantic segmentation head for `SCNet <https://arxiv.org/abs/2012.10150>`_.
9 |
10 | Args:
11 | conv_to_res (bool, optional): if True, change the conv layers to
12 | ``SimplifiedBasicBlock``.
13 | """
14 |
15 | def __init__(self, conv_to_res=True, **kwargs):
16 | super(SCNetSemanticHead, self).__init__(**kwargs)
17 | self.conv_to_res = conv_to_res
18 | if self.conv_to_res:
19 | num_res_blocks = self.num_convs // 2
20 | self.convs = ResLayer(
21 | SimplifiedBasicBlock,
22 | self.in_channels,
23 | self.conv_out_channels,
24 | num_res_blocks,
25 | conv_cfg=self.conv_cfg,
26 | norm_cfg=self.norm_cfg)
27 | self.num_convs = num_res_blocks
28 |
-------------------------------------------------------------------------------- /mmdet/models/roi_heads/roi_extractors/__init__.py: --------------------------------------------------------------------------------
1 | from .base_roi_extractor import BaseRoIExtractor
2 | from .generic_roi_extractor import GenericRoIExtractor
3 | from .single_level_roi_extractor import SingleRoIExtractor
4 |
5 | __all__ = ['BaseRoIExtractor', 'SingleRoIExtractor', 'GenericRoIExtractor']
6 |
-------------------------------------------------------------------------------- /mmdet/models/roi_heads/roi_extractors/base_roi_extractor.py: --------------------------------------------------------------------------------
1 | from abc import ABCMeta, abstractmethod
2 |
3 | import torch
4 | import torch.nn as nn
5 | from mmcv import ops
6 | from mmcv.runner import BaseModule
7 |
8 |
9 | class BaseRoIExtractor(BaseModule, metaclass=ABCMeta):
10 | """Base class for RoI extractor.
11 |
12 | Args:
13 | roi_layer (dict): Specify RoI layer type and arguments.
14 | out_channels (int): Output channels of RoI layers.
15 | featmap_strides (List[int]): Strides of input feature maps.
16 | init_cfg (dict or list[dict], optional): Initialization config dict.
17 | Default: None
18 | """
19 |
20 | def __init__(self,
21 | roi_layer,
22 | out_channels,
23 | featmap_strides,
24 | init_cfg=None):
25 | super(BaseRoIExtractor, self).__init__(init_cfg)
26 | self.roi_layers = self.build_roi_layers(roi_layer, featmap_strides)
27 | self.out_channels = out_channels
28 | self.featmap_strides = featmap_strides
29 | self.fp16_enabled = False
30 |
31 | @property
32 | def num_inputs(self):
33 | """int: Number of input feature maps."""
34 | return len(self.featmap_strides)
35 |
36 | def build_roi_layers(self, layer_cfg, featmap_strides):
37 | """Build RoI operator to extract feature from each level feature map.
38 |
39 | Args:
40 | layer_cfg (dict): Dictionary to construct and config RoI layer
41 | operation.
Options are modules under ``mmcv/ops`` such as
42 | ``RoIAlign``.
43 | featmap_strides (List[int]): The stride of input feature map w.r.t.
44 | the original image size, which would be used to scale RoI
45 | coordinate (original image coordinate system) to feature
46 | coordinate system.
47 |
48 | Returns:
49 | nn.ModuleList: The RoI extractor modules for each level feature
50 | map.
51 | """
52 |
53 | cfg = layer_cfg.copy()
54 | layer_type = cfg.pop('type')
55 | assert hasattr(ops, layer_type)
56 | layer_cls = getattr(ops, layer_type)
57 | roi_layers = nn.ModuleList(
58 | [layer_cls(spatial_scale=1 / s, **cfg) for s in featmap_strides])
59 | return roi_layers
60 |
61 | def roi_rescale(self, rois, scale_factor):
62 | """Scale RoI coordinates by scale factor.
63 |
64 | Args:
65 | rois (torch.Tensor): RoI (Region of Interest), shape (n, 5)
66 | scale_factor (float): Scale factor that RoI will be multiplied by.
67 |
68 | Returns:
69 | torch.Tensor: Scaled RoI.
70 | """
71 |
72 | cx = (rois[:, 1] + rois[:, 3]) * 0.5
73 | cy = (rois[:, 2] + rois[:, 4]) * 0.5
74 | w = rois[:, 3] - rois[:, 1]
75 | h = rois[:, 4] - rois[:, 2]
76 | new_w = w * scale_factor
77 | new_h = h * scale_factor
78 | x1 = cx - new_w * 0.5
79 | x2 = cx + new_w * 0.5
80 | y1 = cy - new_h * 0.5
81 | y2 = cy + new_h * 0.5
82 | new_rois = torch.stack((rois[:, 0], x1, y1, x2, y2), dim=-1)
83 | return new_rois
84 |
85 | @abstractmethod
86 | def forward(self, feats, rois, roi_scale_factor=None):
87 | pass
88 |
-------------------------------------------------------------------------------- /mmdet/models/roi_heads/roi_extractors/generic_roi_extractor.py: --------------------------------------------------------------------------------
1 | from mmcv.cnn.bricks import build_plugin_layer
2 | from mmcv.runner import force_fp32
3 |
4 | from mmdet.models.builder import ROI_EXTRACTORS
5 | from .base_roi_extractor import BaseRoIExtractor
6 |
7 |
8 | @ROI_EXTRACTORS.register_module()
9 | class GenericRoIExtractor(BaseRoIExtractor):
10 | """Extract RoI features from all feature map levels.
11 |
12 | This is the implementation of `A novel Region of Interest Extraction Layer
13 | for Instance Segmentation <https://arxiv.org/abs/2004.13665>`_.
14 |
15 | Args:
16 | aggregation (str): The method to aggregate multiple feature maps.
17 | Options are 'sum', 'concat'. Default: 'sum'.
18 | pre_cfg (dict | None): Specify pre-processing modules. Default: None.
19 | post_cfg (dict | None): Specify post-processing modules. Default: None.
20 | kwargs (keyword arguments): Arguments that are the same
21 | as :class:`BaseRoIExtractor`.
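
    Example (an illustrative config sketch, not from the original docs;
    it sums RoIAlign outputs over four FPN levels and assumes the usual
    256-channel FPN with strides 4/8/16/32)::

        >>> roi_extractor = GenericRoIExtractor(
        ...     aggregation='sum',
        ...     roi_layer=dict(type='RoIAlign', output_size=7,
        ...                    sampling_ratio=0),
        ...     out_channels=256,
        ...     featmap_strides=[4, 8, 16, 32])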
22 | """
23 |
24 | def __init__(self,
25 | aggregation='sum',
26 | pre_cfg=None,
27 | post_cfg=None,
28 | **kwargs):
29 | super(GenericRoIExtractor, self).__init__(**kwargs)
30 |
31 | assert aggregation in ['sum', 'concat']
32 |
33 | self.aggregation = aggregation
34 | self.with_post = post_cfg is not None
35 | self.with_pre = pre_cfg is not None
36 | # build pre/post processing modules
37 | if self.with_post:
38 | self.post_module = build_plugin_layer(post_cfg, '_post_module')[1]
39 | if self.with_pre:
40 | self.pre_module = build_plugin_layer(pre_cfg, '_pre_module')[1]
41 |
42 | @force_fp32(apply_to=('feats', ), out_fp16=True)
43 | def forward(self, feats, rois, roi_scale_factor=None):
44 | """Forward function."""
45 | if len(feats) == 1:
46 | return self.roi_layers[0](feats[0], rois)
47 |
48 | out_size = self.roi_layers[0].output_size
49 | num_levels = len(feats)
50 | roi_feats = feats[0].new_zeros(
51 | rois.size(0), self.out_channels, *out_size)
52 |
53 | # sometimes rois is an empty tensor
54 | if roi_feats.shape[0] == 0:
55 | return roi_feats
56 |
57 | if roi_scale_factor is not None:
58 | rois = self.roi_rescale(rois, roi_scale_factor)
59 |
60 | # mark the starting channels for concat mode
61 | start_channels = 0
62 | for i in range(num_levels):
63 | roi_feats_t = self.roi_layers[i](feats[i], rois)
64 | end_channels = start_channels + roi_feats_t.size(1)
65 | if self.with_pre:
66 | # apply pre-processing to a RoI extracted from each layer
67 | roi_feats_t = self.pre_module(roi_feats_t)
68 | if self.aggregation == 'sum':
69 | # and sum them all
70 | roi_feats += roi_feats_t
71 | else:
72 | # and concat them along channel dimension
73 | roi_feats[:, start_channels:end_channels] = roi_feats_t
74 | # update channels starting position
75 | start_channels = end_channels
76 | # check if concat channels match at the end
77 | if self.aggregation == 'concat':
78 | assert start_channels == self.out_channels
79 |
80 | if self.with_post:
81 | # apply post-processing before returning the result
82 | roi_feats = self.post_module(roi_feats)
83 | return roi_feats
84 |
-------------------------------------------------------------------------------- /mmdet/models/roi_heads/shared_heads/__init__.py: --------------------------------------------------------------------------------
1 | from .res_layer import ResLayer
2 |
3 | __all__ = ['ResLayer']
4 |
-------------------------------------------------------------------------------- /mmdet/models/roi_heads/shared_heads/res_layer.py: --------------------------------------------------------------------------------
1 | import warnings
2 |
3 | import torch.nn as nn
4 | from mmcv.runner import BaseModule, auto_fp16
5 |
6 | from mmdet.models.backbones import ResNet
7 | from mmdet.models.builder import SHARED_HEADS
8 | from mmdet.models.utils import ResLayer as _ResLayer
9 |
10 |
11 | @SHARED_HEADS.register_module()
12 | class ResLayer(BaseModule):
13 |
14 | def __init__(self,
15 | depth,
16 | stage=3,
17 | stride=2,
18 | dilation=1,
19 | style='pytorch',
20 | norm_cfg=dict(type='BN', requires_grad=True),
21 | norm_eval=True,
22 | with_cp=False,
23 | dcn=None,
24 | pretrained=None,
25 | init_cfg=None):
26 | super(ResLayer, self).__init__(init_cfg)
27 |
28 | self.norm_eval = norm_eval
29 | self.norm_cfg = norm_cfg
30 | self.stage = stage
31 | self.fp16_enabled = False
32 | block, stage_blocks = ResNet.arch_settings[depth]
33 | stage_block = stage_blocks[stage]
34 | planes = 64 * 2**stage
35 | inplanes = 64 * 2**(stage - 1) * block.expansion
36 |
37 | res_layer =
_ResLayer( 38 | block, 39 | inplanes, 40 | planes, 41 | stage_block, 42 | stride=stride, 43 | dilation=dilation, 44 | style=style, 45 | with_cp=with_cp, 46 | norm_cfg=self.norm_cfg, 47 | dcn=dcn) 48 | self.add_module(f'layer{stage + 1}', res_layer) 49 | 50 | assert not (init_cfg and pretrained), \ 51 | 'init_cfg and pretrained cannot be set at the same time' 52 | if isinstance(pretrained, str): 53 | warnings.warn('DeprecationWarning: pretrained is deprecated, ' 54 | 'please use "init_cfg" instead') 55 | self.init_cfg = dict(type='Pretrained', checkpoint=pretrained) 56 | elif pretrained is None: 57 | if init_cfg is None: 58 | self.init_cfg = [ 59 | dict(type='Kaiming', layer='Conv2d'), 60 | dict( 61 | type='Constant', 62 | val=1, 63 | layer=['_BatchNorm', 'GroupNorm']) 64 | ] 65 | else: 66 | raise TypeError('pretrained must be a str or None') 67 | 68 | @auto_fp16() 69 | def forward(self, x): 70 | res_layer = getattr(self, f'layer{self.stage + 1}') 71 | out = res_layer(x) 72 | return out 73 | 74 | def train(self, mode=True): 75 | super(ResLayer, self).train(mode) 76 | if self.norm_eval: 77 | for m in self.modules(): 78 | if isinstance(m, nn.BatchNorm2d): 79 | m.eval() 80 | -------------------------------------------------------------------------------- /mmdet/models/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .builder import build_linear_layer, build_transformer 2 | from .gaussian_target import gaussian_radius, gen_gaussian_target 3 | from .inverted_residual import InvertedResidual 4 | from .make_divisible import make_divisible 5 | from .normed_predictor import NormedConv2d, NormedLinear 6 | from .positional_encoding import (LearnedPositionalEncoding, 7 | SinePositionalEncoding) 8 | from .res_layer import ResLayer, SimplifiedBasicBlock 9 | from .se_layer import SELayer 10 | from .transformer import (DetrTransformerDecoder, DetrTransformerDecoderLayer, 11 | DynamicConv, Transformer) 12 | 13 | __all__ = [ 14 | 'ResLayer', 'gaussian_radius', 'gen_gaussian_target', 15 | 'DetrTransformerDecoderLayer', 'DetrTransformerDecoder', 'Transformer', 16 | 'build_transformer', 'build_linear_layer', 'SinePositionalEncoding', 17 | 'LearnedPositionalEncoding', 'DynamicConv', 'SimplifiedBasicBlock', 18 | 'NormedLinear', 'NormedConv2d', 'make_divisible', 'InvertedResidual', 19 | 'SELayer' 20 | ] 21 | -------------------------------------------------------------------------------- /mmdet/models/utils/builder.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | from mmcv.utils import Registry, build_from_cfg 3 | 4 | TRANSFORMER = Registry('Transformer') 5 | LINEAR_LAYERS = Registry('linear layers') 6 | 7 | 8 | def build_transformer(cfg, default_args=None): 9 | """Builder for Transformer.""" 10 | return build_from_cfg(cfg, TRANSFORMER, default_args) 11 | 12 | 13 | LINEAR_LAYERS.register_module('Linear', module=nn.Linear) 14 | 15 | 16 | def build_linear_layer(cfg, *args, **kwargs): 17 | """Build linear layer. 18 | Args: 19 | cfg (None or dict): The linear layer config, which should contain: 20 | - type (str): Layer type. 21 | - layer args: Args needed to instantiate a linear layer. 22 | args (argument list): Arguments passed to the `__init__` 23 | method of the corresponding linear layer. 24 | kwargs (keyword arguments): Keyword arguments passed to the `__init__` 25 | method of the corresponding linear layer. 26 | Returns: 27 | nn.Module: Created linear layer.
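    Example (an editorial sketch; the in/out feature sizes are illustrative
    assumptions, not values used anywhere in this repo):
        >>> from mmdet.models.utils import build_linear_layer
        >>> # cfg=None falls back to a plain nn.Linear
        >>> build_linear_layer(None, 256, 81)
        Linear(in_features=256, out_features=81, bias=True)
        >>> # any layer registered in LINEAR_LAYERS is built the same way
        >>> layer = build_linear_layer(dict(type='NormedLinear'), 256, 81)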
28 | """ 29 | if cfg is None: 30 | cfg_ = dict(type='Linear') 31 | else: 32 | if not isinstance(cfg, dict): 33 | raise TypeError('cfg must be a dict') 34 | if 'type' not in cfg: 35 | raise KeyError('the cfg dict must contain the key "type"') 36 | cfg_ = cfg.copy() 37 | 38 | layer_type = cfg_.pop('type') 39 | if layer_type not in LINEAR_LAYERS: 40 | raise KeyError(f'Unrecognized linear type {layer_type}') 41 | else: 42 | linear_layer = LINEAR_LAYERS.get(layer_type) 43 | 44 | layer = linear_layer(*args, **kwargs, **cfg_) 45 | 46 | return layer 47 | -------------------------------------------------------------------------------- /mmdet/models/utils/make_divisible.py: -------------------------------------------------------------------------------- 1 | def make_divisible(value, divisor, min_value=None, min_ratio=0.9): 2 | """Make divisible function. 3 | 4 | This function rounds the channel number to the nearest value that is 5 | divisible by the divisor. It is taken from the original tf repo. It ensures 6 | that all layers have a channel number that is divisible by divisor. It can 7 | be seen here: https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py # noqa 8 | 9 | Args: 10 | value (int): The original channel number. 11 | divisor (int): The divisor to fully divide the channel number. 12 | min_value (int): The minimum value of the output channel. 13 | Default: None, which means the minimum value equals the divisor. 14 | min_ratio (float): The minimum ratio of the rounded channel number to 15 | the original channel number. Default: 0.9. 16 | 17 | Returns: 18 | int: The modified output channel number. 19 | """ 20 | 21 | if min_value is None: 22 | min_value = divisor 23 | new_value = max(min_value, int(value + divisor / 2) // divisor * divisor) 24 | # Make sure that rounding down does not go down by more than (1 - min_ratio). 25 | if new_value < min_ratio * value: 26 | new_value += divisor 27 | return new_value 28 | -------------------------------------------------------------------------------- /mmdet/models/utils/normed_predictor.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from mmcv.cnn import CONV_LAYERS 5 | 6 | from .builder import LINEAR_LAYERS 7 | 8 | 9 | @LINEAR_LAYERS.register_module(name='NormedLinear') 10 | class NormedLinear(nn.Linear): 11 | """Normalized Linear Layer. 12 | 13 | Args: 14 | tempearture (float, optional): Temperature term. Defaults to 20. 15 | power (float, optional): Power term. Defaults to 1.0. 16 | eps (float, optional): The minimal value of the divisor, kept 17 | for numerical stability. Defaults to 1e-6. 18 | """ 19 | 20 | def __init__(self, *args, tempearture=20, power=1.0, eps=1e-6, **kwargs): 21 | super(NormedLinear, self).__init__(*args, **kwargs) 22 | self.tempearture = tempearture 23 | self.power = power 24 | self.eps = eps 25 | self.init_weights() 26 | 27 | def init_weights(self): 28 | nn.init.normal_(self.weight, mean=0, std=0.01) 29 | if self.bias is not None: 30 | nn.init.constant_(self.bias, 0) 31 | 32 | def forward(self, x): 33 | weight_ = self.weight / ( 34 | self.weight.norm(dim=1, keepdim=True).pow(self.power) + self.eps) 35 | x_ = x / (x.norm(dim=1, keepdim=True).pow(self.power) + self.eps) 36 | x_ = x_ * self.tempearture 37 | 38 | return F.linear(x_, weight_, self.bias) 39 | 40 | 41 | @CONV_LAYERS.register_module(name='NormedConv2d') 42 | class NormedConv2d(nn.Conv2d): 43 | """Normalized Conv2d Layer.
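    Both the input and the weight are divided by their L2 norm along the
    channel dimension (raised to ``power``, with ``eps`` added for numerical
    stability), and the normalized input is scaled by ``tempearture`` before
    the standard convolution is applied; if ``norm_over_kernel`` is set, the
    weight is normalized over the whole kernel instead of per input channel.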
44 | 45 | Args: 46 | tempearture (float, optional): Temperature term. Defaults to 20. 47 | power (float, optional): Power term. Defaults to 1.0. 48 | eps (float, optional): The minimal value of the divisor, kept 49 | for numerical stability. Defaults to 1e-6. 50 | norm_over_kernel (bool, optional): Whether to normalize over the 51 | whole kernel. Defaults to False. 52 | """ 53 | 54 | def __init__(self, 55 | *args, 56 | tempearture=20, 57 | power=1.0, 58 | eps=1e-6, 59 | norm_over_kernel=False, 60 | **kwargs): 61 | super(NormedConv2d, self).__init__(*args, **kwargs) 62 | self.tempearture = tempearture 63 | self.power = power 64 | self.norm_over_kernel = norm_over_kernel 65 | self.eps = eps 66 | 67 | def forward(self, x): 68 | if not self.norm_over_kernel: 69 | weight_ = self.weight / ( 70 | self.weight.norm(dim=1, keepdim=True).pow(self.power) + 71 | self.eps) 72 | else: 73 | weight_ = self.weight / ( 74 | self.weight.view(self.weight.size(0), -1).norm( 75 | dim=1, keepdim=True).pow(self.power)[..., None, None] + 76 | self.eps) 77 | x_ = x / (x.norm(dim=1, keepdim=True).pow(self.power) + self.eps) 78 | x_ = x_ * self.tempearture 79 | 80 | if hasattr(self, 'conv2d_forward'): 81 | x_ = self.conv2d_forward(x_, weight_) 82 | else: 83 | if tuple(int(v) for v in torch.__version__.split('+')[0].split('.')[:2]) >= (1, 8):  # numeric compare; as strings, '1.10' < '1.8' 84 | x_ = self._conv_forward(x_, weight_, self.bias) 85 | else: 86 | x_ = self._conv_forward(x_, weight_) 87 | return x_ 88 | -------------------------------------------------------------------------------- /mmdet/models/utils/se_layer.py: -------------------------------------------------------------------------------- 1 | import mmcv 2 | import torch.nn as nn 3 | from mmcv.cnn import ConvModule 4 | from mmcv.runner import BaseModule 5 | 6 | 7 | class SELayer(BaseModule): 8 | """Squeeze-and-Excitation Module. 9 | 10 | Args: 11 | channels (int): The input (and output) channels of the SE layer. 12 | ratio (int): Squeeze ratio in SELayer, the intermediate channel will be 13 | ``int(channels/ratio)``. Default: 16. 14 | conv_cfg (None or dict): Config dict for convolution layer. 15 | Default: None, which means using conv2d. 16 | act_cfg (dict or Sequence[dict]): Config dict for activation layer. 17 | If act_cfg is a dict, two activation layers will be configured 18 | by this dict. If act_cfg is a sequence of dicts, the first 19 | activation layer will be configured by the first dict and the 20 | second activation layer will be configured by the second dict. 21 | Default: (dict(type='ReLU'), dict(type='Sigmoid')) 22 | init_cfg (dict or list[dict], optional): Initialization config dict.
23 | Default: None 24 | """ 25 | 26 | def __init__(self, 27 | channels, 28 | ratio=16, 29 | conv_cfg=None, 30 | act_cfg=(dict(type='ReLU'), dict(type='Sigmoid')), 31 | init_cfg=None): 32 | super(SELayer, self).__init__(init_cfg) 33 | if isinstance(act_cfg, dict): 34 | act_cfg = (act_cfg, act_cfg) 35 | assert len(act_cfg) == 2 36 | assert mmcv.is_tuple_of(act_cfg, dict) 37 | self.global_avgpool = nn.AdaptiveAvgPool2d(1) 38 | self.conv1 = ConvModule( 39 | in_channels=channels, 40 | out_channels=int(channels / ratio), 41 | kernel_size=1, 42 | stride=1, 43 | conv_cfg=conv_cfg, 44 | act_cfg=act_cfg[0]) 45 | self.conv2 = ConvModule( 46 | in_channels=int(channels / ratio), 47 | out_channels=channels, 48 | kernel_size=1, 49 | stride=1, 50 | conv_cfg=conv_cfg, 51 | act_cfg=act_cfg[1]) 52 | 53 | def forward(self, x): 54 | out = self.global_avgpool(x) 55 | out = self.conv1(out) 56 | out = self.conv2(out) 57 | return x * out 58 | -------------------------------------------------------------------------------- /mmdet/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .collect_env import collect_env 2 | from .logger import get_root_logger 3 | from .optimizer import DistOptimizerHook 4 | 5 | __all__ = ['get_root_logger', 'collect_env', 'DistOptimizerHook'] 6 | -------------------------------------------------------------------------------- /mmdet/utils/collect_env.py: -------------------------------------------------------------------------------- 1 | from mmcv.utils import collect_env as collect_base_env 2 | from mmcv.utils import get_git_hash 3 | 4 | import mmdet 5 | 6 | 7 | def collect_env(): 8 | """Collect the information of the running environments.""" 9 | env_info = collect_base_env() 10 | env_info['MMDetection'] = mmdet.__version__ + '+' + get_git_hash()[:7] 11 | return env_info 12 | 13 | 14 | if __name__ == '__main__': 15 | for name, val in collect_env().items(): 16 | print(f'{name}: {val}') 17 | -------------------------------------------------------------------------------- /mmdet/utils/logger.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from mmcv.utils import get_logger 4 | 5 | 6 | def get_root_logger(log_file=None, log_level=logging.INFO): 7 | """Get root logger. 8 | 9 | Args: 10 | log_file (str, optional): File path of log. Defaults to None. 11 | log_level (int, optional): The level of logger. 12 | Defaults to logging.INFO. 
13 | 14 | Returns: 15 | :obj:`logging.Logger`: The obtained logger 16 | """ 17 | logger = get_logger(name='mmdet', log_file=log_file, log_level=log_level) 18 | 19 | return logger 20 | -------------------------------------------------------------------------------- /mmdet/utils/optimizer.py: -------------------------------------------------------------------------------- 1 | from mmcv.runner import OptimizerHook, HOOKS 2 | try: 3 | import apex 4 | except ImportError: 5 | print('apex is not installed') 6 | 7 | 8 | @HOOKS.register_module() 9 | class DistOptimizerHook(OptimizerHook): 10 | """Optimizer hook for distributed training.""" 11 | 12 | def __init__(self, update_interval=1, grad_clip=None, coalesce=True, bucket_size_mb=-1, use_fp16=False): 13 | self.grad_clip = grad_clip 14 | self.coalesce = coalesce 15 | self.bucket_size_mb = bucket_size_mb 16 | self.update_interval = update_interval 17 | self.use_fp16 = use_fp16 18 | 19 | def before_run(self, runner): 20 | runner.optimizer.zero_grad() 21 | 22 | def after_train_iter(self, runner): 23 | runner.outputs['loss'] /= self.update_interval 24 | if self.use_fp16: 25 | with apex.amp.scale_loss(runner.outputs['loss'], runner.optimizer) as scaled_loss: 26 | scaled_loss.backward() 27 | else: 28 | runner.outputs['loss'].backward() 29 | if self.every_n_iters(runner, self.update_interval): 30 | if self.grad_clip is not None: 31 | self.clip_grads(runner.model.parameters()) 32 | runner.optimizer.step() 33 | runner.optimizer.zero_grad() 34 | -------------------------------------------------------------------------------- /mmdet/utils/profiling.py: -------------------------------------------------------------------------------- 1 | import contextlib 2 | import sys 3 | import time 4 | 5 | import torch 6 | 7 | if sys.version_info >= (3, 7): 8 | 9 | @contextlib.contextmanager 10 | def profile_time(trace_name, 11 | name, 12 | enabled=True, 13 | stream=None, 14 | end_stream=None): 15 | """Print time spent by CPU and GPU. 16 | 17 | Useful as a temporary context manager to find sweet spots of code 18 | suitable for async implementation. 19 | """ 20 | if (not enabled) or not torch.cuda.is_available(): 21 | yield 22 | return 23 | stream = stream if stream else torch.cuda.current_stream() 24 | end_stream = end_stream if end_stream else stream 25 | start = torch.cuda.Event(enable_timing=True) 26 | end = torch.cuda.Event(enable_timing=True) 27 | stream.record_event(start) 28 | try: 29 | cpu_start = time.monotonic() 30 | yield 31 | finally: 32 | cpu_end = time.monotonic() 33 | end_stream.record_event(end) 34 | end.synchronize() 35 | cpu_time = (cpu_end - cpu_start) * 1000 36 | gpu_time = start.elapsed_time(end) 37 | msg = f'{trace_name} {name} cpu_time {cpu_time:.2f} ms ' 38 | msg += f'gpu_time {gpu_time:.2f} ms stream {stream}' 39 | print(msg, end_stream) 40 | -------------------------------------------------------------------------------- /mmdet/utils/util_random.py: -------------------------------------------------------------------------------- 1 | """Helpers for random number generators.""" 2 | import numpy as np 3 | 4 | 5 | def ensure_rng(rng=None): 6 | """Coerces input into a random number generator. 7 | 8 | If the input is None, then a global random state is returned. 9 | 10 | If the input is a numeric value, then that is used as a seed to construct a 11 | random state. Otherwise the input is returned as-is. 12 | 13 | Adapted from [1]_. 14 | 15 | Args: 16 | rng (int | numpy.random.RandomState | None): 17 | if None, then defaults to the global rng. 
Otherwise this can be an 18 | integer or a RandomState class 19 | Returns: 20 | (numpy.random.RandomState) : rng - 21 | a numpy random number generator 22 | 23 | References: 24 | .. [1] https://gitlab.kitware.com/computer-vision/kwarray/blob/master/kwarray/util_random.py#L270 # noqa: E501 25 | """ 26 | 27 | if rng is None: 28 | rng = np.random.mtrand._rand 29 | elif isinstance(rng, int): 30 | rng = np.random.RandomState(rng) 31 | else: 32 | rng = rng 33 | return rng 34 | -------------------------------------------------------------------------------- /mmdet/version.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Open-MMLab. All rights reserved. 2 | 3 | __version__ = '2.14.0' 4 | short_version = __version__ 5 | 6 | 7 | def parse_version_info(version_str): 8 | version_info = [] 9 | for x in version_str.split('.'): 10 | if x.isdigit(): 11 | version_info.append(int(x)) 12 | elif x.find('rc') != -1: 13 | patch_version = x.split('rc') 14 | version_info.append(int(patch_version[0])) 15 | version_info.append(f'rc{patch_version[1]}') 16 | return tuple(version_info) 17 | 18 | 19 | version_info = parse_version_info(__version__) 20 | -------------------------------------------------------------------------------- /model-index.yml: -------------------------------------------------------------------------------- 1 | Import: 2 | - configs/atss/metafile.yml 3 | - configs/autoassign/metafile.yml 4 | - configs/cascade_rcnn/metafile.yml 5 | - configs/centernet/metafile.yml 6 | - configs/centripetalnet/metafile.yml 7 | - configs/cornernet/metafile.yml 8 | - configs/dcn/metafile.yml 9 | - configs/deformable_detr/metafile.yml 10 | - configs/detectors/metafile.yml 11 | - configs/detr/metafile.yml 12 | - configs/double_heads/metafile.yml 13 | - configs/dynamic_rcnn/metafile.yml 14 | - configs/empirical_attention/metafile.yml 15 | - configs/faster_rcnn/metafile.yml 16 | - configs/fcos/metafile.yml 17 | - configs/foveabox/metafile.yml 18 | - configs/fp16/metafile.yml 19 | - configs/fpg/metafile.yml 20 | - configs/free_anchor/metafile.yml 21 | - configs/fsaf/metafile.yml 22 | - configs/gcnet/metafile.yml 23 | - configs/gfl/metafile.yml 24 | - configs/ghm/metafile.yml 25 | - configs/gn/metafile.yml 26 | - configs/gn+ws/metafile.yml 27 | - configs/grid_rcnn/metafile.yml 28 | - configs/groie/metafile.yml 29 | - configs/guided_anchoring/metafile.yml 30 | - configs/hrnet/metafile.yml 31 | - configs/htc/metafile.yml 32 | - configs/instaboost/metafile.yml 33 | - configs/ld/metafile.yml 34 | - configs/libra_rcnn/metafile.yml 35 | - configs/mask_rcnn/metafile.yml 36 | - configs/ms_rcnn/metafile.yml 37 | - configs/nas_fcos/metafile.yml 38 | - configs/nas_fpn/metafile.yml 39 | - configs/paa/metafile.yml 40 | - configs/pafpn/metafile.yml 41 | - configs/pisa/metafile.yml 42 | - configs/point_rend/metafile.yml 43 | - configs/regnet/metafile.yml 44 | - configs/reppoints/metafile.yml 45 | - configs/res2net/metafile.yml 46 | - configs/resnest/metafile.yml 47 | - configs/retinanet/metafile.yml 48 | - configs/sabl/metafile.yml 49 | - configs/scnet/metafile.yml 50 | - configs/scratch/metafile.yml 51 | - configs/sparse_rcnn/metafile.yml 52 | - configs/ssd/metafile.yml 53 | - configs/tridentnet/metafile.yml 54 | - configs/vfnet/metafile.yml 55 | - configs/yolact/metafile.yml 56 | - configs/yolo/metafile.yml 57 | - configs/yolof/metafile.yml 58 | -------------------------------------------------------------------------------- /pytest.ini: 
-------------------------------------------------------------------------------- 1 | [pytest] 2 | addopts = --xdoctest --xdoctest-style=auto 3 | norecursedirs = .git ignore build __pycache__ data docker docs .eggs 4 | 5 | filterwarnings= default 6 | ignore:.*No cfgstr given in Cacher constructor or call.*:Warning 7 | ignore:.*Define the __nice__ method for.*:Warning 8 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | -r requirements/build.txt 2 | -r requirements/optional.txt 3 | -r requirements/runtime.txt 4 | -r requirements/tests.txt 5 | -------------------------------------------------------------------------------- /requirements/build.txt: -------------------------------------------------------------------------------- 1 | # These must be installed before building mmdetection 2 | cython 3 | numpy 4 | -------------------------------------------------------------------------------- /requirements/docs.txt: -------------------------------------------------------------------------------- 1 | recommonmark 2 | sphinx 3 | sphinx_markdown_tables 4 | sphinx_rtd_theme 5 | -------------------------------------------------------------------------------- /requirements/mminstall.txt: -------------------------------------------------------------------------------- 1 | mmcv-full>=1.3.3 2 | -------------------------------------------------------------------------------- /requirements/optional.txt: -------------------------------------------------------------------------------- 1 | albumentations>=0.3.2 2 | cityscapesscripts 3 | imagecorruptions 4 | scipy 5 | sklearn 6 | -------------------------------------------------------------------------------- /requirements/readthedocs.txt: -------------------------------------------------------------------------------- 1 | mmcv 2 | torch 3 | torchvision 4 | -------------------------------------------------------------------------------- /requirements/runtime.txt: -------------------------------------------------------------------------------- 1 | matplotlib 2 | numpy 3 | pycocotools; platform_system == "Linux" 4 | pycocotools-windows; platform_system == "Windows" 5 | six 6 | terminaltables 7 | timm 8 | -------------------------------------------------------------------------------- /requirements/tests.txt: -------------------------------------------------------------------------------- 1 | asynctest 2 | codecov 3 | flake8 4 | interrogate 5 | isort==4.3.21 6 | # Note: used for kwarray.group_items, this may be ported to mmcv in the future. 
7 | kwarray 8 | onnx==1.13.0 9 | onnxruntime==1.5.1 10 | pytest 11 | ubelt 12 | xdoctest>=0.10.0 13 | yapf 14 | -------------------------------------------------------------------------------- /resources/bigdet_statistics.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amazon-science/bigdetection/5c8140523b359bf309913eb0a335a1976497e2a2/resources/bigdet_statistics.png -------------------------------------------------------------------------------- /resources/bigdetection.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amazon-science/bigdetection/5c8140523b359bf309913eb0a335a1976497e2a2/resources/bigdetection.png -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [isort] 2 | line_length = 79 3 | multi_line_output = 0 4 | known_standard_library = setuptools 5 | known_first_party = mmdet 6 | known_third_party = PIL,asynctest,cityscapesscripts,cv2,gather_models,matplotlib,mmcv,numpy,onnx,onnxruntime,pycocotools,pytest,seaborn,six,terminaltables,torch,ts,yaml 7 | no_lines_before = STDLIB,LOCALFOLDER 8 | default_section = THIRDPARTY 9 | 10 | [yapf] 11 | BASED_ON_STYLE = pep8 12 | BLANK_LINE_BEFORE_NESTED_CLASS_OR_DEF = true 13 | SPLIT_BEFORE_EXPRESSION_AFTER_OPENING_PAREN = true 14 | -------------------------------------------------------------------------------- /tools/analysis_tools/eval_metric.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | import mmcv 4 | from mmcv import Config, DictAction 5 | 6 | from mmdet.datasets import build_dataset 7 | 8 | 9 | def parse_args(): 10 | parser = argparse.ArgumentParser(description='Evaluate metric of the ' 11 | 'results saved in pkl format') 12 | parser.add_argument('config', help='Config of the model') 13 | parser.add_argument('pkl_results', help='Results in pickle format') 14 | parser.add_argument( 15 | '--format-only', 16 | action='store_true', 17 | help='Format the output results without performing evaluation. It is ' 18 | 'useful when you want to format the result to a specific format and ' 19 | 'submit it to the test server') 20 | parser.add_argument( 21 | '--eval', 22 | type=str, 23 | nargs='+', 24 | help='Evaluation metrics, which depends on the dataset, e.g., "bbox",' 25 | ' "segm", "proposal" for COCO, and "mAP", "recall" for PASCAL VOC') 26 | parser.add_argument( 27 | '--cfg-options', 28 | nargs='+', 29 | action=DictAction, 30 | help='override some settings in the used config, the key-value pair ' 31 | 'in xxx=yyy format will be merged into config file. If the value to ' 32 | 'be overwritten is a list, it should be like key="[a,b]" or key=a,b ' 33 | 'It also allows nested list/tuple values, e.g. 
key="[(a,b),(c,d)]" ' 34 | 'Note that the quotation marks are necessary and that no white space ' 35 | 'is allowed.') 36 | parser.add_argument( 37 | '--eval-options', 38 | nargs='+', 39 | action=DictAction, 40 | help='custom options for evaluation, the key-value pair in xxx=yyy ' 41 | 'format will be kwargs for dataset.evaluate() function') 42 | args = parser.parse_args() 43 | return args 44 | 45 | 46 | def main(): 47 | args = parse_args() 48 | 49 | cfg = Config.fromfile(args.config) 50 | assert args.eval or args.format_only, ( 51 | 'Please specify at least one operation (eval/format the results) with ' 52 | 'the argument "--eval", "--format-only"') 53 | if args.eval and args.format_only: 54 | raise ValueError('--eval and --format_only cannot both be specified') 55 | 56 | if args.cfg_options is not None: 57 | cfg.merge_from_dict(args.cfg_options) 58 | # import modules from string list. 59 | if cfg.get('custom_imports', None): 60 | from mmcv.utils import import_modules_from_strings 61 | import_modules_from_strings(**cfg['custom_imports']) 62 | cfg.data.test.test_mode = True 63 | 64 | dataset = build_dataset(cfg.data.test) 65 | outputs = mmcv.load(args.pkl_results) 66 | 67 | kwargs = {} if args.eval_options is None else args.eval_options 68 | if args.format_only: 69 | dataset.format_results(outputs, **kwargs) 70 | if args.eval: 71 | eval_kwargs = cfg.get('evaluation', {}).copy() 72 | # hard-code way to remove EvalHook args 73 | for key in [ 74 | 'interval', 'tmpdir', 'start', 'gpu_collect', 'save_best', 75 | 'rule' 76 | ]: 77 | eval_kwargs.pop(key, None) 78 | eval_kwargs.update(dict(metric=args.eval, **kwargs)) 79 | print(dataset.evaluate(outputs, **eval_kwargs)) 80 | 81 | 82 | if __name__ == '__main__': 83 | main() 84 | -------------------------------------------------------------------------------- /tools/analysis_tools/get_flops.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | import torch 4 | from mmcv import Config, DictAction 5 | 6 | from mmdet.models import build_detector 7 | 8 | try: 9 | from mmcv.cnn import get_model_complexity_info 10 | except ImportError: 11 | raise ImportError('Please upgrade mmcv to >0.6.2') 12 | 13 | 14 | def parse_args(): 15 | parser = argparse.ArgumentParser(description='Get the FLOPs of a detector') 16 | parser.add_argument('config', help='train config file path') 17 | parser.add_argument( 18 | '--shape', 19 | type=int, 20 | nargs='+', 21 | default=[1280, 800], 22 | help='input image size') 23 | parser.add_argument( 24 | '--cfg-options', 25 | nargs='+', 26 | action=DictAction, 27 | help='override some settings in the used config, the key-value pair ' 28 | 'in xxx=yyy format will be merged into config file. If the value to ' 29 | 'be overwritten is a list, it should be like key="[a,b]" or key=a,b ' 30 | 'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" ' 31 | 'Note that the quotation marks are necessary and that no white space ' 32 | 'is allowed.') 33 | args = parser.parse_args() 34 | return args 35 | 36 | 37 | def main(): 38 | 39 | args = parse_args() 40 | 41 | if len(args.shape) == 1: 42 | input_shape = (3, args.shape[0], args.shape[0]) 43 | elif len(args.shape) == 2: 44 | input_shape = (3, ) + tuple(args.shape) 45 | else: 46 | raise ValueError('invalid input shape') 47 | 48 | cfg = Config.fromfile(args.config) 49 | if args.cfg_options is not None: 50 | cfg.merge_from_dict(args.cfg_options) 51 | # import modules from string list.
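# e.g. (illustrative sketch; `my_project.my_module` is a hypothetical name,
# not a module from this repo):
#   custom_imports = dict(imports=['my_project.my_module'],
#                         allow_failed_imports=False)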
52 | if cfg.get('custom_imports', None): 53 | from mmcv.utils import import_modules_from_strings 54 | import_modules_from_strings(**cfg['custom_imports']) 55 | 56 | model = build_detector( 57 | cfg.model, 58 | train_cfg=cfg.get('train_cfg'), 59 | test_cfg=cfg.get('test_cfg')) 60 | if torch.cuda.is_available(): 61 | model.cuda() 62 | model.eval() 63 | 64 | if hasattr(model, 'forward_dummy'): 65 | model.forward = model.forward_dummy 66 | else: 67 | raise NotImplementedError( 68 | 'FLOPs counter is currently not supported with {}'. 69 | format(model.__class__.__name__)) 70 | 71 | flops, params = get_model_complexity_info(model, input_shape) 72 | split_line = '=' * 30 73 | print(f'{split_line}\nInput shape: {input_shape}\n' 74 | f'Flops: {flops}\nParams: {params}\n{split_line}') 75 | print('!!!Please be cautious if you use the results in papers. ' 76 | 'You may need to check if all ops are supported and verify that the ' 77 | 'flops computation is correct.') 78 | 79 | 80 | if __name__ == '__main__': 81 | main() 82 | -------------------------------------------------------------------------------- /tools/deployment/mmdet_handler.py: -------------------------------------------------------------------------------- 1 | import base64 2 | import os 3 | 4 | import mmcv 5 | import torch 6 | from ts.torch_handler.base_handler import BaseHandler 7 | 8 | from mmdet.apis import inference_detector, init_detector 9 | 10 | 11 | class MMdetHandler(BaseHandler): 12 | threshold = 0.5 13 | 14 | def initialize(self, context): 15 | properties = context.system_properties 16 | self.map_location = 'cuda' if torch.cuda.is_available() else 'cpu' 17 | self.device = torch.device(self.map_location + ':' + 18 | str(properties.get('gpu_id')) if torch.cuda. 19 | is_available() else self.map_location) 20 | self.manifest = context.manifest 21 | 22 | model_dir = properties.get('model_dir') 23 | serialized_file = self.manifest['model']['serializedFile'] 24 | checkpoint = os.path.join(model_dir, serialized_file) 25 | self.config_file = os.path.join(model_dir, 'config.py') 26 | 27 | self.model = init_detector(self.config_file, checkpoint, self.device) 28 | self.initialized = True 29 | 30 | def preprocess(self, data): 31 | images = [] 32 | 33 | for row in data: 34 | image = row.get('data') or row.get('body') 35 | if isinstance(image, str): 36 | image = base64.b64decode(image) 37 | image = mmcv.imfrombytes(image) 38 | images.append(image) 39 | 40 | return images 41 | 42 | def inference(self, data, *args, **kwargs): 43 | results = inference_detector(self.model, data) 44 | return results 45 | 46 | def postprocess(self, data): 47 | # Format output following the example ObjectDetectionHandler format 48 | output = [] 49 | for image_index, image_result in enumerate(data): 50 | output.append([]) 51 | if isinstance(image_result, tuple): 52 | bbox_result, segm_result = image_result 53 | if isinstance(segm_result, tuple): 54 | segm_result = segm_result[0]  # ms rcnn 55 | else: 56 | bbox_result, segm_result = image_result, None 57 | 58 | for class_index, class_result in enumerate(bbox_result): 59 | class_name = self.model.CLASSES[class_index] 60 | for bbox in class_result: 61 | bbox_coords = bbox[:-1].tolist() 62 | score = float(bbox[-1]) 63 | if score >= self.threshold: 64 | output[image_index].append({ 65 | class_name: bbox_coords, 66 | 'score': score 67 | }) 68 | 69 | return output 70 | -------------------------------------------------------------------------------- /tools/dist_test.sh: 
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | CONFIG=$1 4 | CHECKPOINT=$2 5 | GPUS=$3 6 | PORT=${PORT:-29500} 7 | 8 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 9 | python -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \ 10 | $(dirname "$0")/test.py $CONFIG $CHECKPOINT --launcher pytorch ${@:4} 11 | -------------------------------------------------------------------------------- /tools/dist_train.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | CONFIG=$1 4 | GPUS=$2 5 | PORT=${PORT:-29500} 6 | 7 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 8 | python -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \ 9 | $(dirname "$0")/train.py $CONFIG --launcher pytorch ${@:3} 10 | -------------------------------------------------------------------------------- /tools/misc/browse_dataset.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | from pathlib import Path 4 | 5 | import mmcv 6 | from mmcv import Config, DictAction 7 | 8 | from mmdet.core.utils import mask2ndarray 9 | from mmdet.core.visualization import imshow_det_bboxes 10 | from mmdet.datasets.builder import build_dataset 11 | 12 | 13 | def parse_args(): 14 | parser = argparse.ArgumentParser(description='Browse a dataset') 15 | parser.add_argument('config', help='train config file path') 16 | parser.add_argument( 17 | '--skip-type', 18 | type=str, 19 | nargs='+', 20 | default=['DefaultFormatBundle', 'Normalize', 'Collect'], 21 | help='skip some useless pipeline steps') 22 | parser.add_argument( 23 | '--output-dir', 24 | default=None, 25 | type=str, 26 | help='If there is no display interface, you can save the visualizations to this directory') 27 | parser.add_argument('--not-show', default=False, action='store_true') 28 | parser.add_argument( 29 | '--show-interval', 30 | type=float, 31 | default=2, 32 | help='the display interval between images (in seconds)') 33 | parser.add_argument( 34 | '--cfg-options', 35 | nargs='+', 36 | action=DictAction, 37 | help='override some settings in the used config, the key-value pair ' 38 | 'in xxx=yyy format will be merged into config file. If the value to ' 39 | 'be overwritten is a list, it should be like key="[a,b]" or key=a,b ' 40 | 'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" ' 41 | 'Note that the quotation marks are necessary and that no white space ' 42 | 'is allowed.') 43 | args = parser.parse_args() 44 | return args 45 | 46 | 47 | def retrieve_data_cfg(config_path, skip_type, cfg_options): 48 | cfg = Config.fromfile(config_path) 49 | if cfg_options is not None: 50 | cfg.merge_from_dict(cfg_options) 51 | # import modules from string list. 
52 | if cfg.get('custom_imports', None): 53 | from mmcv.utils import import_modules_from_strings 54 | import_modules_from_strings(**cfg['custom_imports']) 55 | train_data_cfg = cfg.data.train 56 | train_data_cfg['pipeline'] = [ 57 | x for x in train_data_cfg.pipeline if x['type'] not in skip_type 58 | ] 59 | 60 | return cfg 61 | 62 | 63 | def main(): 64 | args = parse_args() 65 | cfg = retrieve_data_cfg(args.config, args.skip_type, args.cfg_options) 66 | 67 | dataset = build_dataset(cfg.data.train) 68 | 69 | progress_bar = mmcv.ProgressBar(len(dataset)) 70 | 71 | for item in dataset: 72 | filename = os.path.join(args.output_dir, 73 | Path(item['filename']).name 74 | ) if args.output_dir is not None else None 75 | 76 | gt_masks = item.get('gt_masks', None) 77 | if gt_masks is not None: 78 | gt_masks = mask2ndarray(gt_masks) 79 | 80 | imshow_det_bboxes( 81 | item['img'], 82 | item['gt_bboxes'], 83 | item['gt_labels'], 84 | gt_masks, 85 | class_names=dataset.CLASSES, 86 | show=not args.not_show, 87 | wait_time=args.show_interval, 88 | out_file=filename, 89 | bbox_color=(255, 102, 61), 90 | text_color=(255, 102, 61)) 91 | 92 | progress_bar.update() 93 | 94 | 95 | if __name__ == '__main__': 96 | main() 97 | -------------------------------------------------------------------------------- /tools/misc/print_config.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import warnings 3 | 4 | from mmcv import Config, DictAction 5 | 6 | 7 | def parse_args(): 8 | parser = argparse.ArgumentParser(description='Print the whole config') 9 | parser.add_argument('config', help='config file path') 10 | parser.add_argument( 11 | '--options', 12 | nargs='+', 13 | action=DictAction, 14 | help='override some settings in the used config, the key-value pair ' 15 | 'in xxx=yyy format will be merged into config file (deprecated), ' 16 | 'change to --cfg-options instead.') 17 | parser.add_argument( 18 | '--cfg-options', 19 | nargs='+', 20 | action=DictAction, 21 | help='override some settings in the used config, the key-value pair ' 22 | 'in xxx=yyy format will be merged into config file. If the value to ' 23 | 'be overwritten is a list, it should be like key="[a,b]" or key=a,b ' 24 | 'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" ' 25 | 'Note that the quotation marks are necessary and that no white space ' 26 | 'is allowed.') 27 | args = parser.parse_args() 28 | 29 | if args.options and args.cfg_options: 30 | raise ValueError( 31 | '--options and --cfg-options cannot both be ' 32 | 'specified, --options is deprecated in favor of --cfg-options') 33 | if args.options: 34 | warnings.warn('--options is deprecated in favor of --cfg-options') 35 | args.cfg_options = args.options 36 | 37 | return args 38 | 39 | 40 | def main(): 41 | args = parse_args() 42 | 43 | cfg = Config.fromfile(args.config) 44 | if args.cfg_options is not None: 45 | cfg.merge_from_dict(args.cfg_options) 46 | # import modules from string list. 
47 | if cfg.get('custom_imports', None): 48 | from mmcv.utils import import_modules_from_strings 49 | import_modules_from_strings(**cfg['custom_imports']) 50 | print(f'Config:\n{cfg.pretty_text}') 51 | 52 | 53 | if __name__ == '__main__': 54 | main() 55 | -------------------------------------------------------------------------------- /tools/model_converters/publish_model.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import subprocess 3 | 4 | import torch 5 | 6 | 7 | def parse_args(): 8 | parser = argparse.ArgumentParser( 9 | description='Process a checkpoint to be published') 10 | parser.add_argument('in_file', help='input checkpoint filename') 11 | parser.add_argument('out_file', help='output checkpoint filename') 12 | args = parser.parse_args() 13 | return args 14 | 15 | 16 | def process_checkpoint(in_file, out_file): 17 | checkpoint = torch.load(in_file, map_location='cpu') 18 | # remove optimizer for smaller file size 19 | if 'optimizer' in checkpoint: 20 | del checkpoint['optimizer'] 21 | # if it is necessary to remove some sensitive data in checkpoint['meta'], 22 | # add the code here. 23 | if tuple(int(v) for v in torch.__version__.split('+')[0].split('.')[:2]) >= (1, 6):  # numeric compare; as strings, '1.10' < '1.6' 24 | torch.save(checkpoint, out_file, _use_new_zipfile_serialization=False) 25 | else: 26 | torch.save(checkpoint, out_file) 27 | sha = subprocess.check_output(['sha256sum', out_file]).decode() 28 | if out_file.endswith('.pth'): 29 | out_file_name = out_file[:-4] 30 | else: 31 | out_file_name = out_file 32 | final_file = out_file_name + f'-{sha[:8]}.pth' 33 | subprocess.Popen(['mv', out_file, final_file]) 34 | 35 | 36 | def main(): 37 | args = parse_args() 38 | process_checkpoint(args.in_file, args.out_file) 39 | 40 | 41 | if __name__ == '__main__': 42 | main() 43 | -------------------------------------------------------------------------------- /tools/model_converters/regnet2mmdet.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from collections import OrderedDict 3 | 4 | import torch 5 | 6 | 7 | def convert_stem(model_key, model_weight, state_dict, converted_names): 8 | new_key = model_key.replace('stem.conv', 'conv1') 9 | new_key = new_key.replace('stem.bn', 'bn1') 10 | state_dict[new_key] = model_weight 11 | converted_names.add(model_key) 12 | print(f'Convert {model_key} to {new_key}') 13 | 14 | 15 | def convert_head(model_key, model_weight, state_dict, converted_names): 16 | new_key = model_key.replace('head.fc', 'fc') 17 | state_dict[new_key] = model_weight 18 | converted_names.add(model_key) 19 | print(f'Convert {model_key} to {new_key}') 20 | 21 | 22 | def convert_reslayer(model_key, model_weight, state_dict, converted_names): 23 | split_keys = model_key.split('.') 24 | layer, block, module = split_keys[:3] 25 | block_id = int(block[1:]) 26 | layer_name = f'layer{int(layer[1:])}' 27 | block_name = f'{block_id - 1}' 28 | 29 | if block_id == 1 and module == 'bn': 30 | new_key = f'{layer_name}.{block_name}.downsample.1.{split_keys[-1]}' 31 | elif block_id == 1 and module == 'proj': 32 | new_key = f'{layer_name}.{block_name}.downsample.0.{split_keys[-1]}' 33 | elif module == 'f': 34 | if split_keys[3] == 'a_bn': 35 | module_name = 'bn1' 36 | elif split_keys[3] == 'b_bn': 37 | module_name = 'bn2' 38 | elif split_keys[3] == 'c_bn': 39 | module_name = 'bn3' 40 | elif split_keys[3] == 'a': 41 | module_name = 'conv1' 42 | elif split_keys[3] == 'b': 43 | module_name = 'conv2' 44 | elif split_keys[3] == 'c': 45 | module_name = 'conv3' 46 | new_key 
= f'{layer_name}.{block_name}.{module_name}.{split_keys[-1]}' 47 | else: 48 | raise ValueError(f'Unsupported conversion of key {model_key}') 49 | print(f'Convert {model_key} to {new_key}') 50 | state_dict[new_key] = model_weight 51 | converted_names.add(model_key) 52 | 53 | 54 | def convert(src, dst): 55 | """Convert keys in pycls pretrained RegNet models to mmdet style.""" 56 | # load the pycls model 57 | regnet_model = torch.load(src) 58 | blobs = regnet_model['model_state'] 59 | # convert to pytorch style 60 | state_dict = OrderedDict() 61 | converted_names = set() 62 | for key, weight in blobs.items(): 63 | if 'stem' in key: 64 | convert_stem(key, weight, state_dict, converted_names) 65 | elif 'head' in key: 66 | convert_head(key, weight, state_dict, converted_names) 67 | elif key.startswith('s'): 68 | convert_reslayer(key, weight, state_dict, converted_names) 69 | 70 | # check if all layers are converted 71 | for key in blobs: 72 | if key not in converted_names: 73 | print(f'not converted: {key}') 74 | # save checkpoint 75 | checkpoint = dict() 76 | checkpoint['state_dict'] = state_dict 77 | torch.save(checkpoint, dst) 78 | 79 | 80 | def main(): 81 | parser = argparse.ArgumentParser(description='Convert model keys') 82 | parser.add_argument('src', help='src pycls model path') 83 | parser.add_argument('dst', help='save path') 84 | args = parser.parse_args() 85 | convert(args.src, args.dst) 86 | 87 | 88 | if __name__ == '__main__': 89 | main() 90 | -------------------------------------------------------------------------------- /tools/model_converters/selfsup2mmdet.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from collections import OrderedDict 3 | 4 | import torch 5 | 6 | 7 | def moco_convert(src, dst): 8 | """Convert keys in MoCo pretrained models to mmdet style.""" 9 | # load the moco model 10 | moco_model = torch.load(src) 11 | blobs = moco_model['state_dict'] 12 | # convert to pytorch style 13 | state_dict = OrderedDict() 14 | for k, v in blobs.items(): 15 | if not k.startswith('module.encoder_q.'): 16 | continue 17 | old_k = k 18 | k = k.replace('module.encoder_q.', '') 19 | state_dict[k] = v 20 | print(old_k, '->', k) 21 | # save checkpoint 22 | checkpoint = dict() 23 | checkpoint['state_dict'] = state_dict 24 | torch.save(checkpoint, dst) 25 | 26 | 27 | def main(): 28 | parser = argparse.ArgumentParser(description='Convert model keys') 29 | parser.add_argument('src', help='src self-supervised model path') 30 | parser.add_argument('dst', help='save path') 31 | parser.add_argument( 32 | '--selfsup', type=str, choices=['moco', 'swav'], help='self-supervised method used to train the source model') 33 | args = parser.parse_args() 34 | if args.selfsup == 'moco': 35 | moco_convert(args.src, args.dst) 36 | elif args.selfsup == 'swav': 37 | print('SWAV does not need to convert the keys') 38 | 39 | 40 | if __name__ == '__main__': 41 | main() 42 | -------------------------------------------------------------------------------- /tools/model_converters/upgrade_ssd_version.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import tempfile 3 | from collections import OrderedDict 4 | 5 | import torch 6 | from mmcv import Config 7 | 8 | 9 | def parse_config(config_strings): 10 | temp_file = tempfile.NamedTemporaryFile() 11 | config_path = f'{temp_file.name}.py' 12 | with open(config_path, 'w') as f: 13 | f.write(config_strings) 14 | 15 | config = Config.fromfile(config_path) 16 | # check whether it is SSD 17 | if 
config.model.bbox_head.type != 'SSDHead': 18 | raise AssertionError('This is not an SSD model.') 19 | 20 | 21 | def convert(in_file, out_file): 22 | checkpoint = torch.load(in_file) 23 | in_state_dict = checkpoint.pop('state_dict') 24 | out_state_dict = OrderedDict() 25 | meta_info = checkpoint['meta'] 26 | parse_config('#' + meta_info['config']) 27 | for key, value in in_state_dict.items(): 28 | if 'extra' in key: 29 | layer_idx = int(key.split('.')[2]) 30 | new_key = 'neck.extra_layers.{}.{}.conv.'.format( 31 | layer_idx // 2, layer_idx % 2) + key.split('.')[-1] 32 | elif 'l2_norm' in key: 33 | new_key = 'neck.l2_norm.weight' 34 | elif 'bbox_head' in key: 35 | new_key = key[:21] + '.0' + key[21:] 36 | else: 37 | new_key = key 38 | out_state_dict[new_key] = value 39 | checkpoint['state_dict'] = out_state_dict 40 | 41 | if tuple(int(v) for v in torch.__version__.split('+')[0].split('.')[:2]) >= (1, 6):  # numeric compare; as strings, '1.10' < '1.6' 42 | torch.save(checkpoint, out_file, _use_new_zipfile_serialization=False) 43 | else: 44 | torch.save(checkpoint, out_file) 45 | 46 | 47 | def main(): 48 | parser = argparse.ArgumentParser(description='Upgrade SSD version') 49 | parser.add_argument('in_file', help='input checkpoint file') 50 | parser.add_argument('out_file', help='output checkpoint file') 51 | 52 | args = parser.parse_args() 53 | convert(args.in_file, args.out_file) 54 | 55 | 56 | if __name__ == '__main__': 57 | main() 58 | -------------------------------------------------------------------------------- /tools/slurm_test.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -x 4 | 5 | PARTITION=$1 6 | JOB_NAME=$2 7 | CONFIG=$3 8 | CHECKPOINT=$4 9 | GPUS=${GPUS:-8} 10 | GPUS_PER_NODE=${GPUS_PER_NODE:-8} 11 | CPUS_PER_TASK=${CPUS_PER_TASK:-5} 12 | PY_ARGS=${@:5} 13 | SRUN_ARGS=${SRUN_ARGS:-""} 14 | 15 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 16 | srun -p ${PARTITION} \ 17 | --job-name=${JOB_NAME} \ 18 | --gres=gpu:${GPUS_PER_NODE} \ 19 | --ntasks=${GPUS} \ 20 | --ntasks-per-node=${GPUS_PER_NODE} \ 21 | --cpus-per-task=${CPUS_PER_TASK} \ 22 | --kill-on-bad-exit=1 \ 23 | ${SRUN_ARGS} \ 24 | python -u tools/test.py ${CONFIG} ${CHECKPOINT} --launcher="slurm" ${PY_ARGS} 25 | -------------------------------------------------------------------------------- /tools/slurm_train.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -x 4 | 5 | PARTITION=$1 6 | JOB_NAME=$2 7 | CONFIG=$3 8 | WORK_DIR=$4 9 | GPUS=${GPUS:-8} 10 | GPUS_PER_NODE=${GPUS_PER_NODE:-8} 11 | CPUS_PER_TASK=${CPUS_PER_TASK:-5} 12 | SRUN_ARGS=${SRUN_ARGS:-""} 13 | PY_ARGS=${@:5} 14 | 15 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 16 | srun -p ${PARTITION} \ 17 | --job-name=${JOB_NAME} \ 18 | --gres=gpu:${GPUS_PER_NODE} \ 19 | --ntasks=${GPUS} \ 20 | --ntasks-per-node=${GPUS_PER_NODE} \ 21 | --cpus-per-task=${CPUS_PER_TASK} \ 22 | --kill-on-bad-exit=1 \ 23 | ${SRUN_ARGS} \ 24 | python -u tools/train.py ${CONFIG} --work-dir=${WORK_DIR} --launcher="slurm" ${PY_ARGS} 25 | --------------------------------------------------------------------------------