├── .gitignore ├── LICENSE ├── README.md ├── configs ├── _base_ │ ├── datasets │ │ ├── cityscapes_detection.py │ │ ├── cityscapes_instance.py │ │ ├── coco_detection.py │ │ ├── coco_instance.py │ │ ├── coco_instance_semantic.py │ │ ├── deepfashion.py │ │ ├── lvis_v0.5_detection.py │ │ ├── lvis_v0.5_detection_shot.py │ │ ├── lvis_v0.5_instance.py │ │ ├── lvis_v1_instance.py │ │ ├── voc0712.py │ │ └── wider_face.py │ ├── default_runtime.py │ ├── models │ │ ├── cascade_mask_rcnn_r50_fpn.py │ │ ├── cascade_rcnn_r50_fpn.py │ │ ├── fast_rcnn_r50_fpn.py │ │ ├── faster_rcnn_r50_caffe_c4.py │ │ ├── faster_rcnn_r50_caffe_dc5.py │ │ ├── faster_rcnn_r50_fpn.py │ │ ├── mask_rcnn_r50_caffe_c4.py │ │ ├── mask_rcnn_r50_fpn.py │ │ ├── retinanet_r50_fpn.py │ │ ├── rpn_r50_caffe_c4.py │ │ ├── rpn_r50_fpn.py │ │ └── ssd300.py │ └── schedules │ │ ├── schedule_1x.py │ │ ├── schedule_20e.py │ │ └── schedule_2x.py └── oln_box │ ├── class_agn_faster_rcnn.py │ └── oln_box.py ├── docker └── Dockerfile ├── docs ├── 1_exist_data_model.md ├── 2_new_data_model.md ├── 3_exist_data_new_model.md ├── Makefile ├── api.rst ├── changelog.md ├── compatibility.md ├── conf.py ├── conventions.md ├── faq.md ├── get_started.md ├── index.rst ├── make.bat ├── model_zoo.md ├── projects.md ├── robustness_benchmarking.md ├── stat.py ├── tutorials │ ├── config.md │ ├── customize_dataset.md │ ├── customize_losses.md │ ├── customize_models.md │ ├── customize_runtime.md │ ├── data_pipeline.md │ ├── finetune.md │ ├── index.rst │ └── pytorch2onnx.md └── useful_tools.md ├── images ├── epic.png └── oln_overview.png ├── mmdet ├── __init__.py ├── apis │ ├── __init__.py │ ├── inference.py │ ├── test.py │ └── train.py ├── core │ ├── __init__.py │ ├── anchor │ │ ├── __init__.py │ │ ├── anchor_generator.py │ │ ├── builder.py │ │ ├── point_generator.py │ │ └── utils.py │ ├── bbox │ │ ├── __init__.py │ │ ├── assigners │ │ │ ├── __init__.py │ │ │ ├── approx_max_iou_assigner.py │ │ │ ├── assign_result.py │ │ │ ├── atss_assigner.py │ │ │ ├── base_assigner.py │ │ │ ├── center_region_assigner.py │ │ │ ├── grid_assigner.py │ │ │ ├── hungarian_assigner.py │ │ │ ├── max_iou_assigner.py │ │ │ ├── point_assigner.py │ │ │ └── region_assigner.py │ │ ├── builder.py │ │ ├── coder │ │ │ ├── __init__.py │ │ │ ├── base_bbox_coder.py │ │ │ ├── bucketing_bbox_coder.py │ │ │ ├── delta_xywh_bbox_coder.py │ │ │ ├── legacy_delta_xywh_bbox_coder.py │ │ │ ├── pseudo_bbox_coder.py │ │ │ ├── tblr_bbox_coder.py │ │ │ └── yolo_bbox_coder.py │ │ ├── demodata.py │ │ ├── iou_calculators │ │ │ ├── __init__.py │ │ │ ├── builder.py │ │ │ └── iou2d_calculator.py │ │ ├── match_costs │ │ │ ├── __init__.py │ │ │ ├── builder.py │ │ │ └── match_cost.py │ │ ├── samplers │ │ │ ├── __init__.py │ │ │ ├── base_sampler.py │ │ │ ├── combined_sampler.py │ │ │ ├── instance_balanced_pos_sampler.py │ │ │ ├── iou_balanced_neg_sampler.py │ │ │ ├── ohem_sampler.py │ │ │ ├── pseudo_sampler.py │ │ │ ├── random_sampler.py │ │ │ ├── sampling_result.py │ │ │ └── score_hlr_sampler.py │ │ └── transforms.py │ ├── evaluation │ │ ├── __init__.py │ │ ├── bbox_overlaps.py │ │ ├── class_names.py │ │ ├── eval_hooks.py │ │ ├── mean_ap.py │ │ └── recall.py │ ├── export │ │ ├── __init__.py │ │ └── pytorch2onnx.py │ ├── fp16 │ │ ├── __init__.py │ │ └── deprecated_fp16_utils.py │ ├── mask │ │ ├── __init__.py │ │ ├── mask_target.py │ │ ├── structures.py │ │ └── utils.py │ ├── post_processing │ │ ├── __init__.py │ │ ├── bbox_nms.py │ │ └── merge_augs.py │ ├── utils │ │ ├── __init__.py │ │ ├── dist_utils.py │ │ └── misc.py │ └── 
visualization │ │ ├── __init__.py │ │ └── image.py ├── datasets │ ├── __init__.py │ ├── builder.py │ ├── coco.py │ ├── coco_split.py │ ├── cocoeval_wrappers.py │ ├── custom.py │ ├── dataset_wrappers.py │ ├── pipelines │ │ ├── __init__.py │ │ ├── auto_augment.py │ │ ├── compose.py │ │ ├── formating.py │ │ ├── instaboost.py │ │ ├── loading.py │ │ ├── test_time_aug.py │ │ └── transforms.py │ ├── samplers │ │ ├── __init__.py │ │ ├── distributed_sampler.py │ │ └── group_sampler.py │ └── utils.py ├── models │ ├── __init__.py │ ├── backbones │ │ ├── __init__.py │ │ ├── darknet.py │ │ ├── detectors_resnet.py │ │ ├── detectors_resnext.py │ │ ├── hourglass.py │ │ ├── hrnet.py │ │ ├── regnet.py │ │ ├── res2net.py │ │ ├── resnest.py │ │ ├── resnet.py │ │ ├── resnext.py │ │ ├── ssd_vgg.py │ │ └── trident_resnet.py │ ├── builder.py │ ├── dense_heads │ │ ├── __init__.py │ │ ├── anchor_head.py │ │ ├── base_dense_head.py │ │ ├── dense_test_mixins.py │ │ ├── oln_rpn_head.py │ │ ├── rpn_head.py │ │ └── rpn_test_mixin.py │ ├── detectors │ │ ├── __init__.py │ │ ├── base.py │ │ ├── faster_rcnn.py │ │ ├── mask_rcnn.py │ │ ├── rpn.py │ │ ├── rpn_detector.py │ │ └── two_stage.py │ ├── losses │ │ ├── __init__.py │ │ ├── accuracy.py │ │ ├── ae_loss.py │ │ ├── balanced_l1_loss.py │ │ ├── cross_entropy_loss.py │ │ ├── focal_loss.py │ │ ├── gaussian_focal_loss.py │ │ ├── gfocal_loss.py │ │ ├── ghm_loss.py │ │ ├── iou_loss.py │ │ ├── mse_loss.py │ │ ├── pisa_loss.py │ │ ├── smooth_l1_loss.py │ │ ├── utils.py │ │ └── varifocal_loss.py │ ├── necks │ │ ├── __init__.py │ │ └── fpn.py │ ├── roi_heads │ │ ├── __init__.py │ │ ├── base_roi_head.py │ │ ├── bbox_heads │ │ │ ├── __init__.py │ │ │ ├── bbox_head.py │ │ │ ├── convfc_bbox_head.py │ │ │ └── convfc_bbox_score_head.py │ │ ├── mask_heads │ │ │ ├── __init__.py │ │ │ ├── coarse_mask_head.py │ │ │ ├── fcn_mask_head.py │ │ │ ├── fused_semantic_head.py │ │ │ ├── grid_head.py │ │ │ ├── htc_mask_head.py │ │ │ ├── mask_point_head.py │ │ │ └── maskiou_head.py │ │ ├── oln_roi_head.py │ │ ├── roi_extractors │ │ │ ├── __init__.py │ │ │ ├── base_roi_extractor.py │ │ │ ├── generic_roi_extractor.py │ │ │ └── single_level_roi_extractor.py │ │ ├── shared_heads │ │ │ ├── __init__.py │ │ │ └── res_layer.py │ │ ├── standard_roi_head.py │ │ └── test_mixins.py │ └── utils │ │ ├── __init__.py │ │ ├── builder.py │ │ ├── gaussian_target.py │ │ ├── positional_encoding.py │ │ ├── res_layer.py │ │ └── transformer.py ├── utils │ ├── __init__.py │ ├── collect_env.py │ ├── contextmanagers.py │ ├── logger.py │ ├── profiling.py │ └── util_mixins.py └── version.py ├── pytest.ini ├── requirements.txt ├── requirements ├── build.txt ├── docs.txt ├── optional.txt ├── readthedocs.txt ├── runtime.txt └── tests.txt ├── resources ├── coco_test_12510.jpg ├── corruptions_sev_3.png ├── data_pipeline.png ├── loss_curve.png └── mmdet-logo.png ├── setup.cfg ├── setup.py ├── tests ├── async_benchmark.py ├── test_anchor.py ├── test_assigner.py ├── test_async.py ├── test_coder.py ├── test_config.py ├── test_data │ ├── test_dataset.py │ ├── test_formatting.py │ ├── test_img_augment.py │ ├── test_loading.py │ ├── test_models_aug_test.py │ ├── test_rotate.py │ ├── test_sampler.py │ ├── test_shear.py │ ├── test_transform.py │ ├── test_translate.py │ └── test_utils.py ├── test_eval_hook.py ├── test_fp16.py ├── test_iou2d_calculator.py ├── test_masks.py ├── test_misc.py ├── test_models │ ├── test_backbones.py │ ├── test_forward.py │ ├── test_heads.py │ ├── test_losses.py │ ├── test_necks.py │ ├── test_pisa_heads.py │ ├── 
test_position_encoding.py │ ├── test_roi_extractor.py │ └── test_transformer.py ├── test_version.py └── test_visualization.py └── tools ├── analyze_logs.py ├── analyze_results.py ├── benchmark.py ├── browse_dataset.py ├── coco_error_analysis.py ├── compute_auc.py ├── convert_datasets ├── cityscapes.py └── pascal_voc.py ├── detectron2pytorch.py ├── dist_test_bbox.sh ├── dist_train.sh ├── dist_train_and_test_bbox.sh ├── eval_metric.py ├── get_flops.py ├── print_config.py ├── publish_model.py ├── pytorch2onnx.py ├── regnet2mmdet.py ├── robustness_eval.py ├── slurm_test.sh ├── slurm_train.sh ├── test.py ├── test_robustness.py ├── train.py └── upgrade_model_version.py /.gitignore: --------------------------------------------------------------------------------

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
.pytest_cache/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# pyenv
.python-version

# celery beat schedule file
celerybeat-schedule

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/

data/
data
.vscode
.idea
.DS_Store

# custom
*.pkl
*.pkl.json
*.log.json
work_dirs/
run_scripts/

# PyTorch
*.pth
*.py~
*.sh~

# Token
token.txt

-------------------------------------------------------------------------------- /README.md: --------------------------------------------------------------------------------

# Learning Open-World Object Proposals without Learning to Classify

## PyTorch implementation of "Learning Open-World Object Proposals without Learning to Classify" ([RA-L and ICRA 2022](https://arxiv.org/abs/2108.06753))
[Dahun Kim](https://mcahny.github.io/), [Tsung-Yi Lin](https://scholar.google.com/citations?user=_BPdgV0AAAAJ), [Anelia Angelova](https://scholar.google.co.kr/citations?user=nkmDOPgAAAAJ), [In So Kweon](https://rcv.kaist.ac.kr), and [Weicheng Kuo](https://weichengkuo.github.io/).

```bibtex
@article{kim2021oln,
  title={Learning Open-World Object Proposals without Learning to Classify},
  author={Kim, Dahun and Lin, Tsung-Yi and Angelova, Anelia and Kweon, In So and Kuo, Weicheng},
  journal={IEEE Robotics and Automation Letters (RA-L)},
  year={2022}
}
```

## Introduction

Humans can recognize novel objects in an image despite never having seen them before. "Is it possible to learn open-world (novel) object proposals?" In this paper we propose the **Object Localization Network (OLN)**, which learns localization cues instead of foreground-vs-background classification. Although trained only on COCO, OLN can propose many novel objects (top) that Mask R-CNN misses (bottom) on an out-of-sample frame from an ego-centric video.
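To make "localization cues instead of classification" concrete, here is a minimal, self-contained sketch of one such cue: a centerness-style localization-quality target that a head can regress in place of a foreground/background label. This is our own simplification for illustration; the targets and losses this repo actually uses are defined in `mmdet/models/dense_heads/oln_rpn_head.py` and `mmdet/models/roi_heads/oln_roi_head.py` and may differ in detail.

```python
import torch

def centerness_target(locations, gt_boxes):
    """Toy localization-quality target: how centered a point is in its box.

    locations: (N, 2) tensor of (x, y) points, each matched to a box.
    gt_boxes:  (N, 4) tensor of (x1, y1, x2, y2) boxes.
    Returns an (N,) tensor in [0, 1]: 1.0 at the box center, 0.0 on the border.
    """
    left = locations[:, 0] - gt_boxes[:, 0]
    right = gt_boxes[:, 2] - locations[:, 0]
    top = locations[:, 1] - gt_boxes[:, 1]
    bottom = gt_boxes[:, 3] - locations[:, 1]
    lr = torch.stack([left, right], dim=-1)
    tb = torch.stack([top, bottom], dim=-1)
    # Ratio of shorter to longer center-to-border distance on each axis;
    # the clamp guards against points falling outside their matched box.
    ratio = (lr.min(-1).values / lr.max(-1).values).clamp(min=0) * \
            (tb.min(-1).values / tb.max(-1).values).clamp(min=0)
    return ratio.sqrt()
```

A head regressing such a target (e.g. with an L1 loss) scores "how well does this region localize an object" rather than "is this one of the K training classes", which is what allows the proposals to transfer to unseen categories.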
## Cross-category generalization on COCO

We train OLN on the COCO VOC categories and test it on the non-VOC categories. Note that our AR@k evaluation does not count proposals on the 'seen' classes toward the budget (k), to avoid evaluating recall on seen-class objects (a toy sketch of this rule is given in the appendix at the end of this README).

| Method  | AUC  | AR@10 | AR@30 | AR@100 | AR@300 | AR@1000 | Download |
|:-------:|:----:|:-----:|:-----:|:------:|:------:|:-------:|:--------:|
| OLN-Box | 24.8 | 18.0  | 26.4  | 33.4   | 39.0   | 45.0    | [model](https://drive.google.com/uc?id=1uL6TRhpSILvWeR6DZ0x9K9VywrQXQvq9) |

## Disclaimer

This repo is tested under Python 3.7, PyTorch 1.7.0, CUDA 11.0, and mmcv==1.2.5.

## Installation

This repo is built on top of [mmdetection](https://github.com/open-mmlab/mmdetection).

You can use the following commands to create a conda environment with the required dependencies.
```
conda create -n oln python=3.7 -y
conda activate oln
conda install pytorch=1.7.0 torchvision cudatoolkit=11.0 -c pytorch -y
pip install mmcv-full==1.2.7
pip install -r requirements.txt
pip install -v -e .
```
Please also refer to [get_started.md](docs/get_started.md) for more installation details.

## Prepare datasets

The COCO dataset is available from its official website. It is recommended to download and extract the dataset somewhere outside the project directory and to symlink the dataset root to `$OLN/data`, as below.
```
object_localization_network
├── mmdet
├── tools
├── configs
├── data
│   ├── coco
│   │   ├── annotations
│   │   ├── train2017
│   │   ├── val2017
│   │   ├── test2017
```

## Testing

Our trained model is available for download [here](https://drive.google.com/uc?id=1uL6TRhpSILvWeR6DZ0x9K9VywrQXQvq9). Place it at `trained_weights/latest.pth` and run the following commands to test OLN on the COCO dataset.

```
# Multi-GPU distributed testing
bash tools/dist_test_bbox.sh configs/oln_box/oln_box.py \
    trained_weights/latest.pth ${NUM_GPUS}
# OR single-GPU testing
python tools/test.py configs/oln_box/oln_box.py trained_weights/latest.pth --eval bbox
```

## Training

```
# Multi-GPU distributed training
bash tools/dist_train.sh configs/oln_box/oln_box.py ${NUM_GPUS}
```

## Contact

If you have any questions regarding the repo, please contact Dahun Kim (mcahny01@gmail.com) or create an issue.
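## Appendix: toy sketch of the cross-category AR@k rule

As a concrete, deliberately simplified illustration of the budgeting rule described in the cross-category section above, the sketch below discards every proposal that covers a seen-class (VOC) ground-truth box before the top-k budget is applied, so that recall is measured on unseen-class objects only. This is our own toy code, not the repo's evaluation; the split and evaluation the repo actually runs live in `mmdet/datasets/coco_split.py` and `mmdet/datasets/cocoeval_wrappers.py`, and details such as the matching threshold below are our assumptions.

```python
import numpy as np

def iou(a, b):
    """IoU of two boxes in (x1, y1, x2, y2) format."""
    x1, y1 = np.maximum(a[:2], b[:2])
    x2, y2 = np.minimum(a[2:], b[2:])
    inter = max(0.0, x2 - x1) * max(0.0, y2 - y1)
    area_a = (a[2] - a[0]) * (a[3] - a[1])
    area_b = (b[2] - b[0]) * (b[3] - b[1])
    return inter / (area_a + area_b - inter)

def topk_unseen_proposals(proposals, scores, seen_gt_boxes, k, iou_thr=0.5):
    """Drop proposals that cover seen-class GT, then keep the top-k of the rest.

    proposals: (N, 4) boxes, scores: (N,), seen_gt_boxes: (M, 4).
    AR@k is then computed against the unseen-class GT on this output,
    so seen-class objects never consume any of the k-proposal budget.
    """
    keep = np.array([
        all(iou(p, g) < iou_thr for g in seen_gt_boxes) for p in proposals
    ], dtype=bool)
    kept, kept_scores = proposals[keep], scores[keep]
    order = np.argsort(-kept_scores)[:k]
    return kept[order]
```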
94 | -------------------------------------------------------------------------------- /configs/_base_/datasets/cityscapes_detection.py: -------------------------------------------------------------------------------- 1 | dataset_type = 'CityscapesDataset' 2 | data_root = 'data/cityscapes/' 3 | img_norm_cfg = dict( 4 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 5 | train_pipeline = [ 6 | dict(type='LoadImageFromFile'), 7 | dict(type='LoadAnnotations', with_bbox=True), 8 | dict( 9 | type='Resize', img_scale=[(2048, 800), (2048, 1024)], keep_ratio=True), 10 | dict(type='RandomFlip', flip_ratio=0.5), 11 | dict(type='Normalize', **img_norm_cfg), 12 | dict(type='Pad', size_divisor=32), 13 | dict(type='DefaultFormatBundle'), 14 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), 15 | ] 16 | test_pipeline = [ 17 | dict(type='LoadImageFromFile'), 18 | dict( 19 | type='MultiScaleFlipAug', 20 | img_scale=(2048, 1024), 21 | flip=False, 22 | transforms=[ 23 | dict(type='Resize', keep_ratio=True), 24 | dict(type='RandomFlip'), 25 | dict(type='Normalize', **img_norm_cfg), 26 | dict(type='Pad', size_divisor=32), 27 | dict(type='ImageToTensor', keys=['img']), 28 | dict(type='Collect', keys=['img']), 29 | ]) 30 | ] 31 | data = dict( 32 | samples_per_gpu=1, 33 | workers_per_gpu=2, 34 | train=dict( 35 | type='RepeatDataset', 36 | times=8, 37 | dataset=dict( 38 | type=dataset_type, 39 | ann_file=data_root + 40 | 'annotations/instancesonly_filtered_gtFine_train.json', 41 | img_prefix=data_root + 'leftImg8bit/train/', 42 | pipeline=train_pipeline)), 43 | val=dict( 44 | type=dataset_type, 45 | ann_file=data_root + 46 | 'annotations/instancesonly_filtered_gtFine_val.json', 47 | img_prefix=data_root + 'leftImg8bit/val/', 48 | pipeline=test_pipeline), 49 | test=dict( 50 | type=dataset_type, 51 | ann_file=data_root + 52 | 'annotations/instancesonly_filtered_gtFine_test.json', 53 | img_prefix=data_root + 'leftImg8bit/test/', 54 | pipeline=test_pipeline)) 55 | evaluation = dict(interval=1, metric='bbox') 56 | -------------------------------------------------------------------------------- /configs/_base_/datasets/cityscapes_instance.py: -------------------------------------------------------------------------------- 1 | dataset_type = 'CityscapesDataset' 2 | data_root = 'data/cityscapes/' 3 | img_norm_cfg = dict( 4 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 5 | train_pipeline = [ 6 | dict(type='LoadImageFromFile'), 7 | dict(type='LoadAnnotations', with_bbox=True, with_mask=True), 8 | dict( 9 | type='Resize', img_scale=[(2048, 800), (2048, 1024)], keep_ratio=True), 10 | dict(type='RandomFlip', flip_ratio=0.5), 11 | dict(type='Normalize', **img_norm_cfg), 12 | dict(type='Pad', size_divisor=32), 13 | dict(type='DefaultFormatBundle'), 14 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), 15 | ] 16 | test_pipeline = [ 17 | dict(type='LoadImageFromFile'), 18 | dict( 19 | type='MultiScaleFlipAug', 20 | img_scale=(2048, 1024), 21 | flip=False, 22 | transforms=[ 23 | dict(type='Resize', keep_ratio=True), 24 | dict(type='RandomFlip'), 25 | dict(type='Normalize', **img_norm_cfg), 26 | dict(type='Pad', size_divisor=32), 27 | dict(type='ImageToTensor', keys=['img']), 28 | dict(type='Collect', keys=['img']), 29 | ]) 30 | ] 31 | data = dict( 32 | samples_per_gpu=1, 33 | workers_per_gpu=2, 34 | train=dict( 35 | type='RepeatDataset', 36 | times=8, 37 | dataset=dict( 38 | type=dataset_type, 39 | ann_file=data_root + 40 | 
'annotations/instancesonly_filtered_gtFine_train.json', 41 | img_prefix=data_root + 'leftImg8bit/train/', 42 | pipeline=train_pipeline)), 43 | val=dict( 44 | type=dataset_type, 45 | ann_file=data_root + 46 | 'annotations/instancesonly_filtered_gtFine_val.json', 47 | img_prefix=data_root + 'leftImg8bit/val/', 48 | pipeline=test_pipeline), 49 | test=dict( 50 | type=dataset_type, 51 | ann_file=data_root + 52 | 'annotations/instancesonly_filtered_gtFine_test.json', 53 | img_prefix=data_root + 'leftImg8bit/test/', 54 | pipeline=test_pipeline)) 55 | evaluation = dict(metric=['bbox', 'segm']) 56 | -------------------------------------------------------------------------------- /configs/_base_/datasets/coco_detection.py: -------------------------------------------------------------------------------- 1 | dataset_type = 'CocoDataset' 2 | data_root = 'data/coco/' 3 | img_norm_cfg = dict( 4 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 5 | train_pipeline = [ 6 | dict(type='LoadImageFromFile'), 7 | dict(type='LoadAnnotations', with_bbox=True), 8 | dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), 9 | dict(type='RandomFlip', flip_ratio=0.5), 10 | dict(type='Normalize', **img_norm_cfg), 11 | dict(type='Pad', size_divisor=32), 12 | dict(type='DefaultFormatBundle'), 13 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), 14 | ] 15 | test_pipeline = [ 16 | dict(type='LoadImageFromFile'), 17 | dict( 18 | type='MultiScaleFlipAug', 19 | img_scale=(1333, 800), 20 | flip=False, 21 | transforms=[ 22 | dict(type='Resize', keep_ratio=True), 23 | dict(type='RandomFlip'), 24 | dict(type='Normalize', **img_norm_cfg), 25 | dict(type='Pad', size_divisor=32), 26 | dict(type='ImageToTensor', keys=['img']), 27 | dict(type='Collect', keys=['img']), 28 | ]) 29 | ] 30 | data = dict( 31 | samples_per_gpu=2, 32 | workers_per_gpu=2, 33 | train=dict( 34 | type=dataset_type, 35 | ann_file=data_root + 'annotations/instances_train2017.json', 36 | img_prefix=data_root + 'train2017/', 37 | pipeline=train_pipeline), 38 | val=dict( 39 | type=dataset_type, 40 | ann_file=data_root + 'annotations/instances_val2017.json', 41 | img_prefix=data_root + 'val2017/', 42 | pipeline=test_pipeline), 43 | test=dict( 44 | type=dataset_type, 45 | ann_file=data_root + 'annotations/instances_val2017.json', 46 | img_prefix=data_root + 'val2017/', 47 | pipeline=test_pipeline)) 48 | evaluation = dict(interval=1, metric='bbox') 49 | -------------------------------------------------------------------------------- /configs/_base_/datasets/coco_instance.py: -------------------------------------------------------------------------------- 1 | dataset_type = 'CocoDataset' 2 | data_root = 'data/coco/' 3 | img_norm_cfg = dict( 4 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 5 | train_pipeline = [ 6 | dict(type='LoadImageFromFile'), 7 | dict(type='LoadAnnotations', with_bbox=True, with_mask=True), 8 | dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), 9 | dict(type='RandomFlip', flip_ratio=0.5), 10 | dict(type='Normalize', **img_norm_cfg), 11 | dict(type='Pad', size_divisor=32), 12 | dict(type='DefaultFormatBundle'), 13 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), 14 | ] 15 | test_pipeline = [ 16 | dict(type='LoadImageFromFile'), 17 | dict( 18 | type='MultiScaleFlipAug', 19 | img_scale=(1333, 800), 20 | flip=False, 21 | transforms=[ 22 | dict(type='Resize', keep_ratio=True), 23 | dict(type='RandomFlip'), 24 | dict(type='Normalize', 
**img_norm_cfg), 25 | dict(type='Pad', size_divisor=32), 26 | dict(type='ImageToTensor', keys=['img']), 27 | dict(type='Collect', keys=['img']), 28 | ]) 29 | ] 30 | data = dict( 31 | samples_per_gpu=2, 32 | workers_per_gpu=2, 33 | train=dict( 34 | type=dataset_type, 35 | ann_file=data_root + 'annotations/instances_train2017.json', 36 | img_prefix=data_root + 'train2017/', 37 | pipeline=train_pipeline), 38 | val=dict( 39 | type=dataset_type, 40 | ann_file=data_root + 'annotations/instances_val2017.json', 41 | img_prefix=data_root + 'val2017/', 42 | pipeline=test_pipeline), 43 | test=dict( 44 | type=dataset_type, 45 | ann_file=data_root + 'annotations/instances_val2017.json', 46 | img_prefix=data_root + 'val2017/', 47 | pipeline=test_pipeline)) 48 | evaluation = dict(metric=['bbox', 'segm']) 49 | -------------------------------------------------------------------------------- /configs/_base_/datasets/coco_instance_semantic.py: -------------------------------------------------------------------------------- 1 | dataset_type = 'CocoDataset' 2 | data_root = 'data/coco/' 3 | img_norm_cfg = dict( 4 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 5 | train_pipeline = [ 6 | dict(type='LoadImageFromFile'), 7 | dict( 8 | type='LoadAnnotations', with_bbox=True, with_mask=True, with_seg=True), 9 | dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), 10 | dict(type='RandomFlip', flip_ratio=0.5), 11 | dict(type='Normalize', **img_norm_cfg), 12 | dict(type='Pad', size_divisor=32), 13 | dict(type='SegRescale', scale_factor=1 / 8), 14 | dict(type='DefaultFormatBundle'), 15 | dict( 16 | type='Collect', 17 | keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks', 'gt_semantic_seg']), 18 | ] 19 | test_pipeline = [ 20 | dict(type='LoadImageFromFile'), 21 | dict( 22 | type='MultiScaleFlipAug', 23 | img_scale=(1333, 800), 24 | flip=False, 25 | transforms=[ 26 | dict(type='Resize', keep_ratio=True), 27 | dict(type='RandomFlip', flip_ratio=0.5), 28 | dict(type='Normalize', **img_norm_cfg), 29 | dict(type='Pad', size_divisor=32), 30 | dict(type='ImageToTensor', keys=['img']), 31 | dict(type='Collect', keys=['img']), 32 | ]) 33 | ] 34 | data = dict( 35 | samples_per_gpu=2, 36 | workers_per_gpu=2, 37 | train=dict( 38 | type=dataset_type, 39 | ann_file=data_root + 'annotations/instances_train2017.json', 40 | img_prefix=data_root + 'train2017/', 41 | seg_prefix=data_root + 'stuffthingmaps/train2017/', 42 | pipeline=train_pipeline), 43 | val=dict( 44 | type=dataset_type, 45 | ann_file=data_root + 'annotations/instances_val2017.json', 46 | img_prefix=data_root + 'val2017/', 47 | pipeline=test_pipeline), 48 | test=dict( 49 | type=dataset_type, 50 | ann_file=data_root + 'annotations/instances_val2017.json', 51 | img_prefix=data_root + 'val2017/', 52 | pipeline=test_pipeline)) 53 | evaluation = dict(metric=['bbox', 'segm']) 54 | -------------------------------------------------------------------------------- /configs/_base_/datasets/deepfashion.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'DeepFashionDataset' 3 | data_root = 'data/DeepFashion/In-shop/' 4 | img_norm_cfg = dict( 5 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 6 | train_pipeline = [ 7 | dict(type='LoadImageFromFile'), 8 | dict(type='LoadAnnotations', with_bbox=True, with_mask=True), 9 | dict(type='Resize', img_scale=(750, 1101), keep_ratio=True), 10 | dict(type='RandomFlip', flip_ratio=0.5), 11 | dict(type='Normalize', 
**img_norm_cfg), 12 | dict(type='Pad', size_divisor=32), 13 | dict(type='DefaultFormatBundle'), 14 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), 15 | ] 16 | test_pipeline = [ 17 | dict(type='LoadImageFromFile'), 18 | dict( 19 | type='MultiScaleFlipAug', 20 | img_scale=(750, 1101), 21 | flip=False, 22 | transforms=[ 23 | dict(type='Resize', keep_ratio=True), 24 | dict(type='RandomFlip'), 25 | dict(type='Normalize', **img_norm_cfg), 26 | dict(type='Pad', size_divisor=32), 27 | dict(type='ImageToTensor', keys=['img']), 28 | dict(type='Collect', keys=['img']), 29 | ]) 30 | ] 31 | data = dict( 32 | imgs_per_gpu=2, 33 | workers_per_gpu=1, 34 | train=dict( 35 | type=dataset_type, 36 | ann_file=data_root + 'annotations/DeepFashion_segmentation_query.json', 37 | img_prefix=data_root + 'Img/', 38 | pipeline=train_pipeline, 39 | data_root=data_root), 40 | val=dict( 41 | type=dataset_type, 42 | ann_file=data_root + 'annotations/DeepFashion_segmentation_query.json', 43 | img_prefix=data_root + 'Img/', 44 | pipeline=test_pipeline, 45 | data_root=data_root), 46 | test=dict( 47 | type=dataset_type, 48 | ann_file=data_root + 49 | 'annotations/DeepFashion_segmentation_gallery.json', 50 | img_prefix=data_root + 'Img/', 51 | pipeline=test_pipeline, 52 | data_root=data_root)) 53 | evaluation = dict(interval=5, metric=['bbox', 'segm']) 54 | -------------------------------------------------------------------------------- /configs/_base_/datasets/lvis_v0.5_detection.py: -------------------------------------------------------------------------------- 1 | _base_ = 'coco_detection.py' 2 | dataset_type = 'LVISV05Dataset' 3 | data_root = 'data/LVIS/' 4 | data = dict( 5 | samples_per_gpu=2, 6 | workers_per_gpu=2, 7 | train=dict( 8 | _delete_=True, 9 | type='ClassBalancedDataset', 10 | oversample_thr=1e-3, 11 | dataset=dict( 12 | type=dataset_type, 13 | ann_file=data_root + 'annotations/lvis_v0.5_train.json', 14 | # ann_file=data_root + 'annotations/lvis_v0.5_train_10.json', 15 | img_prefix=data_root + 'train2017/')), 16 | val=dict( 17 | type=dataset_type, 18 | ann_file=data_root + 'annotations/lvis_v0.5_val.json', 19 | img_prefix=data_root + 'val2017/'), 20 | test=dict( 21 | type=dataset_type, 22 | ann_file=data_root + 'annotations/lvis_v0.5_val.json', 23 | img_prefix=data_root + 'val2017/')) 24 | evaluation = dict(metric=['bbox', 'segm']) 25 | -------------------------------------------------------------------------------- /configs/_base_/datasets/lvis_v0.5_detection_shot.py: -------------------------------------------------------------------------------- 1 | _base_ = 'coco_detection.py' 2 | dataset_type = 'LVISV05Dataset' 3 | data_root = 'data/LVIS/' 4 | data = dict( 5 | samples_per_gpu=2, 6 | workers_per_gpu=2, 7 | train=dict( 8 | _delete_=True, 9 | type='ClassBalancedDataset', 10 | oversample_thr=1e-3, 11 | dataset=dict( 12 | type=dataset_type, 13 | # ann_file=data_root + 'annotations/lvis_v0.5_train.json', 14 | ann_file=data_root + 'annotations/lvis_v0.5_train_10.json', 15 | img_prefix=data_root + 'train2017/')), 16 | val=dict( 17 | type=dataset_type, 18 | ann_file=data_root + 'annotations/lvis_v0.5_val.json', 19 | img_prefix=data_root + 'val2017/'), 20 | test=dict( 21 | type=dataset_type, 22 | ann_file=data_root + 'annotations/lvis_v0.5_val.json', 23 | img_prefix=data_root + 'val2017/')) 24 | evaluation = dict(metric=['bbox', 'segm']) 25 | -------------------------------------------------------------------------------- /configs/_base_/datasets/lvis_v0.5_instance.py: 
-------------------------------------------------------------------------------- 1 | _base_ = 'coco_instance.py' 2 | dataset_type = 'LVISV05Dataset' 3 | # data_root = 'data/lvis_v0.5/' 4 | data_root = '/data2/LVIS/' 5 | data = dict( 6 | samples_per_gpu=2, 7 | workers_per_gpu=2, 8 | train=dict( 9 | _delete_=True, 10 | type='ClassBalancedDataset', 11 | oversample_thr=1e-3, 12 | dataset=dict( 13 | type=dataset_type, 14 | ann_file=data_root + 'annotations/lvis_v0.5_train.json', 15 | img_prefix=data_root + 'train2017/')), 16 | val=dict( 17 | type=dataset_type, 18 | ann_file=data_root + 'annotations/lvis_v0.5_val.json', 19 | img_prefix=data_root + 'val2017/'), 20 | test=dict( 21 | type=dataset_type, 22 | ann_file=data_root + 'annotations/lvis_v0.5_val.json', 23 | img_prefix=data_root + 'val2017/')) 24 | evaluation = dict(metric=['bbox', 'segm']) 25 | -------------------------------------------------------------------------------- /configs/_base_/datasets/lvis_v1_instance.py: -------------------------------------------------------------------------------- 1 | _base_ = 'coco_instance.py' 2 | dataset_type = 'LVISV1Dataset' 3 | data_root = 'data/lvis_v1/' 4 | data = dict( 5 | samples_per_gpu=2, 6 | workers_per_gpu=2, 7 | train=dict( 8 | _delete_=True, 9 | type='ClassBalancedDataset', 10 | oversample_thr=1e-3, 11 | dataset=dict( 12 | type=dataset_type, 13 | ann_file=data_root + 'annotations/lvis_v1_train.json', 14 | img_prefix=data_root)), 15 | val=dict( 16 | type=dataset_type, 17 | ann_file=data_root + 'annotations/lvis_v1_val.json', 18 | img_prefix=data_root), 19 | test=dict( 20 | type=dataset_type, 21 | ann_file=data_root + 'annotations/lvis_v1_val.json', 22 | img_prefix=data_root)) 23 | evaluation = dict(metric=['bbox', 'segm']) 24 | -------------------------------------------------------------------------------- /configs/_base_/datasets/voc0712.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'VOCDataset' 3 | data_root = 'data/VOCdevkit/' 4 | img_norm_cfg = dict( 5 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 6 | train_pipeline = [ 7 | dict(type='LoadImageFromFile'), 8 | dict(type='LoadAnnotations', with_bbox=True), 9 | dict(type='Resize', img_scale=(1000, 600), keep_ratio=True), 10 | dict(type='RandomFlip', flip_ratio=0.5), 11 | dict(type='Normalize', **img_norm_cfg), 12 | dict(type='Pad', size_divisor=32), 13 | dict(type='DefaultFormatBundle'), 14 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), 15 | ] 16 | test_pipeline = [ 17 | dict(type='LoadImageFromFile'), 18 | dict( 19 | type='MultiScaleFlipAug', 20 | img_scale=(1000, 600), 21 | flip=False, 22 | transforms=[ 23 | dict(type='Resize', keep_ratio=True), 24 | dict(type='RandomFlip'), 25 | dict(type='Normalize', **img_norm_cfg), 26 | dict(type='Pad', size_divisor=32), 27 | dict(type='ImageToTensor', keys=['img']), 28 | dict(type='Collect', keys=['img']), 29 | ]) 30 | ] 31 | data = dict( 32 | samples_per_gpu=2, 33 | workers_per_gpu=2, 34 | train=dict( 35 | type='RepeatDataset', 36 | times=3, 37 | dataset=dict( 38 | type=dataset_type, 39 | ann_file=[ 40 | data_root + 'VOC2007/ImageSets/Main/trainval.txt', 41 | data_root + 'VOC2012/ImageSets/Main/trainval.txt' 42 | ], 43 | img_prefix=[data_root + 'VOC2007/', data_root + 'VOC2012/'], 44 | pipeline=train_pipeline)), 45 | val=dict( 46 | type=dataset_type, 47 | ann_file=data_root + 'VOC2007/ImageSets/Main/test.txt', 48 | img_prefix=data_root + 'VOC2007/', 49 | 
pipeline=test_pipeline), 50 | test=dict( 51 | type=dataset_type, 52 | ann_file=data_root + 'VOC2007/ImageSets/Main/test.txt', 53 | img_prefix=data_root + 'VOC2007/', 54 | pipeline=test_pipeline)) 55 | evaluation = dict(interval=1, metric='mAP') 56 | -------------------------------------------------------------------------------- /configs/_base_/datasets/wider_face.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'WIDERFaceDataset' 3 | data_root = 'data/WIDERFace/' 4 | img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], std=[1, 1, 1], to_rgb=True) 5 | train_pipeline = [ 6 | dict(type='LoadImageFromFile', to_float32=True), 7 | dict(type='LoadAnnotations', with_bbox=True), 8 | dict( 9 | type='PhotoMetricDistortion', 10 | brightness_delta=32, 11 | contrast_range=(0.5, 1.5), 12 | saturation_range=(0.5, 1.5), 13 | hue_delta=18), 14 | dict( 15 | type='Expand', 16 | mean=img_norm_cfg['mean'], 17 | to_rgb=img_norm_cfg['to_rgb'], 18 | ratio_range=(1, 4)), 19 | dict( 20 | type='MinIoURandomCrop', 21 | min_ious=(0.1, 0.3, 0.5, 0.7, 0.9), 22 | min_crop_size=0.3), 23 | dict(type='Resize', img_scale=(300, 300), keep_ratio=False), 24 | dict(type='Normalize', **img_norm_cfg), 25 | dict(type='RandomFlip', flip_ratio=0.5), 26 | dict(type='DefaultFormatBundle'), 27 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), 28 | ] 29 | test_pipeline = [ 30 | dict(type='LoadImageFromFile'), 31 | dict( 32 | type='MultiScaleFlipAug', 33 | img_scale=(300, 300), 34 | flip=False, 35 | transforms=[ 36 | dict(type='Resize', keep_ratio=False), 37 | dict(type='Normalize', **img_norm_cfg), 38 | dict(type='ImageToTensor', keys=['img']), 39 | dict(type='Collect', keys=['img']), 40 | ]) 41 | ] 42 | data = dict( 43 | samples_per_gpu=60, 44 | workers_per_gpu=2, 45 | train=dict( 46 | type='RepeatDataset', 47 | times=2, 48 | dataset=dict( 49 | type=dataset_type, 50 | ann_file=data_root + 'train.txt', 51 | img_prefix=data_root + 'WIDER_train/', 52 | min_size=17, 53 | pipeline=train_pipeline)), 54 | val=dict( 55 | type=dataset_type, 56 | ann_file=data_root + 'val.txt', 57 | img_prefix=data_root + 'WIDER_val/', 58 | pipeline=test_pipeline), 59 | test=dict( 60 | type=dataset_type, 61 | ann_file=data_root + 'val.txt', 62 | img_prefix=data_root + 'WIDER_val/', 63 | pipeline=test_pipeline)) 64 | -------------------------------------------------------------------------------- /configs/_base_/default_runtime.py: -------------------------------------------------------------------------------- 1 | checkpoint_config = dict(interval=1) 2 | # yapf:disable 3 | log_config = dict( 4 | interval=50, 5 | hooks=[ 6 | dict(type='TextLoggerHook'), 7 | # dict(type='TensorboardLoggerHook') 8 | ]) 9 | # yapf:enable 10 | dist_params = dict(backend='nccl') 11 | log_level = 'INFO' 12 | load_from = None 13 | resume_from = None 14 | workflow = [('train', 1)] 15 | -------------------------------------------------------------------------------- /configs/_base_/models/fast_rcnn_r50_fpn.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='FastRCNN', 4 | pretrained='torchvision://resnet50', 5 | backbone=dict( 6 | type='ResNet', 7 | depth=50, 8 | num_stages=4, 9 | out_indices=(0, 1, 2, 3), 10 | frozen_stages=1, 11 | norm_cfg=dict(type='BN', requires_grad=True), 12 | norm_eval=True, 13 | style='pytorch'), 14 | neck=dict( 15 | type='FPN', 16 | in_channels=[256, 512, 1024, 2048], 17 | out_channels=256, 18 
| num_outs=5), 19 | roi_head=dict( 20 | type='StandardRoIHead', 21 | bbox_roi_extractor=dict( 22 | type='SingleRoIExtractor', 23 | roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0), 24 | out_channels=256, 25 | featmap_strides=[4, 8, 16, 32]), 26 | bbox_head=dict( 27 | type='Shared2FCBBoxHead', 28 | in_channels=256, 29 | fc_out_channels=1024, 30 | roi_feat_size=7, 31 | num_classes=80, 32 | bbox_coder=dict( 33 | type='DeltaXYWHBBoxCoder', 34 | target_means=[0., 0., 0., 0.], 35 | target_stds=[0.1, 0.1, 0.2, 0.2]), 36 | reg_class_agnostic=False, 37 | loss_cls=dict( 38 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), 39 | loss_bbox=dict(type='L1Loss', loss_weight=1.0))), 40 | # model training and testing settings 41 | train_cfg=dict( 42 | rcnn=dict( 43 | assigner=dict( 44 | type='MaxIoUAssigner', 45 | pos_iou_thr=0.5, 46 | neg_iou_thr=0.5, 47 | min_pos_iou=0.5, 48 | match_low_quality=False, 49 | ignore_iof_thr=-1), 50 | sampler=dict( 51 | type='RandomSampler', 52 | num=512, 53 | pos_fraction=0.25, 54 | neg_pos_ub=-1, 55 | add_gt_as_proposals=True), 56 | pos_weight=-1, 57 | debug=False)), 58 | test_cfg=dict( 59 | rcnn=dict( 60 | score_thr=0.05, 61 | nms=dict(type='nms', iou_threshold=0.5), 62 | max_per_img=100))) 63 | -------------------------------------------------------------------------------- /configs/_base_/models/faster_rcnn_r50_caffe_dc5.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='BN', requires_grad=False) 3 | model = dict( 4 | type='FasterRCNN', 5 | pretrained='open-mmlab://detectron2/resnet50_caffe', 6 | backbone=dict( 7 | type='ResNet', 8 | depth=50, 9 | num_stages=4, 10 | strides=(1, 2, 2, 1), 11 | dilations=(1, 1, 1, 2), 12 | out_indices=(3, ), 13 | frozen_stages=1, 14 | norm_cfg=norm_cfg, 15 | norm_eval=True, 16 | style='caffe'), 17 | rpn_head=dict( 18 | type='RPNHead', 19 | in_channels=2048, 20 | feat_channels=2048, 21 | anchor_generator=dict( 22 | type='AnchorGenerator', 23 | scales=[2, 4, 8, 16, 32], 24 | ratios=[0.5, 1.0, 2.0], 25 | strides=[16]), 26 | bbox_coder=dict( 27 | type='DeltaXYWHBBoxCoder', 28 | target_means=[.0, .0, .0, .0], 29 | target_stds=[1.0, 1.0, 1.0, 1.0]), 30 | loss_cls=dict( 31 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 32 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 33 | roi_head=dict( 34 | type='StandardRoIHead', 35 | bbox_roi_extractor=dict( 36 | type='SingleRoIExtractor', 37 | roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0), 38 | out_channels=2048, 39 | featmap_strides=[16]), 40 | bbox_head=dict( 41 | type='Shared2FCBBoxHead', 42 | in_channels=2048, 43 | fc_out_channels=1024, 44 | roi_feat_size=7, 45 | num_classes=80, 46 | bbox_coder=dict( 47 | type='DeltaXYWHBBoxCoder', 48 | target_means=[0., 0., 0., 0.], 49 | target_stds=[0.1, 0.1, 0.2, 0.2]), 50 | reg_class_agnostic=False, 51 | loss_cls=dict( 52 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), 53 | loss_bbox=dict(type='L1Loss', loss_weight=1.0))), 54 | # model training and testing settings 55 | train_cfg=dict( 56 | rpn=dict( 57 | assigner=dict( 58 | type='MaxIoUAssigner', 59 | pos_iou_thr=0.7, 60 | neg_iou_thr=0.3, 61 | min_pos_iou=0.3, 62 | match_low_quality=True, 63 | ignore_iof_thr=-1), 64 | sampler=dict( 65 | type='RandomSampler', 66 | num=256, 67 | pos_fraction=0.5, 68 | neg_pos_ub=-1, 69 | add_gt_as_proposals=False), 70 | allowed_border=0, 71 | pos_weight=-1, 72 | debug=False), 73 | rpn_proposal=dict( 74 | 
nms_across_levels=False, 75 | nms_pre=12000, 76 | nms_post=2000, 77 | max_num=2000, 78 | nms_thr=0.7, 79 | min_bbox_size=0), 80 | rcnn=dict( 81 | assigner=dict( 82 | type='MaxIoUAssigner', 83 | pos_iou_thr=0.5, 84 | neg_iou_thr=0.5, 85 | min_pos_iou=0.5, 86 | match_low_quality=False, 87 | ignore_iof_thr=-1), 88 | sampler=dict( 89 | type='RandomSampler', 90 | num=512, 91 | pos_fraction=0.25, 92 | neg_pos_ub=-1, 93 | add_gt_as_proposals=True), 94 | pos_weight=-1, 95 | debug=False)), 96 | test_cfg=dict( 97 | rpn=dict( 98 | nms_across_levels=False, 99 | nms_pre=6000, 100 | nms_post=1000, 101 | max_num=1000, 102 | nms_thr=0.7, 103 | min_bbox_size=0), 104 | rcnn=dict( 105 | score_thr=0.05, 106 | nms=dict(type='nms', iou_threshold=0.5), 107 | max_per_img=100))) 108 | -------------------------------------------------------------------------------- /configs/_base_/models/retinanet_r50_fpn.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='RetinaNet', 4 | pretrained='torchvision://resnet50', 5 | backbone=dict( 6 | type='ResNet', 7 | depth=50, 8 | num_stages=4, 9 | out_indices=(0, 1, 2, 3), 10 | frozen_stages=1, 11 | norm_cfg=dict(type='BN', requires_grad=True), 12 | norm_eval=True, 13 | style='pytorch'), 14 | neck=dict( 15 | type='FPN', 16 | in_channels=[256, 512, 1024, 2048], 17 | out_channels=256, 18 | start_level=1, 19 | add_extra_convs='on_input', 20 | num_outs=5), 21 | bbox_head=dict( 22 | type='RetinaHead', 23 | num_classes=80, 24 | in_channels=256, 25 | stacked_convs=4, 26 | feat_channels=256, 27 | anchor_generator=dict( 28 | type='AnchorGenerator', 29 | octave_base_scale=4, 30 | scales_per_octave=3, 31 | ratios=[0.5, 1.0, 2.0], 32 | strides=[8, 16, 32, 64, 128]), 33 | bbox_coder=dict( 34 | type='DeltaXYWHBBoxCoder', 35 | target_means=[.0, .0, .0, .0], 36 | target_stds=[1.0, 1.0, 1.0, 1.0]), 37 | loss_cls=dict( 38 | type='FocalLoss', 39 | use_sigmoid=True, 40 | gamma=2.0, 41 | alpha=0.25, 42 | loss_weight=1.0), 43 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 44 | # training and testing settings 45 | train_cfg=dict( 46 | assigner=dict( 47 | type='MaxIoUAssigner', 48 | pos_iou_thr=0.5, 49 | neg_iou_thr=0.4, 50 | min_pos_iou=0, 51 | ignore_iof_thr=-1), 52 | allowed_border=-1, 53 | pos_weight=-1, 54 | debug=False), 55 | test_cfg=dict( 56 | nms_pre=1000, 57 | min_bbox_size=0, 58 | score_thr=0.05, 59 | nms=dict(type='nms', iou_threshold=0.5), 60 | max_per_img=100)) 61 | -------------------------------------------------------------------------------- /configs/_base_/models/rpn_r50_caffe_c4.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='RPN', 4 | pretrained='open-mmlab://detectron2/resnet50_caffe', 5 | backbone=dict( 6 | type='ResNet', 7 | depth=50, 8 | num_stages=3, 9 | strides=(1, 2, 2), 10 | dilations=(1, 1, 1), 11 | out_indices=(2, ), 12 | frozen_stages=1, 13 | norm_cfg=dict(type='BN', requires_grad=False), 14 | norm_eval=True, 15 | style='caffe'), 16 | neck=None, 17 | rpn_head=dict( 18 | type='RPNHead', 19 | in_channels=1024, 20 | feat_channels=1024, 21 | anchor_generator=dict( 22 | type='AnchorGenerator', 23 | scales=[2, 4, 8, 16, 32], 24 | ratios=[0.5, 1.0, 2.0], 25 | strides=[16]), 26 | bbox_coder=dict( 27 | type='DeltaXYWHBBoxCoder', 28 | target_means=[.0, .0, .0, .0], 29 | target_stds=[1.0, 1.0, 1.0, 1.0]), 30 | loss_cls=dict( 31 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 32 | 
loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 33 | # model training and testing settings 34 | train_cfg=dict( 35 | rpn=dict( 36 | assigner=dict( 37 | type='MaxIoUAssigner', 38 | pos_iou_thr=0.7, 39 | neg_iou_thr=0.3, 40 | min_pos_iou=0.3, 41 | ignore_iof_thr=-1), 42 | sampler=dict( 43 | type='RandomSampler', 44 | num=256, 45 | pos_fraction=0.5, 46 | neg_pos_ub=-1, 47 | add_gt_as_proposals=False), 48 | allowed_border=0, 49 | pos_weight=-1, 50 | debug=False)), 51 | test_cfg=dict( 52 | rpn=dict( 53 | nms_across_levels=False, 54 | nms_pre=12000, 55 | nms_post=2000, 56 | max_num=2000, 57 | nms_thr=0.7, 58 | min_bbox_size=0))) 59 | -------------------------------------------------------------------------------- /configs/_base_/models/rpn_r50_fpn.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='RPN', 4 | pretrained='torchvision://resnet50', 5 | backbone=dict( 6 | type='ResNet', 7 | depth=50, 8 | num_stages=4, 9 | out_indices=(0, 1, 2, 3), 10 | frozen_stages=1, 11 | norm_cfg=dict(type='BN', requires_grad=True), 12 | norm_eval=True, 13 | style='pytorch'), 14 | neck=dict( 15 | type='FPN', 16 | in_channels=[256, 512, 1024, 2048], 17 | out_channels=256, 18 | num_outs=5), 19 | rpn_head=dict( 20 | type='RPNHead', 21 | in_channels=256, 22 | feat_channels=256, 23 | anchor_generator=dict( 24 | type='AnchorGenerator', 25 | scales=[8], 26 | ratios=[0.5, 1.0, 2.0], 27 | strides=[4, 8, 16, 32, 64]), 28 | bbox_coder=dict( 29 | type='DeltaXYWHBBoxCoder', 30 | target_means=[.0, .0, .0, .0], 31 | target_stds=[1.0, 1.0, 1.0, 1.0]), 32 | loss_cls=dict( 33 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 34 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 35 | # model training and testing settings 36 | train_cfg=dict( 37 | rpn=dict( 38 | assigner=dict( 39 | type='MaxIoUAssigner', 40 | pos_iou_thr=0.7, 41 | neg_iou_thr=0.3, 42 | min_pos_iou=0.3, 43 | ignore_iof_thr=-1), 44 | sampler=dict( 45 | type='RandomSampler', 46 | num=256, 47 | pos_fraction=0.5, 48 | neg_pos_ub=-1, 49 | add_gt_as_proposals=False), 50 | allowed_border=0, 51 | pos_weight=-1, 52 | debug=False)), 53 | test_cfg=dict( 54 | rpn=dict( 55 | nms_across_levels=False, 56 | nms_pre=2000, 57 | nms_post=1000, 58 | max_num=1000, 59 | nms_thr=0.7, 60 | min_bbox_size=0))) 61 | -------------------------------------------------------------------------------- /configs/_base_/models/ssd300.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | input_size = 300 3 | model = dict( 4 | type='SingleStageDetector', 5 | pretrained='open-mmlab://vgg16_caffe', 6 | backbone=dict( 7 | type='SSDVGG', 8 | input_size=input_size, 9 | depth=16, 10 | with_last_pool=False, 11 | ceil_mode=True, 12 | out_indices=(3, 4), 13 | out_feature_indices=(22, 34), 14 | l2_norm_scale=20), 15 | neck=None, 16 | bbox_head=dict( 17 | type='SSDHead', 18 | in_channels=(512, 1024, 512, 256, 256, 256), 19 | num_classes=80, 20 | anchor_generator=dict( 21 | type='SSDAnchorGenerator', 22 | scale_major=False, 23 | input_size=input_size, 24 | basesize_ratio_range=(0.15, 0.9), 25 | strides=[8, 16, 32, 64, 100, 300], 26 | ratios=[[2], [2, 3], [2, 3], [2, 3], [2], [2]]), 27 | bbox_coder=dict( 28 | type='DeltaXYWHBBoxCoder', 29 | target_means=[.0, .0, .0, .0], 30 | target_stds=[0.1, 0.1, 0.2, 0.2])), 31 | train_cfg=dict( 32 | assigner=dict( 33 | type='MaxIoUAssigner', 34 | pos_iou_thr=0.5, 35 | neg_iou_thr=0.5, 36 | min_pos_iou=0., 37 | ignore_iof_thr=-1, 38 
| gt_max_assign_all=False), 39 | smoothl1_beta=1., 40 | allowed_border=-1, 41 | pos_weight=-1, 42 | neg_pos_ratio=3, 43 | debug=False), 44 | test_cfg=dict( 45 | nms=dict(type='nms', iou_threshold=0.45), 46 | min_bbox_size=0, 47 | score_thr=0.02, 48 | max_per_img=200)) 49 | cudnn_benchmark = True 50 | -------------------------------------------------------------------------------- /configs/_base_/schedules/schedule_1x.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) 3 | optimizer_config = dict(grad_clip=None) 4 | # learning policy 5 | lr_config = dict( 6 | policy='step', 7 | warmup='linear', 8 | warmup_iters=500, 9 | warmup_ratio=0.001, 10 | step=[8, 11]) 11 | total_epochs = 12 12 | -------------------------------------------------------------------------------- /configs/_base_/schedules/schedule_20e.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) 3 | optimizer_config = dict(grad_clip=None) 4 | # learning policy 5 | lr_config = dict( 6 | policy='step', 7 | warmup='linear', 8 | warmup_iters=500, 9 | warmup_ratio=0.001, 10 | step=[16, 19]) 11 | total_epochs = 20 12 | -------------------------------------------------------------------------------- /configs/_base_/schedules/schedule_2x.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) 3 | optimizer_config = dict(grad_clip=None) 4 | # learning policy 5 | lr_config = dict( 6 | policy='step', 7 | warmup='linear', 8 | warmup_iters=500, 9 | warmup_ratio=0.001, 10 | step=[16, 22]) 11 | total_epochs = 24 12 | -------------------------------------------------------------------------------- /docker/Dockerfile: -------------------------------------------------------------------------------- 1 | ARG PYTORCH="1.6.0" 2 | ARG CUDA="10.1" 3 | ARG CUDNN="7" 4 | 5 | FROM pytorch/pytorch:${PYTORCH}-cuda${CUDA}-cudnn${CUDNN}-devel 6 | 7 | ENV TORCH_CUDA_ARCH_LIST="6.0 6.1 7.0+PTX" 8 | ENV TORCH_NVCC_FLAGS="-Xfatbin -compress-all" 9 | ENV CMAKE_PREFIX_PATH="$(dirname $(which conda))/../" 10 | 11 | RUN apt-get update && apt-get install -y ffmpeg libsm6 libxext6 git ninja-build libglib2.0-0 libsm6 libxrender-dev libxext6 \ 12 | && apt-get clean \ 13 | && rm -rf /var/lib/apt/lists/* 14 | 15 | # Install MMCV 16 | RUN pip install mmcv-full==latest+torch1.6.0+cu101 -f https://openmmlab.oss-accelerate.aliyuncs.com/mmcv/dist/index.html 17 | 18 | # Install MMDetection 19 | RUN conda clean --all 20 | RUN git clone https://github.com/open-mmlab/mmdetection.git /mmdetection 21 | WORKDIR /mmdetection 22 | ENV FORCE_CUDA="1" 23 | RUN pip install -r requirements/build.txt 24 | RUN pip install --no-cache-dir -e . 25 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 
12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /docs/api.rst: -------------------------------------------------------------------------------- 1 | API Reference 2 | ================= 3 | 4 | mmdet.apis 5 | -------------- 6 | .. automodule:: mmdet.apis 7 | :members: 8 | 9 | mmdet.core 10 | -------------- 11 | 12 | anchor 13 | ^^^^^^^^^^ 14 | .. automodule:: mmdet.core.anchor 15 | :members: 16 | 17 | bbox 18 | ^^^^^^^^^^ 19 | .. automodule:: mmdet.core.bbox 20 | :members: 21 | 22 | export 23 | ^^^^^^^^^^ 24 | .. automodule:: mmdet.core.export 25 | :members: 26 | 27 | mask 28 | ^^^^^^^^^^ 29 | .. automodule:: mmdet.core.mask 30 | :members: 31 | 32 | evaluation 33 | ^^^^^^^^^^ 34 | .. automodule:: mmdet.core.evaluation 35 | :members: 36 | 37 | post_processing 38 | ^^^^^^^^^^^^^^^ 39 | .. automodule:: mmdet.core.post_processing 40 | :members: 41 | 42 | optimizer 43 | ^^^^^^^^^^ 44 | .. automodule:: mmdet.core.optimizer 45 | :members: 46 | 47 | utils 48 | ^^^^^^^^^^ 49 | .. automodule:: mmdet.core.utils 50 | :members: 51 | 52 | mmdet.datasets 53 | -------------- 54 | 55 | datasets 56 | ^^^^^^^^^^ 57 | .. automodule:: mmdet.datasets 58 | :members: 59 | 60 | pipelines 61 | ^^^^^^^^^^ 62 | .. automodule:: mmdet.datasets.pipelines 63 | :members: 64 | 65 | mmdet.models 66 | -------------- 67 | 68 | detectors 69 | ^^^^^^^^^^ 70 | .. automodule:: mmdet.models.detectors 71 | :members: 72 | 73 | backbones 74 | ^^^^^^^^^^ 75 | .. automodule:: mmdet.models.backbones 76 | :members: 77 | 78 | necks 79 | ^^^^^^^^^^^^ 80 | .. automodule:: mmdet.models.necks 81 | :members: 82 | 83 | dense_heads 84 | ^^^^^^^^^^^^ 85 | .. automodule:: mmdet.models.dense_heads 86 | :members: 87 | 88 | roi_heads 89 | ^^^^^^^^^^ 90 | .. automodule:: mmdet.models.roi_heads 91 | :members: 92 | 93 | losses 94 | ^^^^^^^^^^ 95 | .. automodule:: mmdet.models.losses 96 | :members: 97 | 98 | utils 99 | ^^^^^^^^^^ 100 | .. automodule:: mmdet.models.utils 101 | :members: 102 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 2 | # 3 | # This file only contains a selection of the most common options. For a full 4 | # list see the documentation: 5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 6 | 7 | # -- Path setup -------------------------------------------------------------- 8 | 9 | # If extensions (or modules to document with autodoc) are in another directory, 10 | # add these directories to sys.path here. If the directory is relative to the 11 | # documentation root, use os.path.abspath to make it absolute, like shown here. 
12 | # 13 | import os 14 | import subprocess 15 | import sys 16 | 17 | sys.path.insert(0, os.path.abspath('..')) 18 | 19 | # -- Project information ----------------------------------------------------- 20 | 21 | project = 'MMDetection' 22 | copyright = '2018-2020, OpenMMLab' 23 | author = 'MMDetection Authors' 24 | version_file = '../mmdet/version.py' 25 | 26 | 27 | def get_version(): 28 | with open(version_file, 'r') as f: 29 | exec(compile(f.read(), version_file, 'exec')) 30 | return locals()['__version__'] 31 | 32 | 33 | # The full version, including alpha/beta/rc tags 34 | release = get_version() 35 | 36 | # -- General configuration --------------------------------------------------- 37 | 38 | # Add any Sphinx extension module names here, as strings. They can be 39 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 40 | # ones. 41 | extensions = [ 42 | 'sphinx.ext.autodoc', 43 | 'sphinx.ext.napoleon', 44 | 'sphinx.ext.viewcode', 45 | 'recommonmark', 46 | 'sphinx_markdown_tables', 47 | ] 48 | 49 | autodoc_mock_imports = [ 50 | 'matplotlib', 'pycocotools', 'terminaltables', 'mmdet.version', 'mmcv.ops' 51 | ] 52 | 53 | # Add any paths that contain templates here, relative to this directory. 54 | templates_path = ['_templates'] 55 | 56 | # The suffix(es) of source filenames. 57 | # You can specify multiple suffix as a list of string: 58 | # 59 | source_suffix = { 60 | '.rst': 'restructuredtext', 61 | '.md': 'markdown', 62 | } 63 | 64 | # The master toctree document. 65 | master_doc = 'index' 66 | 67 | # List of patterns, relative to source directory, that match files and 68 | # directories to ignore when looking for source files. 69 | # This pattern also affects html_static_path and html_extra_path. 70 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] 71 | 72 | # -- Options for HTML output ------------------------------------------------- 73 | 74 | # The theme to use for HTML and HTML Help pages. See the documentation for 75 | # a list of builtin themes. 76 | # 77 | html_theme = 'sphinx_rtd_theme' 78 | 79 | # Add any paths that contain custom static files (such as style sheets) here, 80 | # relative to this directory. They are copied after the builtin static files, 81 | # so a file named "default.css" will overwrite the builtin "default.css". 82 | html_static_path = ['_static'] 83 | 84 | 85 | def builder_inited_handler(app): 86 | subprocess.run(['./stat.py']) 87 | 88 | 89 | def setup(app): 90 | app.connect('builder-inited', builder_inited_handler) 91 | -------------------------------------------------------------------------------- /docs/conventions.md: -------------------------------------------------------------------------------- 1 | # Conventions 2 | 3 | Please check the following conventions if you would like to modify MMDetection as your own project. 4 | 5 | ## Loss 6 | 7 | In MMDetection, a `dict` containing losses and metrics will be returned by `model(**data)`. 8 | 9 | For example, in bbox head, 10 | 11 | ```python 12 | class BBoxHead(nn.Module): 13 | ... 14 | def loss(self, ...): 15 | losses = dict() 16 | # classification loss 17 | losses['loss_cls'] = self.loss_cls(...) 18 | # classification accuracy 19 | losses['acc'] = accuracy(...) 20 | # bbox regression loss 21 | losses['loss_bbox'] = self.loss_bbox(...) 22 | return losses 23 | ``` 24 | 25 | `bbox_head.loss()` will be called during model forward. 26 | The returned dict contains `'loss_bbox'`, `'loss_cls'`, `'acc'` . 
27 | Only `'loss_bbox'`, `'loss_cls'` will be used during back propagation, 28 | `'acc'` will only be used as a metric to monitor training process. 29 | 30 | By default, only values whose keys contain `'loss'` will be back propagated. 31 | This behavior could be changed by modifying `BaseDetector.train_step()`. 32 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | Welcome to MMDetection's documentation! 2 | ======================================= 3 | 4 | .. toctree:: 5 | :maxdepth: 2 6 | :caption: Get Started 7 | 8 | get_started.md 9 | modelzoo_statistics.md 10 | model_zoo.md 11 | 12 | .. toctree:: 13 | :maxdepth: 2 14 | :caption: Quick Run 15 | 16 | 1_exist_data_model.md 17 | 2_new_data_model.md 18 | 19 | .. toctree:: 20 | :maxdepth: 2 21 | :caption: Tutorials 22 | 23 | tutorials/index.rst 24 | 25 | .. toctree:: 26 | :maxdepth: 2 27 | :caption: Useful Tools and Scripts 28 | 29 | useful_tools.md 30 | 31 | .. toctree:: 32 | :maxdepth: 2 33 | :caption: Notes 34 | 35 | conventions.md 36 | compatibility.md 37 | projects.md 38 | changelog.md 39 | faq.md 40 | 41 | .. toctree:: 42 | :caption: API Reference 43 | 44 | api.rst 45 | 46 | Indices and tables 47 | ================== 48 | 49 | * :ref:`genindex` 50 | * :ref:`search` 51 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=. 11 | set BUILDDIR=_build 12 | 13 | if "%1" == "" goto help 14 | 15 | %SPHINXBUILD% >NUL 2>NUL 16 | if errorlevel 9009 ( 17 | echo. 18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 19 | echo.installed, then set the SPHINXBUILD environment variable to point 20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 21 | echo.may add the Sphinx directory to PATH. 22 | echo. 
23 | echo.If you don't have Sphinx installed, grab it from 24 | echo.http://sphinx-doc.org/ 25 | exit /b 1 26 | ) 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /docs/stat.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import functools as func 3 | import glob 4 | import os.path as osp 5 | import re 6 | 7 | import numpy as np 8 | 9 | url_prefix = 'https://github.com/open-mmlab/mmdetection/blob/master/' 10 | 11 | files = sorted(glob.glob('../configs/*/README.md')) 12 | 13 | stats = [] 14 | titles = [] 15 | num_ckpts = 0 16 | 17 | for f in files: 18 | url = osp.dirname(f.replace('../', url_prefix)) 19 | 20 | with open(f, 'r') as content_file: 21 | content = content_file.read() 22 | 23 | title = content.split('\n')[0].replace('# ', '').strip() 24 | ckpts = set(x.lower().strip() 25 | for x in re.findall(r'\[model\]\((https?.*)\)', content)) 26 | 27 | if len(ckpts) == 0: 28 | continue 29 | 30 | _papertype = [x for x in re.findall(r'\[([A-Z]+)\]', content)] 31 | assert len(_papertype) > 0 32 | papertype = _papertype[0] 33 | 34 | paper = set([(papertype, title)]) 35 | 36 | titles.append(title) 37 | num_ckpts += len(ckpts) 38 | 39 | statsmsg = f""" 40 | \t* [{papertype}] [{title}]({url}) ({len(ckpts)} ckpts) 41 | """ 42 | stats.append((paper, ckpts, statsmsg)) 43 | 44 | allpapers = func.reduce(lambda a, b: a.union(b), [p for p, _, _ in stats]) 45 | msglist = '\n'.join(x for _, _, x in stats) 46 | 47 | papertypes, papercounts = np.unique([t for t, _ in allpapers], 48 | return_counts=True) 49 | countstr = '\n'.join( 50 | [f' - {t}: {c}' for t, c in zip(papertypes, papercounts)]) 51 | 52 | modelzoo = f""" 53 | # Model Zoo Statistics 54 | 55 | * Number of papers: {len(set(titles))} 56 | {countstr} 57 | 58 | * Number of checkpoints: {num_ckpts} 59 | 60 | {msglist} 61 | """ 62 | 63 | with open('modelzoo_statistics.md', 'w') as f: 64 | f.write(modelzoo) 65 | -------------------------------------------------------------------------------- /docs/tutorials/index.rst: -------------------------------------------------------------------------------- 1 | .. 
toctree:: 2 | :maxdepth: 2 3 | 4 | config.md 5 | customize_dataset.md 6 | data_pipeline.md 7 | customize_models.md 8 | customize_runtime.md 9 | customize_losses.md 10 | finetune.md 11 | pytorch2onnx.md 12 | -------------------------------------------------------------------------------- /images/epic.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcahny/object_localization_network/300d995da000484fdfd26114e4b783c992046e9c/images/epic.png -------------------------------------------------------------------------------- /images/oln_overview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcahny/object_localization_network/300d995da000484fdfd26114e4b783c992046e9c/images/oln_overview.png -------------------------------------------------------------------------------- /mmdet/__init__.py: -------------------------------------------------------------------------------- 1 | import mmcv 2 | 3 | from .version import __version__, short_version 4 | 5 | 6 | def digit_version(version_str): 7 | digit_version = [] 8 | for x in version_str.split('.'): 9 | if x.isdigit(): 10 | digit_version.append(int(x)) 11 | elif x.find('rc') != -1: 12 | patch_version = x.split('rc') 13 | digit_version.append(int(patch_version[0]) - 1) 14 | digit_version.append(int(patch_version[1])) 15 | return digit_version 16 | 17 | 18 | mmcv_minimum_version = '1.2.4' 19 | mmcv_maximum_version = '1.3' 20 | mmcv_version = digit_version(mmcv.__version__) 21 | 22 | 23 | assert (mmcv_version >= digit_version(mmcv_minimum_version) 24 | and mmcv_version <= digit_version(mmcv_maximum_version)), \ 25 | f'MMCV=={mmcv.__version__} is used but incompatible. ' \ 26 | f'Please install mmcv>={mmcv_minimum_version}, <={mmcv_maximum_version}.' 
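# Illustrative sanity checks of digit_version (the values are chosen by the
# editor and are not part of the original file). Release candidates sort
# below the final release when the resulting lists are compared
# lexicographically:
#
#     digit_version('1.2.4')  -> [1, 2, 4]
#     digit_version('1.3')    -> [1, 3]
#     digit_version('1.3rc1') -> [1, 2, 1]   # 'rc' decrements the preceding
#                                            # component and appends the rc number
#
# so digit_version('1.3rc1') < digit_version('1.3'), which is what the
# version assert above relies on.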
27 | 28 | __all__ = ['__version__', 'short_version'] 29 | -------------------------------------------------------------------------------- /mmdet/apis/__init__.py: -------------------------------------------------------------------------------- 1 | from .inference import (async_inference_detector, inference_detector, 2 | init_detector, show_result_pyplot) 3 | from .test import multi_gpu_test, single_gpu_test 4 | from .train import get_root_logger, set_random_seed, train_detector 5 | 6 | __all__ = [ 7 | 'get_root_logger', 'set_random_seed', 'train_detector', 'init_detector', 8 | 'async_inference_detector', 'inference_detector', 'show_result_pyplot', 9 | 'multi_gpu_test', 'single_gpu_test' 10 | ] 11 | -------------------------------------------------------------------------------- /mmdet/core/__init__.py: -------------------------------------------------------------------------------- 1 | from .anchor import * # noqa: F401, F403 2 | from .bbox import * # noqa: F401, F403 3 | from .evaluation import * # noqa: F401, F403 4 | from .export import * # noqa: F401, F403 5 | from .fp16 import * # noqa: F401, F403 6 | from .mask import * # noqa: F401, F403 7 | from .post_processing import * # noqa: F401, F403 8 | from .utils import * # noqa: F401, F403 9 | -------------------------------------------------------------------------------- /mmdet/core/anchor/__init__.py: -------------------------------------------------------------------------------- 1 | from .anchor_generator import (AnchorGenerator, LegacyAnchorGenerator, 2 | YOLOAnchorGenerator) 3 | from .builder import ANCHOR_GENERATORS, build_anchor_generator 4 | from .point_generator import PointGenerator 5 | from .utils import anchor_inside_flags, calc_region, images_to_levels 6 | 7 | __all__ = [ 8 | 'AnchorGenerator', 'LegacyAnchorGenerator', 'anchor_inside_flags', 9 | 'PointGenerator', 'images_to_levels', 'calc_region', 10 | 'build_anchor_generator', 'ANCHOR_GENERATORS', 'YOLOAnchorGenerator' 11 | ] 12 | -------------------------------------------------------------------------------- /mmdet/core/anchor/builder.py: -------------------------------------------------------------------------------- 1 | from mmcv.utils import Registry, build_from_cfg 2 | 3 | ANCHOR_GENERATORS = Registry('Anchor generator') 4 | 5 | 6 | def build_anchor_generator(cfg, default_args=None): 7 | return build_from_cfg(cfg, ANCHOR_GENERATORS, default_args) 8 | -------------------------------------------------------------------------------- /mmdet/core/anchor/point_generator.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from .builder import ANCHOR_GENERATORS 4 | 5 | 6 | @ANCHOR_GENERATORS.register_module() 7 | class PointGenerator(object): 8 | 9 | def _meshgrid(self, x, y, row_major=True): 10 | xx = x.repeat(len(y)) 11 | yy = y.view(-1, 1).repeat(1, len(x)).view(-1) 12 | if row_major: 13 | return xx, yy 14 | else: 15 | return yy, xx 16 | 17 | def grid_points(self, featmap_size, stride=16, device='cuda'): 18 | feat_h, feat_w = featmap_size 19 | shift_x = torch.arange(0., feat_w, device=device) * stride 20 | shift_y = torch.arange(0., feat_h, device=device) * stride 21 | shift_xx, shift_yy = self._meshgrid(shift_x, shift_y) 22 | stride = shift_x.new_full((shift_xx.shape[0], ), stride) 23 | shifts = torch.stack([shift_xx, shift_yy, stride], dim=-1) 24 | all_points = shifts.to(device) 25 | return all_points 26 | 27 | def valid_flags(self, featmap_size, valid_size, device='cuda'): 28 | feat_h, feat_w = featmap_size 29 
| valid_h, valid_w = valid_size
30 |         assert valid_h <= feat_h and valid_w <= feat_w
31 |         valid_x = torch.zeros(feat_w, dtype=torch.bool, device=device)
32 |         valid_y = torch.zeros(feat_h, dtype=torch.bool, device=device)
33 |         valid_x[:valid_w] = 1
34 |         valid_y[:valid_h] = 1
35 |         valid_xx, valid_yy = self._meshgrid(valid_x, valid_y)
36 |         valid = valid_xx & valid_yy
37 |         return valid
38 | 
--------------------------------------------------------------------------------
/mmdet/core/anchor/utils.py:
--------------------------------------------------------------------------------
1 | import torch
2 | 
3 | 
4 | def images_to_levels(target, num_levels):
5 |     """Convert targets by image to targets by feature level.
6 | 
7 |     [target_img0, target_img1] -> [target_level0, target_level1, ...]
8 |     """
9 |     target = torch.stack(target, 0)
10 |     level_targets = []
11 |     start = 0
12 |     for n in num_levels:
13 |         end = start + n
14 |         # level_targets.append(target[:, start:end].squeeze(0))
15 |         level_targets.append(target[:, start:end])
16 |         start = end
17 |     return level_targets
18 | 
19 | 
20 | def anchor_inside_flags(flat_anchors,
21 |                         valid_flags,
22 |                         img_shape,
23 |                         allowed_border=0):
24 |     """Check whether the anchors are inside the border.
25 | 
26 |     Args:
27 |         flat_anchors (torch.Tensor): Flattened anchors, shape (n, 4).
28 |         valid_flags (torch.Tensor): Existing valid flags of the anchors.
29 |         img_shape (tuple(int)): Shape of the current image.
30 |         allowed_border (int, optional): The border to allow the valid anchor.
31 |             Defaults to 0.
32 | 
33 |     Returns:
34 |         torch.Tensor: Flags indicating whether the anchors are inside a \
35 |             valid range.
36 |     """
37 |     img_h, img_w = img_shape[:2]
38 |     if allowed_border >= 0:
39 |         inside_flags = valid_flags & \
40 |             (flat_anchors[:, 0] >= -allowed_border) & \
41 |             (flat_anchors[:, 1] >= -allowed_border) & \
42 |             (flat_anchors[:, 2] < img_w + allowed_border) & \
43 |             (flat_anchors[:, 3] < img_h + allowed_border)
44 |     else:
45 |         inside_flags = valid_flags
46 |     return inside_flags
47 | 
48 | 
49 | def calc_region(bbox, ratio, featmap_size=None):
50 |     """Calculate a proportional bbox region.
51 | 
52 |     The bbox center is fixed and the new h' and w' are h * ratio and w * ratio.
53 | 
54 |     Args:
55 |         bbox (Tensor): Bboxes to calculate regions, shape (n, 4).
56 |         ratio (float): Ratio of the output region.
57 |         featmap_size (tuple): Feature map size used for clipping the boundary.
58 | 59 | Returns: 60 | tuple: x1, y1, x2, y2 61 | """ 62 | x1 = torch.round((1 - ratio) * bbox[0] + ratio * bbox[2]).long() 63 | y1 = torch.round((1 - ratio) * bbox[1] + ratio * bbox[3]).long() 64 | x2 = torch.round(ratio * bbox[0] + (1 - ratio) * bbox[2]).long() 65 | y2 = torch.round(ratio * bbox[1] + (1 - ratio) * bbox[3]).long() 66 | if featmap_size is not None: 67 | x1 = x1.clamp(min=0, max=featmap_size[1]) 68 | y1 = y1.clamp(min=0, max=featmap_size[0]) 69 | x2 = x2.clamp(min=0, max=featmap_size[1]) 70 | y2 = y2.clamp(min=0, max=featmap_size[0]) 71 | return (x1, y1, x2, y2) 72 | -------------------------------------------------------------------------------- /mmdet/core/bbox/__init__.py: -------------------------------------------------------------------------------- 1 | from .assigners import (AssignResult, BaseAssigner, CenterRegionAssigner, 2 | MaxIoUAssigner, RegionAssigner) 3 | from .builder import build_assigner, build_bbox_coder, build_sampler 4 | from .coder import (BaseBBoxCoder, DeltaXYWHBBoxCoder, PseudoBBoxCoder, 5 | TBLRBBoxCoder) 6 | from .iou_calculators import BboxOverlaps2D, bbox_overlaps 7 | from .samplers import (BaseSampler, CombinedSampler, 8 | InstanceBalancedPosSampler, IoUBalancedNegSampler, 9 | OHEMSampler, PseudoSampler, RandomSampler, 10 | SamplingResult, ScoreHLRSampler) 11 | from .transforms import (bbox2distance, bbox2result, bbox2roi, 12 | bbox_cxcywh_to_xyxy, bbox_flip, bbox_mapping, 13 | bbox_mapping_back, bbox_rescale, bbox_xyxy_to_cxcywh, 14 | distance2bbox, roi2bbox) 15 | 16 | __all__ = [ 17 | 'bbox_overlaps', 'BboxOverlaps2D', 'BaseAssigner', 'MaxIoUAssigner', 18 | 'AssignResult', 'BaseSampler', 'PseudoSampler', 'RandomSampler', 19 | 'InstanceBalancedPosSampler', 'IoUBalancedNegSampler', 'CombinedSampler', 20 | 'OHEMSampler', 'SamplingResult', 'ScoreHLRSampler', 'build_assigner', 21 | 'build_sampler', 'bbox_flip', 'bbox_mapping', 'bbox_mapping_back', 22 | 'bbox2roi', 'roi2bbox', 'bbox2result', 'distance2bbox', 'bbox2distance', 23 | 'build_bbox_coder', 'BaseBBoxCoder', 'PseudoBBoxCoder', 24 | 'DeltaXYWHBBoxCoder', 'TBLRBBoxCoder', 'CenterRegionAssigner', 25 | 'bbox_rescale', 'bbox_cxcywh_to_xyxy', 'bbox_xyxy_to_cxcywh', 26 | 'RegionAssigner' 27 | ] 28 | -------------------------------------------------------------------------------- /mmdet/core/bbox/assigners/__init__.py: -------------------------------------------------------------------------------- 1 | from .approx_max_iou_assigner import ApproxMaxIoUAssigner 2 | from .assign_result import AssignResult 3 | from .atss_assigner import ATSSAssigner 4 | from .base_assigner import BaseAssigner 5 | from .center_region_assigner import CenterRegionAssigner 6 | from .grid_assigner import GridAssigner 7 | from .hungarian_assigner import HungarianAssigner 8 | from .max_iou_assigner import MaxIoUAssigner 9 | from .point_assigner import PointAssigner 10 | from .region_assigner import RegionAssigner 11 | 12 | __all__ = [ 13 | 'BaseAssigner', 'MaxIoUAssigner', 'ApproxMaxIoUAssigner', 'AssignResult', 14 | 'PointAssigner', 'ATSSAssigner', 'CenterRegionAssigner', 'GridAssigner', 15 | 'HungarianAssigner', 'RegionAssigner' 16 | ] 17 | -------------------------------------------------------------------------------- /mmdet/core/bbox/assigners/base_assigner.py: -------------------------------------------------------------------------------- 1 | from abc import ABCMeta, abstractmethod 2 | 3 | 4 | class BaseAssigner(metaclass=ABCMeta): 5 | """Base assigner that assigns boxes to ground truth boxes.""" 6 | 7 | 
@abstractmethod
8 |     def assign(self, bboxes, gt_bboxes, gt_bboxes_ignore=None, gt_labels=None):
9 |         """Assign boxes to either a ground truth box or a negative box."""
10 |         pass
11 | 
--------------------------------------------------------------------------------
/mmdet/core/bbox/builder.py:
--------------------------------------------------------------------------------
1 | from mmcv.utils import Registry, build_from_cfg
2 | 
3 | BBOX_ASSIGNERS = Registry('bbox_assigner')
4 | BBOX_SAMPLERS = Registry('bbox_sampler')
5 | BBOX_CODERS = Registry('bbox_coder')
6 | 
7 | 
8 | def build_assigner(cfg, **default_args):
9 |     """Builder of box assigner."""
10 |     return build_from_cfg(cfg, BBOX_ASSIGNERS, default_args)
11 | 
12 | 
13 | def build_sampler(cfg, **default_args):
14 |     """Builder of box sampler."""
15 |     return build_from_cfg(cfg, BBOX_SAMPLERS, default_args)
16 | 
17 | 
18 | def build_bbox_coder(cfg, **default_args):
19 |     """Builder of box coder."""
20 |     return build_from_cfg(cfg, BBOX_CODERS, default_args)
21 | 
--------------------------------------------------------------------------------
/mmdet/core/bbox/coder/__init__.py:
--------------------------------------------------------------------------------
1 | from .base_bbox_coder import BaseBBoxCoder
2 | from .bucketing_bbox_coder import BucketingBBoxCoder
3 | from .delta_xywh_bbox_coder import DeltaXYWHBBoxCoder
4 | from .legacy_delta_xywh_bbox_coder import LegacyDeltaXYWHBBoxCoder
5 | from .pseudo_bbox_coder import PseudoBBoxCoder
6 | from .tblr_bbox_coder import TBLRBBoxCoder
7 | from .yolo_bbox_coder import YOLOBBoxCoder
8 | 
9 | __all__ = [
10 |     'BaseBBoxCoder', 'PseudoBBoxCoder', 'DeltaXYWHBBoxCoder',
11 |     'LegacyDeltaXYWHBBoxCoder', 'TBLRBBoxCoder', 'YOLOBBoxCoder',
12 |     'BucketingBBoxCoder'
13 | ]
14 | 
--------------------------------------------------------------------------------
/mmdet/core/bbox/coder/base_bbox_coder.py:
--------------------------------------------------------------------------------
1 | from abc import ABCMeta, abstractmethod
2 | 
3 | 
4 | class BaseBBoxCoder(metaclass=ABCMeta):
5 |     """Base bounding box coder."""
6 | 
7 |     def __init__(self, **kwargs):
8 |         pass
9 | 
10 |     @abstractmethod
11 |     def encode(self, bboxes, gt_bboxes):
12 |         """Encode deltas between bboxes and ground truth boxes."""
13 |         pass
14 | 
15 |     @abstractmethod
16 |     def decode(self, bboxes, bboxes_pred):
17 |         """Decode the predicted bboxes according to prediction and base
18 |         boxes."""
19 |         pass
20 | 
--------------------------------------------------------------------------------
/mmdet/core/bbox/coder/pseudo_bbox_coder.py:
--------------------------------------------------------------------------------
1 | from ..builder import BBOX_CODERS
2 | from .base_bbox_coder import BaseBBoxCoder
3 | 
4 | 
5 | @BBOX_CODERS.register_module()
6 | class PseudoBBoxCoder(BaseBBoxCoder):
7 |     """Pseudo bounding box coder."""
8 | 
9 |     def __init__(self, **kwargs):
10 |         super(PseudoBBoxCoder, self).__init__(**kwargs)
11 | 
12 |     def encode(self, bboxes, gt_bboxes):
13 |         """torch.Tensor: return the given ``gt_bboxes``"""
14 |         return gt_bboxes
15 | 
16 |     def decode(self, bboxes, pred_bboxes):
17 |         """torch.Tensor: return the given ``pred_bboxes``"""
18 |         return pred_bboxes
19 | 
--------------------------------------------------------------------------------
/mmdet/core/bbox/coder/yolo_bbox_coder.py:
--------------------------------------------------------------------------------
1 | import torch
2 | 
3 | from ..builder import BBOX_CODERS
4 | from .base_bbox_coder import BaseBBoxCoder
5 | 
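# A quick round-trip sanity check of the coder defined below (the values are
# illustrative, chosen by the editor, and this snippet is not part of the
# original file; it relies on the `torch` import above):
#
#     coder = YOLOBBoxCoder()
#     anchors = torch.tensor([[0., 0., 32., 32.]])
#     gts = torch.tensor([[4., 4., 28., 36.]])
#     deltas = coder.encode(anchors, gts, stride=32)
#     decoded = coder.decode(anchors, deltas, stride=32)
#     # `decoded` reproduces `gts` up to the clamping by `eps`.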
6 | 
7 | @BBOX_CODERS.register_module()
8 | class YOLOBBoxCoder(BaseBBoxCoder):
9 |     """YOLO BBox coder.
10 | 
11 |     Following `YOLO <https://arxiv.org/abs/1506.02640>`_, this coder divides
12 |     the image into grids, and encodes bbox (x1, y1, x2, y2) into (cx, cy, dw, dh).
13 |     cx, cy in [0., 1.], denote the relative center position w.r.t. the center
14 |     of bboxes. dw, dh are the same as :obj:`DeltaXYWHBBoxCoder`.
15 | 
16 |     Args:
17 |         eps (float): Min value of cx, cy when encoding.
18 |     """
19 | 
20 |     def __init__(self, eps=1e-6):
21 |         super(YOLOBBoxCoder, self).__init__()
22 |         self.eps = eps
23 | 
24 |     def encode(self, bboxes, gt_bboxes, stride):
25 |         """Get box regression transformation deltas that can be used to
26 |         transform the ``bboxes`` into the ``gt_bboxes``.
27 | 
28 |         Args:
29 |             bboxes (torch.Tensor): Source boxes, e.g., anchors.
30 |             gt_bboxes (torch.Tensor): Target of the transformation, e.g.,
31 |                 ground-truth boxes.
32 |             stride (torch.Tensor | int): Stride of bboxes.
33 | 
34 |         Returns:
35 |             torch.Tensor: Box transformation deltas
36 |         """
37 | 
38 |         assert bboxes.size(0) == gt_bboxes.size(0)
39 |         assert bboxes.size(-1) == gt_bboxes.size(-1) == 4
40 |         x_center_gt = (gt_bboxes[..., 0] + gt_bboxes[..., 2]) * 0.5
41 |         y_center_gt = (gt_bboxes[..., 1] + gt_bboxes[..., 3]) * 0.5
42 |         w_gt = gt_bboxes[..., 2] - gt_bboxes[..., 0]
43 |         h_gt = gt_bboxes[..., 3] - gt_bboxes[..., 1]
44 |         x_center = (bboxes[..., 0] + bboxes[..., 2]) * 0.5
45 |         y_center = (bboxes[..., 1] + bboxes[..., 3]) * 0.5
46 |         w = bboxes[..., 2] - bboxes[..., 0]
47 |         h = bboxes[..., 3] - bboxes[..., 1]
48 |         w_target = torch.log((w_gt / w).clamp(min=self.eps))
49 |         h_target = torch.log((h_gt / h).clamp(min=self.eps))
50 |         x_center_target = ((x_center_gt - x_center) / stride + 0.5).clamp(
51 |             self.eps, 1 - self.eps)
52 |         y_center_target = ((y_center_gt - y_center) / stride + 0.5).clamp(
53 |             self.eps, 1 - self.eps)
54 |         encoded_bboxes = torch.stack(
55 |             [x_center_target, y_center_target, w_target, h_target], dim=-1)
56 |         return encoded_bboxes
57 | 
58 |     def decode(self, bboxes, pred_bboxes, stride):
59 |         """Apply transformation `pred_bboxes` to `bboxes`.
60 | 
61 |         Args:
62 |             bboxes (torch.Tensor): Basic boxes, e.g., anchors.
63 |             pred_bboxes (torch.Tensor): Encoded boxes with shape (N, 4).
64 |             stride (torch.Tensor | int): Strides of bboxes.
65 | 
66 |         Returns:
67 |             torch.Tensor: Decoded boxes.
68 | """ 69 | assert pred_bboxes.size(0) == bboxes.size(0) 70 | assert pred_bboxes.size(-1) == bboxes.size(-1) == 4 71 | x_center = (bboxes[..., 0] + bboxes[..., 2]) * 0.5 72 | y_center = (bboxes[..., 1] + bboxes[..., 3]) * 0.5 73 | w = bboxes[..., 2] - bboxes[..., 0] 74 | h = bboxes[..., 3] - bboxes[..., 1] 75 | # Get outputs x, y 76 | x_center_pred = (pred_bboxes[..., 0] - 0.5) * stride + x_center 77 | y_center_pred = (pred_bboxes[..., 1] - 0.5) * stride + y_center 78 | w_pred = torch.exp(pred_bboxes[..., 2]) * w 79 | h_pred = torch.exp(pred_bboxes[..., 3]) * h 80 | 81 | decoded_bboxes = torch.stack( 82 | (x_center_pred - w_pred / 2, y_center_pred - h_pred / 2, 83 | x_center_pred + w_pred / 2, y_center_pred + h_pred / 2), 84 | dim=-1) 85 | 86 | return decoded_bboxes 87 | -------------------------------------------------------------------------------- /mmdet/core/bbox/demodata.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | 5 | def ensure_rng(rng=None): 6 | """Simple version of the ``kwarray.ensure_rng`` 7 | 8 | Args: 9 | rng (int | numpy.random.RandomState | None): 10 | if None, then defaults to the global rng. Otherwise this can be an 11 | integer or a RandomState class 12 | Returns: 13 | (numpy.random.RandomState) : rng - 14 | a numpy random number generator 15 | 16 | References: 17 | https://gitlab.kitware.com/computer-vision/kwarray/blob/master/kwarray/util_random.py#L270 18 | """ 19 | 20 | if rng is None: 21 | rng = np.random.mtrand._rand 22 | elif isinstance(rng, int): 23 | rng = np.random.RandomState(rng) 24 | else: 25 | rng = rng 26 | return rng 27 | 28 | 29 | def random_boxes(num=1, scale=1, rng=None): 30 | """Simple version of ``kwimage.Boxes.random`` 31 | 32 | Returns: 33 | Tensor: shape (n, 4) in x1, y1, x2, y2 format. 
34 | 
35 |     References:
36 |         https://gitlab.kitware.com/computer-vision/kwimage/blob/master/kwimage/structs/boxes.py#L1390
37 | 
38 |     Example:
39 |         >>> num = 3
40 |         >>> scale = 512
41 |         >>> rng = 0
42 |         >>> boxes = random_boxes(num, scale, rng)
43 |         >>> print(boxes)
44 |         tensor([[280.9925, 278.9802, 308.6148, 366.1769],
45 |                 [216.9113, 330.6978, 224.0446, 456.5878],
46 |                 [405.3632, 196.3221, 493.3953, 270.7942]])
47 |     """
48 |     rng = ensure_rng(rng)
49 | 
50 |     tlbr = rng.rand(num, 4).astype(np.float32)
51 | 
52 |     tl_x = np.minimum(tlbr[:, 0], tlbr[:, 2])
53 |     tl_y = np.minimum(tlbr[:, 1], tlbr[:, 3])
54 |     br_x = np.maximum(tlbr[:, 0], tlbr[:, 2])
55 |     br_y = np.maximum(tlbr[:, 1], tlbr[:, 3])
56 | 
57 |     tlbr[:, 0] = tl_x * scale
58 |     tlbr[:, 1] = tl_y * scale
59 |     tlbr[:, 2] = br_x * scale
60 |     tlbr[:, 3] = br_y * scale
61 | 
62 |     boxes = torch.from_numpy(tlbr)
63 |     return boxes
64 | 
--------------------------------------------------------------------------------
/mmdet/core/bbox/iou_calculators/__init__.py:
--------------------------------------------------------------------------------
1 | from .builder import build_iou_calculator
2 | from .iou2d_calculator import BboxOverlaps2D, bbox_overlaps
3 | 
4 | __all__ = ['build_iou_calculator', 'BboxOverlaps2D', 'bbox_overlaps']
5 | 
--------------------------------------------------------------------------------
/mmdet/core/bbox/iou_calculators/builder.py:
--------------------------------------------------------------------------------
1 | from mmcv.utils import Registry, build_from_cfg
2 | 
3 | IOU_CALCULATORS = Registry('IoU calculator')
4 | 
5 | 
6 | def build_iou_calculator(cfg, default_args=None):
7 |     """Builder of IoU calculator."""
8 |     return build_from_cfg(cfg, IOU_CALCULATORS, default_args)
9 | 
--------------------------------------------------------------------------------
/mmdet/core/bbox/match_costs/__init__.py:
--------------------------------------------------------------------------------
1 | from .builder import build_match_cost
2 | from .match_cost import BBoxL1Cost, ClassificationCost, FocalLossCost, IoUCost
3 | 
4 | __all__ = [
5 |     'build_match_cost', 'ClassificationCost', 'BBoxL1Cost', 'IoUCost',
6 |     'FocalLossCost'
7 | ]
8 | 
--------------------------------------------------------------------------------
/mmdet/core/bbox/match_costs/builder.py:
--------------------------------------------------------------------------------
1 | from mmcv.utils import Registry, build_from_cfg
2 | 
3 | MATCH_COST = Registry('Match Cost')
4 | 
5 | 
6 | def build_match_cost(cfg, default_args=None):
7 |     """Builder of match cost."""
8 |     return build_from_cfg(cfg, MATCH_COST, default_args)
9 | 
--------------------------------------------------------------------------------
/mmdet/core/bbox/samplers/__init__.py:
--------------------------------------------------------------------------------
1 | from .base_sampler import BaseSampler
2 | from .combined_sampler import CombinedSampler
3 | from .instance_balanced_pos_sampler import InstanceBalancedPosSampler
4 | from .iou_balanced_neg_sampler import IoUBalancedNegSampler
5 | from .ohem_sampler import OHEMSampler
6 | from .pseudo_sampler import PseudoSampler
7 | from .random_sampler import RandomSampler
8 | from .sampling_result import SamplingResult
9 | from .score_hlr_sampler import ScoreHLRSampler
10 | 
11 | __all__ = [
12 |     'BaseSampler', 'PseudoSampler', 'RandomSampler',
13 |     'InstanceBalancedPosSampler', 'IoUBalancedNegSampler', 'CombinedSampler',
14 |     'OHEMSampler', 'SamplingResult', 'ScoreHLRSampler'
15 | ]
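# A minimal usage sketch (editor's addition; the config values below are
# illustrative, not defaults). Samplers registered here are normally
# instantiated from a config dict via `build_sampler`:
#
#     from mmdet.core.bbox import build_sampler
#     sampler = build_sampler(
#         dict(type='RandomSampler', num=256, pos_fraction=0.5))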
16 | -------------------------------------------------------------------------------- /mmdet/core/bbox/samplers/combined_sampler.py: -------------------------------------------------------------------------------- 1 | from ..builder import BBOX_SAMPLERS, build_sampler 2 | from .base_sampler import BaseSampler 3 | 4 | 5 | @BBOX_SAMPLERS.register_module() 6 | class CombinedSampler(BaseSampler): 7 | """A sampler that combines positive sampler and negative sampler.""" 8 | 9 | def __init__(self, pos_sampler, neg_sampler, **kwargs): 10 | super(CombinedSampler, self).__init__(**kwargs) 11 | self.pos_sampler = build_sampler(pos_sampler, **kwargs) 12 | self.neg_sampler = build_sampler(neg_sampler, **kwargs) 13 | 14 | def _sample_pos(self, **kwargs): 15 | """Sample positive samples.""" 16 | raise NotImplementedError 17 | 18 | def _sample_neg(self, **kwargs): 19 | """Sample negative samples.""" 20 | raise NotImplementedError 21 | -------------------------------------------------------------------------------- /mmdet/core/bbox/samplers/instance_balanced_pos_sampler.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | from ..builder import BBOX_SAMPLERS 5 | from .random_sampler import RandomSampler 6 | 7 | 8 | @BBOX_SAMPLERS.register_module() 9 | class InstanceBalancedPosSampler(RandomSampler): 10 | """Instance balanced sampler that samples equal number of positive samples 11 | for each instance.""" 12 | 13 | def _sample_pos(self, assign_result, num_expected, **kwargs): 14 | """Sample positive boxes. 15 | 16 | Args: 17 | assign_result (:obj:`AssignResult`): The assigned results of boxes. 18 | num_expected (int): The number of expected positive samples 19 | 20 | Returns: 21 | Tensor or ndarray: sampled indices. 
22 |         """
23 |         pos_inds = torch.nonzero(assign_result.gt_inds > 0, as_tuple=False)
24 |         if pos_inds.numel() != 0:
25 |             pos_inds = pos_inds.squeeze(1)
26 |         if pos_inds.numel() <= num_expected:
27 |             return pos_inds
28 |         else:
29 |             unique_gt_inds = assign_result.gt_inds[pos_inds].unique()
30 |             num_gts = len(unique_gt_inds)
31 |             num_per_gt = int(round(num_expected / float(num_gts)) + 1)
32 |             sampled_inds = []
33 |             for i in unique_gt_inds:
34 |                 inds = torch.nonzero(
35 |                     assign_result.gt_inds == i.item(), as_tuple=False)
36 |                 if inds.numel() != 0:
37 |                     inds = inds.squeeze(1)
38 |                 else:
39 |                     continue
40 |                 if len(inds) > num_per_gt:
41 |                     inds = self.random_choice(inds, num_per_gt)
42 |                 sampled_inds.append(inds)
43 |             sampled_inds = torch.cat(sampled_inds)
44 |             if len(sampled_inds) < num_expected:
45 |                 num_extra = num_expected - len(sampled_inds)
46 |                 extra_inds = np.array(
47 |                     list(set(pos_inds.cpu()) - set(sampled_inds.cpu())))
48 |                 if len(extra_inds) > num_extra:
49 |                     extra_inds = self.random_choice(extra_inds, num_extra)
50 |                 extra_inds = torch.from_numpy(extra_inds).to(
51 |                     assign_result.gt_inds.device).long()
52 |                 sampled_inds = torch.cat([sampled_inds, extra_inds])
53 |             elif len(sampled_inds) > num_expected:
54 |                 sampled_inds = self.random_choice(sampled_inds, num_expected)
55 |             return sampled_inds
56 | 
--------------------------------------------------------------------------------
/mmdet/core/bbox/samplers/pseudo_sampler.py:
--------------------------------------------------------------------------------
1 | import torch
2 | 
3 | from ..builder import BBOX_SAMPLERS
4 | from .base_sampler import BaseSampler
5 | from .sampling_result import SamplingResult
6 | 
7 | 
8 | @BBOX_SAMPLERS.register_module()
9 | class PseudoSampler(BaseSampler):
10 |     """A pseudo sampler that does not actually do sampling."""
11 | 
12 |     def __init__(self, **kwargs):
13 |         pass
14 | 
15 |     def _sample_pos(self, **kwargs):
16 |         """Sample positive samples."""
17 |         raise NotImplementedError
18 | 
19 |     def _sample_neg(self, **kwargs):
20 |         """Sample negative samples."""
21 |         raise NotImplementedError
22 | 
23 |     def sample(self, assign_result, bboxes, gt_bboxes, **kwargs):
24 |         """Directly returns the positive and negative indices of samples.
25 | 
26 |         Args:
27 |             assign_result (:obj:`AssignResult`): Assigned results
28 |             bboxes (torch.Tensor): Bounding boxes
29 |             gt_bboxes (torch.Tensor): Ground truth boxes
30 | 
31 |         Returns:
32 |             :obj:`SamplingResult`: sampler results
33 |         """
34 |         pos_inds = torch.nonzero(
35 |             assign_result.gt_inds > 0, as_tuple=False).squeeze(-1).unique()
36 |         neg_inds = torch.nonzero(
37 |             assign_result.gt_inds == 0, as_tuple=False).squeeze(-1).unique()
38 |         gt_flags = bboxes.new_zeros(bboxes.shape[0], dtype=torch.uint8)
39 |         sampling_result = SamplingResult(pos_inds, neg_inds, bboxes, gt_bboxes,
40 |                                          assign_result, gt_flags)
41 |         return sampling_result
42 | 
--------------------------------------------------------------------------------
/mmdet/core/bbox/samplers/random_sampler.py:
--------------------------------------------------------------------------------
1 | import torch
2 | 
3 | from ..builder import BBOX_SAMPLERS
4 | from .base_sampler import BaseSampler
5 | 
6 | 
7 | @BBOX_SAMPLERS.register_module()
8 | class RandomSampler(BaseSampler):
9 |     """Random sampler.
10 | 
11 |     Args:
12 |         num (int): Number of samples
13 |         pos_fraction (float): Fraction of positive samples
14 |         neg_pos_ub (int, optional): Upper bound number of negative and
15 |             positive samples. Defaults to -1.
16 | add_gt_as_proposals (bool, optional): Whether to add ground truth 17 | boxes as proposals. Defaults to True. 18 | """ 19 | 20 | def __init__(self, 21 | num, 22 | pos_fraction, 23 | neg_pos_ub=-1, 24 | add_gt_as_proposals=True, 25 | **kwargs): 26 | from mmdet.core.bbox import demodata 27 | super(RandomSampler, self).__init__(num, pos_fraction, neg_pos_ub, 28 | add_gt_as_proposals) 29 | self.rng = demodata.ensure_rng(kwargs.get('rng', None)) 30 | 31 | def random_choice(self, gallery, num): 32 | """Random select some elements from the gallery. 33 | 34 | If `gallery` is a Tensor, the returned indices will be a Tensor; 35 | If `gallery` is a ndarray or list, the returned indices will be a 36 | ndarray. 37 | 38 | Args: 39 | gallery (Tensor | ndarray | list): indices pool. 40 | num (int): expected sample num. 41 | 42 | Returns: 43 | Tensor or ndarray: sampled indices. 44 | """ 45 | assert len(gallery) >= num 46 | 47 | is_tensor = isinstance(gallery, torch.Tensor) 48 | if not is_tensor: 49 | if torch.cuda.is_available(): 50 | device = torch.cuda.current_device() 51 | else: 52 | device = 'cpu' 53 | gallery = torch.tensor(gallery, dtype=torch.long, device=device) 54 | perm = torch.randperm(gallery.numel(), device=gallery.device)[:num] 55 | rand_inds = gallery[perm] 56 | if not is_tensor: 57 | rand_inds = rand_inds.cpu().numpy() 58 | return rand_inds 59 | 60 | def _sample_pos(self, assign_result, num_expected, **kwargs): 61 | """Randomly sample some positive samples.""" 62 | pos_inds = torch.nonzero(assign_result.gt_inds > 0, as_tuple=False) 63 | if pos_inds.numel() != 0: 64 | pos_inds = pos_inds.squeeze(1) 65 | if pos_inds.numel() <= num_expected: 66 | return pos_inds 67 | else: 68 | return self.random_choice(pos_inds, num_expected) 69 | 70 | def _sample_neg(self, assign_result, num_expected, **kwargs): 71 | """Randomly sample some negative samples.""" 72 | neg_inds = torch.nonzero(assign_result.gt_inds == 0, as_tuple=False) 73 | if neg_inds.numel() != 0: 74 | neg_inds = neg_inds.squeeze(1) 75 | if len(neg_inds) <= num_expected: 76 | return neg_inds 77 | else: 78 | return self.random_choice(neg_inds, num_expected) 79 | -------------------------------------------------------------------------------- /mmdet/core/evaluation/__init__.py: -------------------------------------------------------------------------------- 1 | from .class_names import (cityscapes_classes, coco_classes, dataset_aliases, 2 | get_classes, imagenet_det_classes, 3 | imagenet_vid_classes, voc_classes) 4 | from .eval_hooks import DistEvalHook, EvalHook 5 | from .mean_ap import average_precision, eval_map, print_map_summary 6 | from .recall import (eval_recalls, plot_iou_recall, plot_num_recall, 7 | print_recall_summary) 8 | 9 | __all__ = [ 10 | 'voc_classes', 'imagenet_det_classes', 'imagenet_vid_classes', 11 | 'coco_classes', 'cityscapes_classes', 'dataset_aliases', 'get_classes', 12 | 'DistEvalHook', 'EvalHook', 'average_precision', 'eval_map', 13 | 'print_map_summary', 'eval_recalls', 'print_recall_summary', 14 | 'plot_num_recall', 'plot_iou_recall' 15 | ] 16 | -------------------------------------------------------------------------------- /mmdet/core/evaluation/bbox_overlaps.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def bbox_overlaps(bboxes1, bboxes2, mode='iou', eps=1e-6): 5 | """Calculate the ious between each bbox of bboxes1 and bboxes2. 
6 | 
7 |     Args:
8 |         bboxes1(ndarray): shape (n, 4)
9 |         bboxes2(ndarray): shape (k, 4)
10 |         mode(str): iou (intersection over union) or iof (intersection
11 |             over foreground)
12 | 
13 |     Returns:
14 |         ious(ndarray): shape (n, k)
15 |     """
16 | 
17 |     assert mode in ['iou', 'iof']
18 | 
19 |     bboxes1 = bboxes1.astype(np.float32)
20 |     bboxes2 = bboxes2.astype(np.float32)
21 |     rows = bboxes1.shape[0]
22 |     cols = bboxes2.shape[0]
23 |     ious = np.zeros((rows, cols), dtype=np.float32)
24 |     if rows * cols == 0:
25 |         return ious
26 |     exchange = False
27 |     if bboxes1.shape[0] > bboxes2.shape[0]:
28 |         bboxes1, bboxes2 = bboxes2, bboxes1
29 |         ious = np.zeros((cols, rows), dtype=np.float32)
30 |         exchange = True
31 |     area1 = (bboxes1[:, 2] - bboxes1[:, 0]) * (bboxes1[:, 3] - bboxes1[:, 1])
32 |     area2 = (bboxes2[:, 2] - bboxes2[:, 0]) * (bboxes2[:, 3] - bboxes2[:, 1])
33 |     for i in range(bboxes1.shape[0]):
34 |         x_start = np.maximum(bboxes1[i, 0], bboxes2[:, 0])
35 |         y_start = np.maximum(bboxes1[i, 1], bboxes2[:, 1])
36 |         x_end = np.minimum(bboxes1[i, 2], bboxes2[:, 2])
37 |         y_end = np.minimum(bboxes1[i, 3], bboxes2[:, 3])
38 |         overlap = np.maximum(x_end - x_start, 0) * np.maximum(
39 |             y_end - y_start, 0)
40 |         if mode == 'iou':
41 |             union = area1[i] + area2 - overlap
42 |         else:
43 |             union = area1[i] if not exchange else area2
44 |         union = np.maximum(union, eps)
45 |         ious[i, :] = overlap / union
46 |     if exchange:
47 |         ious = ious.T
48 |     return ious
49 | 
--------------------------------------------------------------------------------
/mmdet/core/export/__init__.py:
--------------------------------------------------------------------------------
1 | from .pytorch2onnx import (build_model_from_cfg,
2 |                            generate_inputs_and_wrap_model,
3 |                            preprocess_example_input)
4 | 
5 | __all__ = [
6 |     'build_model_from_cfg', 'generate_inputs_and_wrap_model',
7 |     'preprocess_example_input'
8 | ]
9 | 
--------------------------------------------------------------------------------
/mmdet/core/fp16/__init__.py:
--------------------------------------------------------------------------------
1 | from .deprecated_fp16_utils import \
2 |     DeprecatedFp16OptimizerHook as Fp16OptimizerHook
3 | from .deprecated_fp16_utils import deprecated_auto_fp16 as auto_fp16
4 | from .deprecated_fp16_utils import deprecated_force_fp32 as force_fp32
5 | from .deprecated_fp16_utils import \
6 |     deprecated_wrap_fp16_model as wrap_fp16_model
7 | 
8 | __all__ = ['auto_fp16', 'force_fp32', 'Fp16OptimizerHook', 'wrap_fp16_model']
9 | 
--------------------------------------------------------------------------------
/mmdet/core/fp16/deprecated_fp16_utils.py:
--------------------------------------------------------------------------------
1 | import warnings
2 | 
3 | from mmcv.runner import (Fp16OptimizerHook, auto_fp16, force_fp32,
4 |                          wrap_fp16_model)
5 | 
6 | 
7 | class DeprecatedFp16OptimizerHook(Fp16OptimizerHook):
8 |     """A wrapper class for the FP16 optimizer hook. This class wraps
9 |     :class:`Fp16OptimizerHook` in `mmcv.runner` and shows a warning that the
10 |     :class:`Fp16OptimizerHook` from `mmdet.core` will be deprecated.
11 | 
12 |     Refer to :class:`Fp16OptimizerHook` in `mmcv.runner` for more details.
13 | 
14 |     Args:
15 |         loss_scale (float): Scale factor multiplied with loss.
16 |     """
17 | 
18 |     def __init__(self, *args, **kwargs):
19 |         super().__init__(*args, **kwargs)
20 |         warnings.warn(
21 |             'Importing Fp16OptimizerHook from "mmdet.core" will be '
22 |             'deprecated in the future. Please import it from "mmcv.runner" '
23 |             'instead')
24 | 
25 | 
26 | def deprecated_auto_fp16(*args, **kwargs):
27 |     warnings.warn(
28 |         'Importing auto_fp16 from "mmdet.core" will be '
29 |         'deprecated in the future. Please import it from "mmcv.runner" '
30 |         'instead')
31 |     return auto_fp16(*args, **kwargs)
32 | 
33 | 
34 | def deprecated_force_fp32(*args, **kwargs):
35 |     warnings.warn(
36 |         'Importing force_fp32 from "mmdet.core" will be '
37 |         'deprecated in the future. Please import it from "mmcv.runner" '
38 |         'instead')
39 |     return force_fp32(*args, **kwargs)
40 | 
41 | 
42 | def deprecated_wrap_fp16_model(*args, **kwargs):
43 |     warnings.warn(
44 |         'Importing wrap_fp16_model from "mmdet.core" will be '
45 |         'deprecated in the future. Please import it from "mmcv.runner" '
46 |         'instead')
47 |     wrap_fp16_model(*args, **kwargs)
48 | 
--------------------------------------------------------------------------------
/mmdet/core/mask/__init__.py:
--------------------------------------------------------------------------------
1 | from .mask_target import mask_target
2 | from .structures import BaseInstanceMasks, BitmapMasks, PolygonMasks
3 | from .utils import encode_mask_results, split_combined_polys
4 | 
5 | __all__ = [
6 |     'split_combined_polys', 'mask_target', 'BaseInstanceMasks', 'BitmapMasks',
7 |     'PolygonMasks', 'encode_mask_results'
8 | ]
9 | 
--------------------------------------------------------------------------------
/mmdet/core/mask/mask_target.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import torch
3 | from torch.nn.modules.utils import _pair
4 | 
5 | 
6 | def mask_target(pos_proposals_list, pos_assigned_gt_inds_list, gt_masks_list,
7 |                 cfg):
8 |     """Compute mask targets for positive proposals in multiple images.
9 | 
10 |     Args:
11 |         pos_proposals_list (list[Tensor]): Positive proposals in multiple
12 |             images.
13 |         pos_assigned_gt_inds_list (list[Tensor]): Assigned GT indices for the
14 |             positive proposals.
15 |         gt_masks_list (list[:obj:`BaseInstanceMasks`]): Ground truth masks of
16 |             each image.
17 |         cfg (dict): Config dict that specifies the mask size.
18 | 
19 |     Returns:
20 |         list[Tensor]: Mask target of each image.
21 |     """
22 |     cfg_list = [cfg for _ in range(len(pos_proposals_list))]
23 |     mask_targets = map(mask_target_single, pos_proposals_list,
24 |                        pos_assigned_gt_inds_list, gt_masks_list, cfg_list)
25 |     mask_targets = list(mask_targets)
26 |     if len(mask_targets) > 0:
27 |         mask_targets = torch.cat(mask_targets)
28 |     return mask_targets
29 | 
30 | 
31 | def mask_target_single(pos_proposals, pos_assigned_gt_inds, gt_masks, cfg):
32 |     """Compute mask targets for each positive proposal in the image.
33 | 
34 |     Args:
35 |         pos_proposals (Tensor): Positive proposals.
36 |         pos_assigned_gt_inds (Tensor): Assigned GT inds of positive proposals.
37 |         gt_masks (:obj:`BaseInstanceMasks`): GT masks in the format of Bitmap
38 |             or Polygon.
39 |         cfg (dict): Config dict that indicates the mask size.
40 | 
41 |     Returns:
42 |         Tensor: Mask target of each positive proposal in the image.
43 | """ 44 | device = pos_proposals.device 45 | mask_size = _pair(cfg.mask_size) 46 | num_pos = pos_proposals.size(0) 47 | if num_pos > 0: 48 | proposals_np = pos_proposals.cpu().numpy() 49 | maxh, maxw = gt_masks.height, gt_masks.width 50 | proposals_np[:, [0, 2]] = np.clip(proposals_np[:, [0, 2]], 0, maxw) 51 | proposals_np[:, [1, 3]] = np.clip(proposals_np[:, [1, 3]], 0, maxh) 52 | pos_assigned_gt_inds = pos_assigned_gt_inds.cpu().numpy() 53 | 54 | mask_targets = gt_masks.crop_and_resize( 55 | proposals_np, mask_size, device=device, 56 | inds=pos_assigned_gt_inds).to_ndarray() 57 | 58 | mask_targets = torch.from_numpy(mask_targets).float().to(device) 59 | else: 60 | mask_targets = pos_proposals.new_zeros((0, ) + mask_size) 61 | 62 | return mask_targets 63 | -------------------------------------------------------------------------------- /mmdet/core/mask/utils.py: -------------------------------------------------------------------------------- 1 | import mmcv 2 | import numpy as np 3 | import pycocotools.mask as mask_util 4 | 5 | 6 | def split_combined_polys(polys, poly_lens, polys_per_mask): 7 | """Split the combined 1-D polys into masks. 8 | 9 | A mask is represented as a list of polys, and a poly is represented as 10 | a 1-D array. In dataset, all masks are concatenated into a single 1-D 11 | tensor. Here we need to split the tensor into original representations. 12 | 13 | Args: 14 | polys (list): a list (length = image num) of 1-D tensors 15 | poly_lens (list): a list (length = image num) of poly length 16 | polys_per_mask (list): a list (length = image num) of poly number 17 | of each mask 18 | 19 | Returns: 20 | list: a list (length = image num) of list (length = mask num) of \ 21 | list (length = poly num) of numpy array. 22 | """ 23 | mask_polys_list = [] 24 | for img_id in range(len(polys)): 25 | polys_single = polys[img_id] 26 | polys_lens_single = poly_lens[img_id].tolist() 27 | polys_per_mask_single = polys_per_mask[img_id].tolist() 28 | 29 | split_polys = mmcv.slice_list(polys_single, polys_lens_single) 30 | mask_polys = mmcv.slice_list(split_polys, polys_per_mask_single) 31 | mask_polys_list.append(mask_polys) 32 | return mask_polys_list 33 | 34 | 35 | # TODO: move this function to more proper place 36 | def encode_mask_results(mask_results): 37 | """Encode bitmap mask to RLE code. 38 | 39 | Args: 40 | mask_results (list | tuple[list]): bitmap mask results. 41 | In mask scoring rcnn, mask_results is a tuple of (segm_results, 42 | segm_cls_score). 43 | 44 | Returns: 45 | list | tuple: RLE encoded mask. 
46 | """ 47 | if isinstance(mask_results, tuple): # mask scoring 48 | cls_segms, cls_mask_scores = mask_results 49 | else: 50 | cls_segms = mask_results 51 | num_classes = len(cls_segms) 52 | encoded_mask_results = [[] for _ in range(num_classes)] 53 | for i in range(len(cls_segms)): 54 | for cls_segm in cls_segms[i]: 55 | encoded_mask_results[i].append( 56 | mask_util.encode( 57 | np.array( 58 | cls_segm[:, :, np.newaxis], order='F', 59 | dtype='uint8'))[0]) # encoded with RLE 60 | if isinstance(mask_results, tuple): 61 | return encoded_mask_results, cls_mask_scores 62 | else: 63 | return encoded_mask_results 64 | -------------------------------------------------------------------------------- /mmdet/core/post_processing/__init__.py: -------------------------------------------------------------------------------- 1 | from .bbox_nms import fast_nms, multiclass_nms 2 | from .merge_augs import (merge_aug_bboxes, merge_aug_masks, 3 | merge_aug_proposals, merge_aug_scores) 4 | 5 | __all__ = [ 6 | 'multiclass_nms', 'merge_aug_proposals', 'merge_aug_bboxes', 7 | 'merge_aug_scores', 'merge_aug_masks', 'fast_nms' 8 | ] 9 | -------------------------------------------------------------------------------- /mmdet/core/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .dist_utils import DistOptimizerHook, allreduce_grads, reduce_mean 2 | from .misc import mask2ndarray, multi_apply, unmap 3 | 4 | __all__ = [ 5 | 'allreduce_grads', 'DistOptimizerHook', 'reduce_mean', 'multi_apply', 6 | 'unmap', 'mask2ndarray' 7 | ] 8 | -------------------------------------------------------------------------------- /mmdet/core/utils/dist_utils.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | from collections import OrderedDict 3 | 4 | import torch.distributed as dist 5 | from mmcv.runner import OptimizerHook 6 | from torch._utils import (_flatten_dense_tensors, _take_tensors, 7 | _unflatten_dense_tensors) 8 | 9 | 10 | def _allreduce_coalesced(tensors, world_size, bucket_size_mb=-1): 11 | if bucket_size_mb > 0: 12 | bucket_size_bytes = bucket_size_mb * 1024 * 1024 13 | buckets = _take_tensors(tensors, bucket_size_bytes) 14 | else: 15 | buckets = OrderedDict() 16 | for tensor in tensors: 17 | tp = tensor.type() 18 | if tp not in buckets: 19 | buckets[tp] = [] 20 | buckets[tp].append(tensor) 21 | buckets = buckets.values() 22 | 23 | for bucket in buckets: 24 | flat_tensors = _flatten_dense_tensors(bucket) 25 | dist.all_reduce(flat_tensors) 26 | flat_tensors.div_(world_size) 27 | for tensor, synced in zip( 28 | bucket, _unflatten_dense_tensors(flat_tensors, bucket)): 29 | tensor.copy_(synced) 30 | 31 | 32 | def allreduce_grads(params, coalesce=True, bucket_size_mb=-1): 33 | """Allreduce gradients. 34 | 35 | Args: 36 | params (list[torch.Parameters]): List of parameters of a model 37 | coalesce (bool, optional): Whether allreduce parameters as a whole. 38 | Defaults to True. 39 | bucket_size_mb (int, optional): Size of bucket, the unit is MB. 40 | Defaults to -1. 
41 |     """
42 |     grads = [
43 |         param.grad.data for param in params
44 |         if param.requires_grad and param.grad is not None
45 |     ]
46 |     world_size = dist.get_world_size()
47 |     if coalesce:
48 |         _allreduce_coalesced(grads, world_size, bucket_size_mb)
49 |     else:
50 |         for tensor in grads:
51 |             dist.all_reduce(tensor.div_(world_size))
52 | 
53 | 
54 | class DistOptimizerHook(OptimizerHook):
55 |     """Deprecated optimizer hook for distributed training."""
56 | 
57 |     def __init__(self, *args, **kwargs):
58 |         warnings.warn('"DistOptimizerHook" is deprecated, please switch to '
59 |                       '"mmcv.runner.OptimizerHook".')
60 |         super().__init__(*args, **kwargs)
61 | 
62 | 
63 | def reduce_mean(tensor):
64 |     """Obtain the mean of a tensor on different GPUs."""
65 |     if not (dist.is_available() and dist.is_initialized()):
66 |         return tensor
67 |     tensor = tensor.clone()
68 |     dist.all_reduce(tensor.div_(dist.get_world_size()), op=dist.ReduceOp.SUM)
69 |     return tensor
70 | 
--------------------------------------------------------------------------------
/mmdet/core/utils/misc.py:
--------------------------------------------------------------------------------
1 | from functools import partial
2 | 
3 | import numpy as np
4 | import torch
5 | from six.moves import map, zip
6 | 
7 | from ..mask.structures import BitmapMasks, PolygonMasks
8 | 
9 | 
10 | def multi_apply(func, *args, **kwargs):
11 |     """Apply function to a list of arguments.
12 | 
13 |     Note:
14 |         This function applies the ``func`` to multiple inputs and
15 |         maps the multiple outputs of the ``func`` into different
16 |         lists. Each list contains the same type of outputs corresponding
17 |         to different inputs.
18 | 
19 |     Args:
20 |         func (Function): A function that will be applied to a list of
21 |             arguments
22 | 
23 |     Returns:
24 |         tuple(list): A tuple containing multiple lists, each of which \
25 |             contains one kind of result returned by the function
26 |     """
27 |     pfunc = partial(func, **kwargs) if kwargs else func
28 |     map_results = map(pfunc, *args)
29 |     return tuple(map(list, zip(*map_results)))
30 | 
31 | 
32 | def unmap(data, count, inds, fill=0):
33 |     """Unmap a subset of items (data) back to the original set of items (of
34 |     size count)."""
35 |     if data.dim() == 1:
36 |         ret = data.new_full((count, ), fill)
37 |         ret[inds.type(torch.bool)] = data
38 |     else:
39 |         new_size = (count, ) + data.size()[1:]
40 |         ret = data.new_full(new_size, fill)
41 |         ret[inds.type(torch.bool), :] = data
42 |     return ret
43 | 
44 | 
45 | def mask2ndarray(mask):
46 |     """Convert mask to ndarray.
47 | 
48 |     Args:
49 |         mask (:obj:`BitmapMasks` or :obj:`PolygonMasks` or
50 |             torch.Tensor or np.ndarray): The mask to be converted.
51 | 
52 |     Returns:
53 |         np.ndarray: The converted mask of shape (n, h, w).
54 |     """
55 |     if isinstance(mask, (BitmapMasks, PolygonMasks)):
56 |         mask = mask.to_ndarray()
57 |     elif isinstance(mask, torch.Tensor):
58 |         mask = mask.detach().cpu().numpy()
59 |     elif not isinstance(mask, np.ndarray):
60 |         raise TypeError(f'Unsupported {type(mask)} data type')
61 |     return mask
62 | 
--------------------------------------------------------------------------------
/mmdet/core/visualization/__init__.py:
--------------------------------------------------------------------------------
1 | from .image import (color_val_matplotlib, imshow_det_bboxes,
2 |                     imshow_gt_det_bboxes)
3 | 
4 | __all__ = ['imshow_det_bboxes', 'imshow_gt_det_bboxes', 'color_val_matplotlib']
5 | 
--------------------------------------------------------------------------------
/mmdet/datasets/__init__.py:
--------------------------------------------------------------------------------
1 | from .builder import DATASETS, PIPELINES, build_dataloader, build_dataset
2 | from .coco import CocoDataset
3 | from .coco_split import CocoSplitDataset
4 | from .dataset_wrappers import (ClassBalancedDataset, ConcatDataset,
5 |                                RepeatDataset)
6 | from .samplers import DistributedGroupSampler, DistributedSampler, GroupSampler
7 | from .utils import get_loading_pipeline, replace_ImageToTensor
8 | from .custom import CustomDataset
9 | __all__ = [
10 |     'CustomDataset', 'CocoDataset', 'GroupSampler', 'DistributedGroupSampler',
11 |     'DistributedSampler', 'build_dataloader', 'ConcatDataset', 'RepeatDataset',
12 |     'ClassBalancedDataset', 'DATASETS', 'PIPELINES',
13 |     'build_dataset', 'replace_ImageToTensor', 'get_loading_pipeline',
14 |     'CocoSplitDataset'
15 | ]
16 | 
--------------------------------------------------------------------------------
/mmdet/datasets/pipelines/__init__.py:
--------------------------------------------------------------------------------
1 | from .auto_augment import (AutoAugment, BrightnessTransform, ColorTransform,
2 |                            ContrastTransform, EqualizeTransform, Rotate, Shear,
3 |                            Translate)
4 | from .compose import Compose
5 | from .formating import (Collect, DefaultFormatBundle, ImageToTensor,
6 |                         ToDataContainer, ToTensor, Transpose, to_tensor)
7 | from .instaboost import InstaBoost
8 | from .loading import (LoadAnnotations, LoadImageFromFile, LoadImageFromWebcam,
9 |                       LoadMultiChannelImageFromFiles, LoadProposals)
10 | from .test_time_aug import MultiScaleFlipAug
11 | from .transforms import (Albu, CutOut, Expand, MinIoURandomCrop, Normalize,
12 |                          Pad, PhotoMetricDistortion, RandomCenterCropPad,
13 |                          RandomCrop, RandomFlip, Resize, SegRescale)
14 | 
15 | __all__ = [
16 |     'Compose', 'to_tensor', 'ToTensor', 'ImageToTensor', 'ToDataContainer',
17 |     'Transpose', 'Collect', 'DefaultFormatBundle', 'LoadAnnotations',
18 |     'LoadImageFromFile', 'LoadImageFromWebcam',
19 |     'LoadMultiChannelImageFromFiles', 'LoadProposals', 'MultiScaleFlipAug',
20 |     'Resize', 'RandomFlip', 'Pad', 'RandomCrop', 'Normalize', 'SegRescale',
21 |     'MinIoURandomCrop', 'Expand', 'PhotoMetricDistortion', 'Albu',
22 |     'InstaBoost', 'RandomCenterCropPad', 'AutoAugment', 'CutOut', 'Shear',
23 |     'Rotate', 'ColorTransform', 'EqualizeTransform', 'BrightnessTransform',
24 |     'ContrastTransform', 'Translate'
25 | ]
26 | 
--------------------------------------------------------------------------------
/mmdet/datasets/pipelines/compose.py:
--------------------------------------------------------------------------------
1 | import collections
2 | 
3 | from mmcv.utils import build_from_cfg
4 | 
5 | from ..builder import PIPELINES
6 | 
7 | 
8 | @PIPELINES.register_module()
9 | class Compose(object):
10 |     """Compose multiple transforms sequentially.
11 | 
12 |     Args:
13 |         transforms (Sequence[dict | callable]): Sequence of transform objects
14 |             or config dicts to be composed.
15 |     """
16 | 
17 |     def __init__(self, transforms):
18 |         assert isinstance(transforms, collections.abc.Sequence)
19 |         self.transforms = []
20 |         for transform in transforms:
21 |             if isinstance(transform, dict):
22 |                 transform = build_from_cfg(transform, PIPELINES)
23 |                 self.transforms.append(transform)
24 |             elif callable(transform):
25 |                 self.transforms.append(transform)
26 |             else:
27 |                 raise TypeError('transform must be callable or a dict')
28 | 
29 |     def __call__(self, data):
30 |         """Call function to apply transforms sequentially.
31 | 
32 |         Args:
33 |             data (dict): A result dict containing the data to transform.
34 | 
35 |         Returns:
36 |            dict: Transformed data.
37 |         """
38 | 
39 |         for t in self.transforms:
40 |             data = t(data)
41 |             if data is None:
42 |                 return None
43 |         return data
44 | 
45 |     def __repr__(self):
46 |         format_string = self.__class__.__name__ + '('
47 |         for t in self.transforms:
48 |             format_string += '\n'
49 |             format_string += f'    {t}'
50 |         format_string += '\n)'
51 |         return format_string
52 | 
--------------------------------------------------------------------------------
/mmdet/datasets/pipelines/instaboost.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | 
3 | from ..builder import PIPELINES
4 | 
5 | 
6 | @PIPELINES.register_module()
7 | class InstaBoost(object):
8 |     r"""Data augmentation method in `InstaBoost: Boosting Instance
9 |     Segmentation Via Probability Map Guided Copy-Pasting
10 |     <https://arxiv.org/abs/1908.07801>`_.
11 | 
12 |     Refer to https://github.com/GothicAi/Instaboost for implementation details.
13 | """ 14 | 15 | def __init__(self, 16 | action_candidate=('normal', 'horizontal', 'skip'), 17 | action_prob=(1, 0, 0), 18 | scale=(0.8, 1.2), 19 | dx=15, 20 | dy=15, 21 | theta=(-1, 1), 22 | color_prob=0.5, 23 | hflag=False, 24 | aug_ratio=0.5): 25 | try: 26 | import instaboostfast as instaboost 27 | except ImportError: 28 | raise ImportError( 29 | 'Please run "pip install instaboostfast" ' 30 | 'to install instaboostfast first for instaboost augmentation.') 31 | self.cfg = instaboost.InstaBoostConfig(action_candidate, action_prob, 32 | scale, dx, dy, theta, 33 | color_prob, hflag) 34 | self.aug_ratio = aug_ratio 35 | 36 | def _load_anns(self, results): 37 | labels = results['ann_info']['labels'] 38 | masks = results['ann_info']['masks'] 39 | bboxes = results['ann_info']['bboxes'] 40 | n = len(labels) 41 | 42 | anns = [] 43 | for i in range(n): 44 | label = labels[i] 45 | bbox = bboxes[i] 46 | mask = masks[i] 47 | x1, y1, x2, y2 = bbox 48 | # assert (x2 - x1) >= 1 and (y2 - y1) >= 1 49 | bbox = [x1, y1, x2 - x1, y2 - y1] 50 | anns.append({ 51 | 'category_id': label, 52 | 'segmentation': mask, 53 | 'bbox': bbox 54 | }) 55 | 56 | return anns 57 | 58 | def _parse_anns(self, results, anns, img): 59 | gt_bboxes = [] 60 | gt_labels = [] 61 | gt_masks_ann = [] 62 | for ann in anns: 63 | x1, y1, w, h = ann['bbox'] 64 | # TODO: more essential bug need to be fixed in instaboost 65 | if w <= 0 or h <= 0: 66 | continue 67 | bbox = [x1, y1, x1 + w, y1 + h] 68 | gt_bboxes.append(bbox) 69 | gt_labels.append(ann['category_id']) 70 | gt_masks_ann.append(ann['segmentation']) 71 | gt_bboxes = np.array(gt_bboxes, dtype=np.float32) 72 | gt_labels = np.array(gt_labels, dtype=np.int64) 73 | results['ann_info']['labels'] = gt_labels 74 | results['ann_info']['bboxes'] = gt_bboxes 75 | results['ann_info']['masks'] = gt_masks_ann 76 | results['img'] = img 77 | return results 78 | 79 | def __call__(self, results): 80 | img = results['img'] 81 | orig_type = img.dtype 82 | anns = self._load_anns(results) 83 | if np.random.choice([0, 1], p=[1 - self.aug_ratio, self.aug_ratio]): 84 | try: 85 | import instaboostfast as instaboost 86 | except ImportError: 87 | raise ImportError('Please run "pip install instaboostfast" ' 88 | 'to install instaboostfast first.') 89 | anns, img = instaboost.get_new_data( 90 | anns, img.astype(np.uint8), self.cfg, background=None) 91 | 92 | results = self._parse_anns(results, anns, img.astype(orig_type)) 93 | return results 94 | 95 | def __repr__(self): 96 | repr_str = self.__class__.__name__ 97 | repr_str += f'(cfg={self.cfg}, aug_ratio={self.aug_ratio})' 98 | return repr_str 99 | -------------------------------------------------------------------------------- /mmdet/datasets/samplers/__init__.py: -------------------------------------------------------------------------------- 1 | from .distributed_sampler import DistributedSampler 2 | from .group_sampler import DistributedGroupSampler, GroupSampler 3 | 4 | __all__ = ['DistributedSampler', 'DistributedGroupSampler', 'GroupSampler'] 5 | -------------------------------------------------------------------------------- /mmdet/datasets/samplers/distributed_sampler.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | import torch 4 | from torch.utils.data import DistributedSampler as _DistributedSampler 5 | 6 | 7 | class DistributedSampler(_DistributedSampler): 8 | 9 | def __init__(self, dataset, num_replicas=None, rank=None, shuffle=True): 10 | super().__init__(dataset, 
num_replicas=num_replicas, rank=rank) 11 | self.shuffle = shuffle 12 | 13 | def __iter__(self): 14 | # deterministically shuffle based on epoch 15 | if self.shuffle: 16 | g = torch.Generator() 17 | g.manual_seed(self.epoch) 18 | indices = torch.randperm(len(self.dataset), generator=g).tolist() 19 | else: 20 | indices = torch.arange(len(self.dataset)).tolist() 21 | 22 | # add extra samples to make it evenly divisible 23 | # in case indices is shorter than total_size 24 | indices = (indices * 25 | math.ceil(self.total_size / len(indices)))[:self.total_size] 26 | assert len(indices) == self.total_size 27 | 28 | # subsample 29 | indices = indices[self.rank:self.total_size:self.num_replicas] 30 | assert len(indices) == self.num_samples 31 | 32 | return iter(indices) 33 | -------------------------------------------------------------------------------- /mmdet/models/__init__.py: -------------------------------------------------------------------------------- 1 | from .backbones import * # noqa: F401,F403 2 | from .builder import (BACKBONES, DETECTORS, HEADS, LOSSES, NECKS, 3 | ROI_EXTRACTORS, SHARED_HEADS, build_backbone, 4 | build_detector, build_head, build_loss, build_neck, 5 | build_roi_extractor, build_shared_head) 6 | from .dense_heads import * # noqa: F401,F403 7 | from .detectors import * # noqa: F401,F403 8 | from .losses import * # noqa: F401,F403 9 | from .necks import * # noqa: F401,F403 10 | from .roi_heads import * # noqa: F401,F403 11 | 12 | __all__ = [ 13 | 'BACKBONES', 'NECKS', 'ROI_EXTRACTORS', 'SHARED_HEADS', 'HEADS', 'LOSSES', 14 | 'DETECTORS', 'build_backbone', 'build_neck', 'build_roi_extractor', 15 | 'build_shared_head', 'build_head', 'build_loss', 'build_detector' 16 | ] 17 | -------------------------------------------------------------------------------- /mmdet/models/backbones/__init__.py: -------------------------------------------------------------------------------- 1 | from .darknet import Darknet 2 | from .detectors_resnet import DetectoRS_ResNet 3 | from .detectors_resnext import DetectoRS_ResNeXt 4 | from .hourglass import HourglassNet 5 | from .hrnet import HRNet 6 | from .regnet import RegNet 7 | from .res2net import Res2Net 8 | from .resnest import ResNeSt 9 | from .resnet import ResNet, ResNetV1d 10 | from .resnext import ResNeXt 11 | from .ssd_vgg import SSDVGG 12 | from .trident_resnet import TridentResNet 13 | 14 | __all__ = [ 15 | 'RegNet', 'ResNet', 'ResNetV1d', 'ResNeXt', 'SSDVGG', 'HRNet', 'Res2Net', 16 | 'HourglassNet', 'DetectoRS_ResNet', 'DetectoRS_ResNeXt', 'Darknet', 17 | 'ResNeSt', 'TridentResNet' 18 | ] 19 | -------------------------------------------------------------------------------- /mmdet/models/builder.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | 3 | from mmcv.utils import Registry, build_from_cfg 4 | from torch import nn 5 | 6 | BACKBONES = Registry('backbone') 7 | NECKS = Registry('neck') 8 | ROI_EXTRACTORS = Registry('roi_extractor') 9 | SHARED_HEADS = Registry('shared_head') 10 | HEADS = Registry('head') 11 | LOSSES = Registry('loss') 12 | DETECTORS = Registry('detector') 13 | 14 | 15 | def build(cfg, registry, default_args=None): 16 | """Build a module. 17 | 18 | Args: 19 | cfg (dict, list[dict]): The config of modules; it is either a dict 20 | or a list of configs. 21 | registry (:obj:`Registry`): A registry the module belongs to. 22 | default_args (dict, optional): Default arguments to build the module. 23 | Defaults to None.
24 | 25 | Returns: 26 | nn.Module: A built nn module. 27 | """ 28 | if isinstance(cfg, list): 29 | modules = [ 30 | build_from_cfg(cfg_, registry, default_args) for cfg_ in cfg 31 | ] 32 | return nn.Sequential(*modules) 33 | else: 34 | return build_from_cfg(cfg, registry, default_args) 35 | 36 | 37 | def build_backbone(cfg): 38 | """Build backbone.""" 39 | return build(cfg, BACKBONES) 40 | 41 | 42 | def build_neck(cfg): 43 | """Build neck.""" 44 | return build(cfg, NECKS) 45 | 46 | 47 | def build_roi_extractor(cfg): 48 | """Build roi extractor.""" 49 | return build(cfg, ROI_EXTRACTORS) 50 | 51 | 52 | def build_shared_head(cfg): 53 | """Build shared head.""" 54 | return build(cfg, SHARED_HEADS) 55 | 56 | 57 | def build_head(cfg): 58 | """Build head.""" 59 | return build(cfg, HEADS) 60 | 61 | 62 | def build_loss(cfg): 63 | """Build loss.""" 64 | return build(cfg, LOSSES) 65 | 66 | 67 | def build_detector(cfg, train_cfg=None, test_cfg=None): 68 | """Build detector.""" 69 | if train_cfg is not None or test_cfg is not None: 70 | warnings.warn( 71 | 'train_cfg and test_cfg are deprecated, ' 72 | 'please specify them in model', UserWarning) 73 | assert cfg.get('train_cfg') is None or train_cfg is None, \ 74 | 'train_cfg specified in both outer field and model field ' 75 | assert cfg.get('test_cfg') is None or test_cfg is None, \ 76 | 'test_cfg specified in both outer field and model field ' 77 | return build(cfg, DETECTORS, dict(train_cfg=train_cfg, test_cfg=test_cfg)) 78 | -------------------------------------------------------------------------------- /mmdet/models/dense_heads/__init__.py: -------------------------------------------------------------------------------- 1 | from .anchor_head import AnchorHead 2 | from .rpn_head import RPNHead 3 | from .oln_rpn_head import OlnRPNHead 4 | 5 | 6 | __all__ = [ 7 | 'AnchorHead', 'RPNHead', 'OlnRPNHead', 8 | ] 9 | -------------------------------------------------------------------------------- /mmdet/models/dense_heads/base_dense_head.py: -------------------------------------------------------------------------------- 1 | from abc import ABCMeta, abstractmethod 2 | 3 | import torch.nn as nn 4 | 5 | 6 | class BaseDenseHead(nn.Module, metaclass=ABCMeta): 7 | """Base class for DenseHeads.""" 8 | 9 | def __init__(self): 10 | super(BaseDenseHead, self).__init__() 11 | 12 | @abstractmethod 13 | def loss(self, **kwargs): 14 | """Compute losses of the head.""" 15 | pass 16 | 17 | @abstractmethod 18 | def get_bboxes(self, **kwargs): 19 | """Transform network output for a batch into bbox predictions.""" 20 | pass 21 | 22 | def forward_train(self, 23 | x, 24 | img_metas, 25 | gt_bboxes, 26 | gt_labels=None, 27 | gt_bboxes_ignore=None, 28 | proposal_cfg=None, 29 | **kwargs): 30 | """ 31 | Args: 32 | x (list[Tensor]): Features from FPN. 33 | img_metas (list[dict]): Meta information of each image, e.g., 34 | image size, scaling factor, etc. 35 | gt_bboxes (Tensor): Ground truth bboxes of the image, 36 | shape (num_gts, 4). 37 | gt_labels (Tensor): Ground truth labels of each box, 38 | shape (num_gts,). 39 | gt_bboxes_ignore (Tensor): Ground truth bboxes to be 40 | ignored, shape (num_ignored_gts, 4). 41 | proposal_cfg (mmcv.Config): Test / postprocessing configuration; 42 | if None, test_cfg is used. 43 | 44 | Returns: 45 | tuple: 46 | losses (dict[str, Tensor]): A dictionary of loss components. 47 | proposal_list (list[Tensor]): Proposals of each image.
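        A minimal usage sketch (hypothetical variable names; this mirrors how
        two-stage detectors typically drive an RPN-style head):

            >>> losses = head.forward_train(x, img_metas, gt_bboxes)
            >>> losses, proposals = head.forward_train(
            ...     x, img_metas, gt_bboxes, proposal_cfg=proposal_cfg)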
48 | """ 49 | outs = self(x) 50 | if gt_labels is None: 51 | loss_inputs = outs + (gt_bboxes, img_metas) 52 | else: 53 | loss_inputs = outs + (gt_bboxes, gt_labels, img_metas) 54 | losses = self.loss(*loss_inputs, gt_bboxes_ignore=gt_bboxes_ignore) 55 | if proposal_cfg is None: 56 | return losses 57 | else: 58 | proposal_list = self.get_bboxes(*outs, img_metas, cfg=proposal_cfg) 59 | return losses, proposal_list 60 | -------------------------------------------------------------------------------- /mmdet/models/dense_heads/rpn_test_mixin.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | from mmdet.core import merge_aug_proposals 4 | 5 | if sys.version_info >= (3, 7): 6 | from mmdet.utils.contextmanagers import completed 7 | 8 | 9 | class RPNTestMixin(object): 10 | """Test methods of RPN.""" 11 | 12 | if sys.version_info >= (3, 7): 13 | 14 | async def async_simple_test_rpn(self, x, img_metas): 15 | sleep_interval = self.test_cfg.pop('async_sleep_interval', 0.025) 16 | async with completed( 17 | __name__, 'rpn_head_forward', 18 | sleep_interval=sleep_interval): 19 | rpn_outs = self(x) 20 | 21 | proposal_list = self.get_bboxes(*rpn_outs, img_metas) 22 | return proposal_list 23 | 24 | def simple_test_rpn(self, x, img_metas): 25 | """Test without augmentation. 26 | 27 | Args: 28 | x (tuple[Tensor]): Features from the upstream network, each is 29 | a 4D-tensor. 30 | img_metas (list[dict]): Meta info of each image. 31 | 32 | Returns: 33 | list[Tensor]: Proposals of each image. 34 | """ 35 | rpn_outs = self(x) 36 | proposal_list = self.get_bboxes(*rpn_outs, img_metas) 37 | return proposal_list 38 | 39 | def aug_test_rpn(self, feats, img_metas): 40 | samples_per_gpu = len(img_metas[0]) 41 | aug_proposals = [[] for _ in range(samples_per_gpu)] 42 | for x, img_meta in zip(feats, img_metas): 43 | proposal_list = self.simple_test_rpn(x, img_meta) 44 | for i, proposals in enumerate(proposal_list): 45 | aug_proposals[i].append(proposals) 46 | # reorganize the order of 'img_metas' to match the dimensions 47 | # of 'aug_proposals' 48 | aug_img_metas = [] 49 | for i in range(samples_per_gpu): 50 | aug_img_meta = [] 51 | for j in range(len(img_metas)): 52 | aug_img_meta.append(img_metas[j][i]) 53 | aug_img_metas.append(aug_img_meta) 54 | # after merging, proposals will be rescaled to the original image size 55 | merged_proposals = [ 56 | merge_aug_proposals(proposals, aug_img_meta, self.test_cfg) 57 | for proposals, aug_img_meta in zip(aug_proposals, aug_img_metas) 58 | ] 59 | return merged_proposals 60 | -------------------------------------------------------------------------------- /mmdet/models/detectors/__init__.py: -------------------------------------------------------------------------------- 1 | from .base import BaseDetector 2 | from .faster_rcnn import FasterRCNN 3 | from .mask_rcnn import MaskRCNN 4 | from .rpn import RPN 5 | from .two_stage import TwoStageDetector 6 | # 7 | from .rpn_detector import RPNDetector 8 | __all__ = [ 9 | 'BaseDetector', 'TwoStageDetector', 'RPN', 'FasterRCNN', 'MaskRCNN', 10 | 'RPNDetector' 11 | ] 12 | -------------------------------------------------------------------------------- /mmdet/models/detectors/faster_rcnn.py: -------------------------------------------------------------------------------- 1 | from ..builder import DETECTORS 2 | from .two_stage import TwoStageDetector 3 | 4 | 5 | @DETECTORS.register_module() 6 | class FasterRCNN(TwoStageDetector): 7 | """Implementation of `Faster R-CNN <https://arxiv.org/abs/1506.01497>`_""" 8 | 9 | 
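    # A detector like this is normally instantiated through the registry
    # rather than constructed directly; an illustrative sketch, assuming
    # ``cfg`` is a loaded mmcv Config:
    #
    #     from mmdet.models import build_detector
    #     model = build_detector(cfg.model)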
def __init__(self, 10 | backbone, 11 | rpn_head, 12 | roi_head, 13 | train_cfg, 14 | test_cfg, 15 | neck=None, 16 | pretrained=None): 17 | super(FasterRCNN, self).__init__( 18 | backbone=backbone, 19 | neck=neck, 20 | rpn_head=rpn_head, 21 | roi_head=roi_head, 22 | train_cfg=train_cfg, 23 | test_cfg=test_cfg, 24 | pretrained=pretrained) 25 | -------------------------------------------------------------------------------- /mmdet/models/detectors/mask_rcnn.py: -------------------------------------------------------------------------------- 1 | from ..builder import DETECTORS 2 | from .two_stage import TwoStageDetector 3 | 4 | 5 | @DETECTORS.register_module() 6 | class MaskRCNN(TwoStageDetector): 7 | """Implementation of `Mask R-CNN <https://arxiv.org/abs/1703.06870>`_""" 8 | 9 | def __init__(self, 10 | backbone, 11 | rpn_head, 12 | roi_head, 13 | train_cfg, 14 | test_cfg, 15 | neck=None, 16 | pretrained=None): 17 | super(MaskRCNN, self).__init__( 18 | backbone=backbone, 19 | neck=neck, 20 | rpn_head=rpn_head, 21 | roi_head=roi_head, 22 | train_cfg=train_cfg, 23 | test_cfg=test_cfg, 24 | pretrained=pretrained) 25 | -------------------------------------------------------------------------------- /mmdet/models/detectors/rpn_detector.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from mmdet.core import bbox2result 4 | from ..builder import DETECTORS 5 | from .rpn import RPN 6 | 7 | 8 | @DETECTORS.register_module() 9 | class RPNDetector(RPN): 10 | 11 | def simple_test(self, img, img_metas, rescale=False): 12 | """Test function without test time augmentation. 13 | 14 | Args: 15 | img (torch.Tensor): Input images. 16 | img_metas (list[dict]): List of image information. 17 | rescale (bool, optional): Whether to rescale the results. 18 | Defaults to False. 19 | 20 | Returns: 21 | list[np.ndarray]: proposals 22 | """ 23 | x = self.extract_feat(img) 24 | proposal_list = self.rpn_head.simple_test_rpn(x, img_metas) 25 | if rescale: 26 | for proposals, meta in zip(proposal_list, img_metas): 27 | proposals[:, :4] /= proposals.new_tensor(meta['scale_factor']) 28 | 29 | # Convert the rpn-proposals into bbox results format. 
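        # bbox2result groups detections by class label; the proposals here
        # are class-agnostic, so each box below is given the dummy label 0
        # and reported under a single foreground class.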
30 | # proposal_list[0].shape = [200,5] 31 | bbox_results = [] 32 | for det_bboxes in proposal_list: 33 | det_labels = torch.zeros((det_bboxes.size(0))).to( 34 | det_bboxes.device) 35 | bbox_results.append( 36 | bbox2result(det_bboxes, det_labels, num_classes=1)) 37 | 38 | return bbox_results 39 | -------------------------------------------------------------------------------- /mmdet/models/losses/__init__.py: -------------------------------------------------------------------------------- 1 | from .accuracy import Accuracy, accuracy 2 | from .ae_loss import AssociativeEmbeddingLoss 3 | from .balanced_l1_loss import BalancedL1Loss, balanced_l1_loss 4 | from .cross_entropy_loss import (CrossEntropyLoss, binary_cross_entropy, 5 | cross_entropy, mask_cross_entropy) 6 | from .focal_loss import FocalLoss, sigmoid_focal_loss 7 | from .gaussian_focal_loss import GaussianFocalLoss 8 | from .gfocal_loss import DistributionFocalLoss, QualityFocalLoss 9 | from .ghm_loss import GHMC, GHMR 10 | from .iou_loss import (BoundedIoULoss, CIoULoss, DIoULoss, GIoULoss, IoULoss, 11 | bounded_iou_loss, iou_loss) 12 | from .mse_loss import MSELoss, mse_loss 13 | from .pisa_loss import carl_loss, isr_p 14 | from .smooth_l1_loss import L1Loss, SmoothL1Loss, l1_loss, smooth_l1_loss 15 | from .utils import reduce_loss, weight_reduce_loss, weighted_loss 16 | from .varifocal_loss import VarifocalLoss 17 | 18 | __all__ = [ 19 | 'accuracy', 'Accuracy', 'cross_entropy', 'binary_cross_entropy', 20 | 'mask_cross_entropy', 'CrossEntropyLoss', 'sigmoid_focal_loss', 21 | 'FocalLoss', 'smooth_l1_loss', 'SmoothL1Loss', 'balanced_l1_loss', 22 | 'BalancedL1Loss', 'mse_loss', 'MSELoss', 'iou_loss', 'bounded_iou_loss', 23 | 'IoULoss', 'BoundedIoULoss', 'GIoULoss', 'DIoULoss', 'CIoULoss', 'GHMC', 24 | 'GHMR', 'reduce_loss', 'weight_reduce_loss', 'weighted_loss', 'L1Loss', 25 | 'l1_loss', 'isr_p', 'carl_loss', 'AssociativeEmbeddingLoss', 26 | 'GaussianFocalLoss', 'QualityFocalLoss', 'DistributionFocalLoss', 27 | 'VarifocalLoss' 28 | ] 29 | -------------------------------------------------------------------------------- /mmdet/models/losses/accuracy.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | 4 | def accuracy(pred, target, topk=1, thresh=None): 5 | """Calculate accuracy according to the prediction and target. 6 | 7 | Args: 8 | pred (torch.Tensor): The model prediction, shape (N, num_class) 9 | target (torch.Tensor): The target of each prediction, shape (N, ) 10 | topk (int | tuple[int], optional): If the predictions in ``topk`` 11 | match the target, the predictions will be regarded as 12 | correct ones. Defaults to 1. 13 | thresh (float, optional): If not None, predictions with scores under 14 | this threshold are considered incorrect. Defaults to None. 15 | 16 | Returns: 17 | float | tuple[float]: If the input ``topk`` is a single integer, 18 | the function will return a single float as accuracy. If 19 | ``topk`` is a tuple containing multiple integers, the 20 | function will return a tuple containing accuracies of 21 | each ``topk`` number. 22 | """ 23 | assert isinstance(topk, (int, tuple)) 24 | if isinstance(topk, int): 25 | topk = (topk, ) 26 | return_single = True 27 | else: 28 | return_single = False 29 | 30 | maxk = max(topk) 31 | if pred.size(0) == 0: 32 | accu = [pred.new_tensor(0.)
for i in range(len(topk))] 33 | return accu[0] if return_single else accu 34 | assert pred.ndim == 2 and target.ndim == 1 35 | assert pred.size(0) == target.size(0) 36 | assert maxk <= pred.size(1), \ 37 | f'maxk {maxk} exceeds pred dimension {pred.size(1)}' 38 | pred_value, pred_label = pred.topk(maxk, dim=1) 39 | pred_label = pred_label.t() # transpose to shape (maxk, N) 40 | correct = pred_label.eq(target.view(1, -1).expand_as(pred_label)) 41 | if thresh is not None: 42 | # Only prediction values larger than thresh are counted as correct 43 | correct = correct & (pred_value > thresh).t() 44 | res = [] 45 | for k in topk: 46 | correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True) 47 | res.append(correct_k.mul_(100.0 / pred.size(0))) 48 | return res[0] if return_single else res 49 | 50 | 51 | class Accuracy(nn.Module): 52 | 53 | def __init__(self, topk=(1, ), thresh=None): 54 | """Module to calculate the accuracy. 55 | 56 | Args: 57 | topk (tuple, optional): The criterion used to calculate the 58 | accuracy. Defaults to (1,). 59 | thresh (float, optional): If not None, predictions with scores 60 | under this threshold are considered incorrect. Defaults to None. 61 | """ 62 | super().__init__() 63 | self.topk = topk 64 | self.thresh = thresh 65 | 66 | def forward(self, pred, target): 67 | """Forward function to calculate accuracy. 68 | 69 | Args: 70 | pred (torch.Tensor): Prediction of models. 71 | target (torch.Tensor): Target for each prediction. 72 | 73 | Returns: 74 | tuple[float]: The accuracies under different topk criterions. 75 | """ 76 | return accuracy(pred, target, self.topk, self.thresh) 77 | -------------------------------------------------------------------------------- /mmdet/models/losses/gaussian_focal_loss.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | from ..builder import LOSSES 4 | from .utils import weighted_loss 5 | 6 | 7 | @weighted_loss 8 | def gaussian_focal_loss(pred, gaussian_target, alpha=2.0, gamma=4.0): 9 | """`Focal Loss <https://arxiv.org/abs/1708.02002>`_ for targets in gaussian 10 | distribution. 11 | 12 | Args: 13 | pred (torch.Tensor): The prediction. 14 | gaussian_target (torch.Tensor): The learning target of the prediction 15 | in gaussian distribution. 16 | alpha (float, optional): A balanced form for Focal Loss. 17 | Defaults to 2.0. 18 | gamma (float, optional): The gamma for calculating the modulating 19 | factor. Defaults to 4.0. 20 | """ 21 | eps = 1e-12 22 | pos_weights = gaussian_target.eq(1) 23 | neg_weights = (1 - gaussian_target).pow(gamma) 24 | pos_loss = -(pred + eps).log() * (1 - pred).pow(alpha) * pos_weights 25 | neg_loss = -(1 - pred + eps).log() * pred.pow(alpha) * neg_weights 26 | return pos_loss + neg_loss 27 | 28 | 29 | @LOSSES.register_module() 30 | class GaussianFocalLoss(nn.Module): 31 | """GaussianFocalLoss is a variant of focal loss. 32 | 33 | More details can be found in the `paper 34 | <https://arxiv.org/abs/1808.01244>`_ 35 | Code is modified from `kp_utils.py 36 | <https://github.com/princeton-vl/CornerNet/blob/master/models/py_utils/kp_utils.py>`_ # noqa: E501 37 | Please notice that the target in GaussianFocalLoss is a gaussian heatmap, 38 | not a 0/1 binary target. 39 | 40 | Args: 41 | alpha (float): Power of prediction. 42 | gamma (float): Power of target for negative samples. 43 | reduction (str): Options are "none", "mean" and "sum". 44 | loss_weight (float): Loss weight of current loss.
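        A small usage sketch (illustrative values only):

            >>> import torch
            >>> loss_fn = GaussianFocalLoss(alpha=2.0, gamma=4.0)
            >>> pred = torch.tensor([0.9, 0.1])    # predicted heatmap
            >>> target = torch.tensor([1.0, 0.0])  # gaussian target heatmap
            >>> loss = loss_fn(pred, target)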
45 | """ 46 | 47 | def __init__(self, 48 | alpha=2.0, 49 | gamma=4.0, 50 | reduction='mean', 51 | loss_weight=1.0): 52 | super(GaussianFocalLoss, self).__init__() 53 | self.alpha = alpha 54 | self.gamma = gamma 55 | self.reduction = reduction 56 | self.loss_weight = loss_weight 57 | 58 | def forward(self, 59 | pred, 60 | target, 61 | weight=None, 62 | avg_factor=None, 63 | reduction_override=None): 64 | """Forward function. 65 | 66 | Args: 67 | pred (torch.Tensor): The prediction. 68 | target (torch.Tensor): The learning target of the prediction 69 | in gaussian distribution. 70 | weight (torch.Tensor, optional): The weight of loss for each 71 | prediction. Defaults to None. 72 | avg_factor (int, optional): Average factor that is used to average 73 | the loss. Defaults to None. 74 | reduction_override (str, optional): The reduction method used to 75 | override the original reduction method of the loss. 76 | Defaults to None. 77 | """ 78 | assert reduction_override in (None, 'none', 'mean', 'sum') 79 | reduction = ( 80 | reduction_override if reduction_override else self.reduction) 81 | loss_reg = self.loss_weight * gaussian_focal_loss( 82 | pred, 83 | target, 84 | weight, 85 | alpha=self.alpha, 86 | gamma=self.gamma, 87 | reduction=reduction, 88 | avg_factor=avg_factor) 89 | return loss_reg 90 | -------------------------------------------------------------------------------- /mmdet/models/losses/mse_loss.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch.nn.functional as F 3 | 4 | from ..builder import LOSSES 5 | from .utils import weighted_loss 6 | 7 | 8 | @weighted_loss 9 | def mse_loss(pred, target): 10 | """Wrapper of mse loss.""" 11 | return F.mse_loss(pred, target, reduction='none') 12 | 13 | 14 | @LOSSES.register_module() 15 | class MSELoss(nn.Module): 16 | """MSELoss. 17 | 18 | Args: 19 | reduction (str, optional): The method that reduces the loss to a 20 | scalar. Options are "none", "mean" and "sum". 21 | loss_weight (float, optional): The weight of the loss. Defaults to 1.0. 22 | """ 23 | 24 | def __init__(self, reduction='mean', loss_weight=1.0): 25 | super().__init__() 26 | self.reduction = reduction 27 | self.loss_weight = loss_weight 28 | 29 | def forward(self, pred, target, weight=None, avg_factor=None): 30 | """Forward function of loss. 31 | 32 | Args: 33 | pred (torch.Tensor): The prediction. 34 | target (torch.Tensor): The learning target of the prediction. 35 | weight (torch.Tensor, optional): Weight of the loss for each 36 | prediction. Defaults to None. 37 | avg_factor (int, optional): Average factor that is used to average 38 | the loss. Defaults to None. 39 | 40 | Returns: 41 | torch.Tensor: The calculated loss. 42 | """ 43 | loss = self.loss_weight * mse_loss( 44 | pred, 45 | target, 46 | weight, 47 | reduction=self.reduction, 48 | avg_factor=avg_factor) 49 | return loss 50 | -------------------------------------------------------------------------------- /mmdet/models/losses/utils.py: -------------------------------------------------------------------------------- 1 | import functools 2 | 3 | import torch.nn.functional as F 4 | 5 | 6 | def reduce_loss(loss, reduction): 7 | """Reduce loss as specified. 8 | 9 | Args: 10 | loss (Tensor): Elementwise loss tensor. 11 | reduction (str): Options are "none", "mean" and "sum". 12 | 13 | Returns: 14 | Tensor: Reduced loss tensor.
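        Example (worked numbers): ``reduce_loss(torch.tensor([1., 3.]), 'mean')``
        returns ``tensor(2.)``, while ``'sum'`` gives ``tensor(4.)`` and
        ``'none'`` returns the input unchanged.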
15 | """ 16 | reduction_enum = F._Reduction.get_enum(reduction) 17 | # none: 0, elementwise_mean:1, sum: 2 18 | if reduction_enum == 0: 19 | return loss 20 | elif reduction_enum == 1: 21 | return loss.mean() 22 | elif reduction_enum == 2: 23 | return loss.sum() 24 | 25 | 26 | def weight_reduce_loss(loss, weight=None, reduction='mean', avg_factor=None): 27 | """Apply element-wise weight and reduce loss. 28 | 29 | Args: 30 | loss (Tensor): Element-wise loss. 31 | weight (Tensor): Element-wise weights. 32 | reduction (str): Same as built-in losses of PyTorch. 33 | avg_factor (float): Average factor when computing the mean of losses. 34 | 35 | Returns: 36 | Tensor: Processed loss values. 37 | """ 38 | # if weight is specified, apply element-wise weight 39 | if weight is not None: 40 | loss = loss * weight 41 | 42 | # if avg_factor is not specified, just reduce the loss 43 | if avg_factor is None: 44 | loss = reduce_loss(loss, reduction) 45 | else: 46 | # if reduction is mean, then average the loss by avg_factor 47 | if reduction == 'mean': 48 | loss = loss.sum() / avg_factor 49 | # if reduction is 'none', then do nothing, otherwise raise an error 50 | elif reduction != 'none': 51 | raise ValueError('avg_factor cannot be used with reduction="sum"') 52 | return loss 53 | 54 | 55 | def weighted_loss(loss_func): 56 | """Create a weighted version of a given loss function. 57 | 58 | To use this decorator, the loss function must have the signature like 59 | `loss_func(pred, target, **kwargs)`. The function only needs to compute 60 | element-wise loss without any reduction. This decorator will add weight 61 | and reduction arguments to the function. The decorated function will have 62 | the signature like `loss_func(pred, target, weight=None, reduction='mean', 63 | avg_factor=None, **kwargs)`. 64 | 65 | :Example: 66 | 67 | >>> import torch 68 | >>> @weighted_loss 69 | >>> def l1_loss(pred, target): 70 | >>> return (pred - target).abs() 71 | 72 | >>> pred = torch.Tensor([0, 2, 3]) 73 | >>> target = torch.Tensor([1, 1, 1]) 74 | >>> weight = torch.Tensor([1, 0, 1]) 75 | 76 | >>> l1_loss(pred, target) 77 | tensor(1.3333) 78 | >>> l1_loss(pred, target, weight) 79 | tensor(1.)
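    >>> # the 'sum' reduction over the same inputs, for completeness
    >>> l1_loss(pred, target, reduction='sum')
    tensor(4.)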
80 | >>> l1_loss(pred, target, reduction='none') 81 | tensor([1., 1., 2.]) 82 | >>> l1_loss(pred, target, weight, avg_factor=2) 83 | tensor(1.5000) 84 | """ 85 | 86 | @functools.wraps(loss_func) 87 | def wrapper(pred, 88 | target, 89 | weight=None, 90 | reduction='mean', 91 | avg_factor=None, 92 | **kwargs): 93 | # get element-wise loss 94 | loss = loss_func(pred, target, **kwargs) 95 | loss = weight_reduce_loss(loss, weight, reduction, avg_factor) 96 | return loss 97 | 98 | return wrapper 99 | -------------------------------------------------------------------------------- /mmdet/models/necks/__init__.py: -------------------------------------------------------------------------------- 1 | from .fpn import FPN 2 | 3 | __all__ = [ 4 | 'FPN', 5 | ] 6 | -------------------------------------------------------------------------------- /mmdet/models/roi_heads/__init__.py: -------------------------------------------------------------------------------- 1 | from .base_roi_head import BaseRoIHead 2 | from .bbox_heads import (BBoxHead, ConvFCBBoxHead, Shared2FCBBoxHead, 3 | Shared4Conv1FCBBoxHead) 4 | from .mask_heads import (CoarseMaskHead, FCNMaskHead, FusedSemanticHead, 5 | GridHead, HTCMaskHead, MaskIoUHead, MaskPointHead) 6 | from .roi_extractors import SingleRoIExtractor 7 | from .shared_heads import ResLayer 8 | from .standard_roi_head import StandardRoIHead 9 | 10 | from .oln_roi_head import OlnRoIHead 11 | 12 | __all__ = [ 13 | 'BaseRoIHead', 'ResLayer', 'BBoxHead', 14 | 'ConvFCBBoxHead', 'Shared2FCBBoxHead', 'StandardRoIHead', 15 | 'Shared4Conv1FCBBoxHead', 'FCNMaskHead', 16 | 'SingleRoIExtractor', 'OlnRoIHead' 17 | ] 18 | -------------------------------------------------------------------------------- /mmdet/models/roi_heads/base_roi_head.py: -------------------------------------------------------------------------------- 1 | from abc import ABCMeta, abstractmethod 2 | 3 | import torch.nn as nn 4 | 5 | from ..builder import build_shared_head 6 | 7 | 8 | class BaseRoIHead(nn.Module, metaclass=ABCMeta): 9 | """Base class for RoIHeads.""" 10 | 11 | def __init__(self, 12 | bbox_roi_extractor=None, 13 | bbox_head=None, 14 | mask_roi_extractor=None, 15 | mask_head=None, 16 | shared_head=None, 17 | train_cfg=None, 18 | test_cfg=None): 19 | super(BaseRoIHead, self).__init__() 20 | self.train_cfg = train_cfg 21 | self.test_cfg = test_cfg 22 | if shared_head is not None: 23 | self.shared_head = build_shared_head(shared_head) 24 | 25 | if bbox_head is not None: 26 | self.init_bbox_head(bbox_roi_extractor, bbox_head) 27 | 28 | if mask_head is not None: 29 | self.init_mask_head(mask_roi_extractor, mask_head) 30 | 31 | self.init_assigner_sampler() 32 | 33 | @property 34 | def with_bbox(self): 35 | """bool: whether the RoI head contains a `bbox_head`""" 36 | return hasattr(self, 'bbox_head') and self.bbox_head is not None 37 | 38 | @property 39 | def with_mask(self): 40 | """bool: whether the RoI head contains a `mask_head`""" 41 | return hasattr(self, 'mask_head') and self.mask_head is not None 42 | 43 | @property 44 | def with_shared_head(self): 45 | """bool: whether the RoI head contains a `shared_head`""" 46 | return hasattr(self, 'shared_head') and self.shared_head is not None 47 | 48 | @abstractmethod 49 | def init_weights(self, pretrained): 50 | """Initialize the weights in head. 51 | 52 | Args: 53 | pretrained (str, optional): Path to pre-trained weights. 54 | Defaults to None. 
55 | """ 56 | pass 57 | 58 | @abstractmethod 59 | def init_bbox_head(self): 60 | """Initialize ``bbox_head``""" 61 | pass 62 | 63 | @abstractmethod 64 | def init_mask_head(self): 65 | """Initialize ``mask_head``""" 66 | pass 67 | 68 | @abstractmethod 69 | def init_assigner_sampler(self): 70 | """Initialize assigner and sampler.""" 71 | pass 72 | 73 | @abstractmethod 74 | def forward_train(self, 75 | x, 76 | img_meta, 77 | proposal_list, 78 | gt_bboxes, 79 | gt_labels, 80 | gt_bboxes_ignore=None, 81 | gt_masks=None, 82 | **kwargs): 83 | """Forward function during training.""" 84 | pass 85 | 86 | async def async_simple_test(self, x, img_meta, **kwargs): 87 | """Asynchronous test function.""" 88 | raise NotImplementedError 89 | 90 | def simple_test(self, 91 | x, 92 | proposal_list, 93 | img_meta, 94 | proposals=None, 95 | rescale=False, 96 | **kwargs): 97 | """Test without augmentation.""" 98 | pass 99 | 100 | def aug_test(self, x, proposal_list, img_metas, rescale=False, **kwargs): 101 | """Test with augmentations. 102 | 103 | If rescale is False, then returned bboxes and masks will fit the scale 104 | of imgs[0]. 105 | """ 106 | pass 107 | -------------------------------------------------------------------------------- /mmdet/models/roi_heads/bbox_heads/__init__.py: -------------------------------------------------------------------------------- 1 | from .bbox_head import BBoxHead 2 | from .convfc_bbox_head import (ConvFCBBoxHead, Shared2FCBBoxHead, 3 | Shared4Conv1FCBBoxHead) 4 | from .convfc_bbox_score_head import (ConvFCBBoxScoreHead, 5 | Shared2FCBBoxScoreHead) 6 | __all__ = [ 7 | 'BBoxHead', 'ConvFCBBoxHead', 'Shared2FCBBoxHead', 8 | 'Shared4Conv1FCBBoxHead', 'ConvFCBBoxScoreHead', 'Shared2FCBBoxScoreHead' 9 | ] 10 | -------------------------------------------------------------------------------- /mmdet/models/roi_heads/mask_heads/__init__.py: -------------------------------------------------------------------------------- 1 | from .coarse_mask_head import CoarseMaskHead 2 | from .fcn_mask_head import FCNMaskHead 3 | from .fused_semantic_head import FusedSemanticHead 4 | from .grid_head import GridHead 5 | from .htc_mask_head import HTCMaskHead 6 | from .mask_point_head import MaskPointHead 7 | from .maskiou_head import MaskIoUHead 8 | 9 | __all__ = [ 10 | 'FCNMaskHead', 'HTCMaskHead', 'FusedSemanticHead', 'GridHead', 11 | 'MaskIoUHead', 'CoarseMaskHead', 'MaskPointHead' 12 | ] 13 | -------------------------------------------------------------------------------- /mmdet/models/roi_heads/mask_heads/coarse_mask_head.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | from mmcv.cnn import ConvModule, Linear, constant_init, xavier_init 3 | from mmcv.runner import auto_fp16 4 | 5 | from mmdet.models.builder import HEADS 6 | from .fcn_mask_head import FCNMaskHead 7 | 8 | 9 | @HEADS.register_module() 10 | class CoarseMaskHead(FCNMaskHead): 11 | """Coarse mask head used in PointRend. 12 | 13 | Compared with standard ``FCNMaskHead``, ``CoarseMaskHead`` will downsample 14 | the input feature map instead of upsampling it. 15 | 16 | Args: 17 | num_convs (int): Number of conv layers in the head. Default: 0. 18 | num_fcs (int): Number of fc layers in the head. Default: 2. 19 | fc_out_channels (int): Number of output channels of fc layer. 20 | Default: 1024. 21 | downsample_factor (int): The factor by which the feature map is 22 | downsampled. Default: 2.
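        Shape sketch (assuming the inherited ``FCNMaskHead`` default of
        ``roi_feat_size=14``): with ``downsample_factor=2`` the FC logits are
        reshaped to ``num_classes`` coarse masks of spatial size 7x7.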
23 | """ 24 | 25 | def __init__(self, 26 | num_convs=0, 27 | num_fcs=2, 28 | fc_out_channels=1024, 29 | downsample_factor=2, 30 | *arg, 31 | **kwarg): 32 | super(CoarseMaskHead, self).__init__( 33 | *arg, num_convs=num_convs, upsample_cfg=dict(type=None), **kwarg) 34 | self.num_fcs = num_fcs 35 | assert self.num_fcs > 0 36 | self.fc_out_channels = fc_out_channels 37 | self.downsample_factor = downsample_factor 38 | assert self.downsample_factor >= 1 39 | # remove conv_logit 40 | delattr(self, 'conv_logits') 41 | 42 | if downsample_factor > 1: 43 | downsample_in_channels = ( 44 | self.conv_out_channels 45 | if self.num_convs > 0 else self.in_channels) 46 | self.downsample_conv = ConvModule( 47 | downsample_in_channels, 48 | self.conv_out_channels, 49 | kernel_size=downsample_factor, 50 | stride=downsample_factor, 51 | padding=0, 52 | conv_cfg=self.conv_cfg, 53 | norm_cfg=self.norm_cfg) 54 | else: 55 | self.downsample_conv = None 56 | 57 | self.output_size = (self.roi_feat_size[0] // downsample_factor, 58 | self.roi_feat_size[1] // downsample_factor) 59 | self.output_area = self.output_size[0] * self.output_size[1] 60 | 61 | last_layer_dim = self.conv_out_channels * self.output_area 62 | 63 | self.fcs = nn.ModuleList() 64 | for i in range(num_fcs): 65 | fc_in_channels = ( 66 | last_layer_dim if i == 0 else self.fc_out_channels) 67 | self.fcs.append(Linear(fc_in_channels, self.fc_out_channels)) 68 | last_layer_dim = self.fc_out_channels 69 | output_channels = self.num_classes * self.output_area 70 | self.fc_logits = Linear(last_layer_dim, output_channels) 71 | 72 | def init_weights(self): 73 | for m in self.fcs.modules(): 74 | if isinstance(m, nn.Linear): 75 | xavier_init(m) 76 | constant_init(self.fc_logits, 0.001) 77 | 78 | @auto_fp16() 79 | def forward(self, x): 80 | for conv in self.convs: 81 | x = conv(x) 82 | 83 | if self.downsample_conv is not None: 84 | x = self.downsample_conv(x) 85 | 86 | x = x.flatten(1) 87 | for fc in self.fcs: 88 | x = self.relu(fc(x)) 89 | mask_pred = self.fc_logits(x).view( 90 | x.size(0), self.num_classes, *self.output_size) 91 | return mask_pred 92 | -------------------------------------------------------------------------------- /mmdet/models/roi_heads/mask_heads/htc_mask_head.py: -------------------------------------------------------------------------------- 1 | from mmcv.cnn import ConvModule 2 | 3 | from mmdet.models.builder import HEADS 4 | from .fcn_mask_head import FCNMaskHead 5 | 6 | 7 | @HEADS.register_module() 8 | class HTCMaskHead(FCNMaskHead): 9 | 10 | def __init__(self, with_conv_res=True, *args, **kwargs): 11 | super(HTCMaskHead, self).__init__(*args, **kwargs) 12 | self.with_conv_res = with_conv_res 13 | if self.with_conv_res: 14 | self.conv_res = ConvModule( 15 | self.conv_out_channels, 16 | self.conv_out_channels, 17 | 1, 18 | conv_cfg=self.conv_cfg, 19 | norm_cfg=self.norm_cfg) 20 | 21 | def init_weights(self): 22 | super(HTCMaskHead, self).init_weights() 23 | if self.with_conv_res: 24 | self.conv_res.init_weights() 25 | 26 | def forward(self, x, res_feat=None, return_logits=True, return_feat=True): 27 | if res_feat is not None: 28 | assert self.with_conv_res 29 | res_feat = self.conv_res(res_feat) 30 | x = x + res_feat 31 | for conv in self.convs: 32 | x = conv(x) 33 | res_feat = x 34 | outs = [] 35 | if return_logits: 36 | x = self.upsample(x) 37 | if self.upsample_method == 'deconv': 38 | x = self.relu(x) 39 | mask_pred = self.conv_logits(x) 40 | outs.append(mask_pred) 41 | if return_feat: 42 | outs.append(res_feat) 43 | return outs if 
len(outs) > 1 else outs[0] 44 | -------------------------------------------------------------------------------- /mmdet/models/roi_heads/roi_extractors/__init__.py: -------------------------------------------------------------------------------- 1 | from .generic_roi_extractor import GenericRoIExtractor 2 | from .single_level_roi_extractor import SingleRoIExtractor 3 | 4 | __all__ = [ 5 | 'SingleRoIExtractor', 6 | 'GenericRoIExtractor', 7 | ] 8 | -------------------------------------------------------------------------------- /mmdet/models/roi_heads/roi_extractors/base_roi_extractor.py: -------------------------------------------------------------------------------- 1 | from abc import ABCMeta, abstractmethod 2 | 3 | import torch 4 | import torch.nn as nn 5 | from mmcv import ops 6 | 7 | 8 | class BaseRoIExtractor(nn.Module, metaclass=ABCMeta): 9 | """Base class for RoI extractor. 10 | 11 | Args: 12 | roi_layer (dict): Specify RoI layer type and arguments. 13 | out_channels (int): Output channels of RoI layers. 14 | featmap_strides (List[int]): Strides of input feature maps. 15 | """ 16 | 17 | def __init__(self, roi_layer, out_channels, featmap_strides): 18 | super(BaseRoIExtractor, self).__init__() 19 | self.roi_layers = self.build_roi_layers(roi_layer, featmap_strides) 20 | self.out_channels = out_channels 21 | self.featmap_strides = featmap_strides 22 | self.fp16_enabled = False 23 | 24 | @property 25 | def num_inputs(self): 26 | """int: Number of input feature maps.""" 27 | return len(self.featmap_strides) 28 | 29 | def init_weights(self): 30 | pass 31 | 32 | def build_roi_layers(self, layer_cfg, featmap_strides): 33 | """Build RoI operator to extract feature from each level feature map. 34 | 35 | Args: 36 | layer_cfg (dict): Dictionary to construct and config RoI layer 37 | operation. Options are modules under ``mmcv/ops`` such as 38 | ``RoIAlign``. 39 | featmap_strides (List[int]): The stride of input feature map w.r.t. the 40 | original image size, which would be used to scale RoI 41 | coordinate (original image coordinate system) to feature 42 | coordinate system. 43 | 44 | Returns: 45 | nn.ModuleList: The RoI extractor modules for each level feature 46 | map. 47 | """ 48 | 49 | cfg = layer_cfg.copy() 50 | layer_type = cfg.pop('type') 51 | assert hasattr(ops, layer_type) 52 | layer_cls = getattr(ops, layer_type) 53 | roi_layers = nn.ModuleList( 54 | [layer_cls(spatial_scale=1 / s, **cfg) for s in featmap_strides]) 55 | return roi_layers 56 | 57 | def roi_rescale(self, rois, scale_factor): 58 | """Scale RoI coordinates by scale factor. 59 | 60 | Args: 61 | rois (torch.Tensor): RoI (Region of Interest), shape (n, 5) 62 | scale_factor (float): Scale factor that RoI will be multiplied by. 63 | 64 | Returns: 65 | torch.Tensor: Scaled RoI.
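        Worked example: scaling the RoI ``(batch_idx, 0, 0, 10, 10)`` by
        ``scale_factor=2.0`` keeps the centre at (5, 5) and yields
        ``(batch_idx, -5, -5, 15, 15)``.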
66 | """ 67 | 68 | cx = (rois[:, 1] + rois[:, 3]) * 0.5 69 | cy = (rois[:, 2] + rois[:, 4]) * 0.5 70 | w = rois[:, 3] - rois[:, 1] 71 | h = rois[:, 4] - rois[:, 2] 72 | new_w = w * scale_factor 73 | new_h = h * scale_factor 74 | x1 = cx - new_w * 0.5 75 | x2 = cx + new_w * 0.5 76 | y1 = cy - new_h * 0.5 77 | y2 = cy + new_h * 0.5 78 | new_rois = torch.stack((rois[:, 0], x1, y1, x2, y2), dim=-1) 79 | return new_rois 80 | 81 | @abstractmethod 82 | def forward(self, feats, rois, roi_scale_factor=None): 83 | pass 84 | -------------------------------------------------------------------------------- /mmdet/models/roi_heads/roi_extractors/generic_roi_extractor.py: -------------------------------------------------------------------------------- 1 | from mmcv.cnn.bricks import build_plugin_layer 2 | from mmcv.runner import force_fp32 3 | 4 | from mmdet.models.builder import ROI_EXTRACTORS 5 | from .base_roi_extractor import BaseRoIExtractor 6 | 7 | 8 | @ROI_EXTRACTORS.register_module() 9 | class GenericRoIExtractor(BaseRoIExtractor): 10 | """Extract RoI features from all feature map levels. 11 | 12 | This is the implementation of `A novel Region of Interest Extraction Layer 13 | for Instance Segmentation <https://arxiv.org/abs/2004.13665>`_. 14 | 15 | Args: 16 | aggregation (str): The method to aggregate multiple feature maps. 17 | Options are 'sum', 'concat'. Default: 'sum'. 18 | pre_cfg (dict | None): Specify pre-processing modules. Default: None. 19 | post_cfg (dict | None): Specify post-processing modules. Default: None. 20 | kwargs (keyword arguments): Arguments that are the same 21 | as :class:`BaseRoIExtractor`. 22 | """ 23 | 24 | def __init__(self, 25 | aggregation='sum', 26 | pre_cfg=None, 27 | post_cfg=None, 28 | **kwargs): 29 | super(GenericRoIExtractor, self).__init__(**kwargs) 30 | 31 | assert aggregation in ['sum', 'concat'] 32 | 33 | self.aggregation = aggregation 34 | self.with_post = post_cfg is not None 35 | self.with_pre = pre_cfg is not None 36 | # build pre/post processing modules 37 | if self.with_post: 38 | self.post_module = build_plugin_layer(post_cfg, '_post_module')[1] 39 | if self.with_pre: 40 | self.pre_module = build_plugin_layer(pre_cfg, '_pre_module')[1] 41 | 42 | @force_fp32(apply_to=('feats', ), out_fp16=True) 43 | def forward(self, feats, rois, roi_scale_factor=None): 44 | """Forward function.""" 45 | if len(feats) == 1: 46 | return self.roi_layers[0](feats[0], rois) 47 | 48 | out_size = self.roi_layers[0].output_size 49 | num_levels = len(feats) 50 | roi_feats = feats[0].new_zeros( 51 | rois.size(0), self.out_channels, *out_size) 52 | 53 | # sometimes rois is an empty tensor 54 | if roi_feats.shape[0] == 0: 55 | return roi_feats 56 | 57 | if roi_scale_factor is not None: 58 | rois = self.roi_rescale(rois, roi_scale_factor) 59 | 60 | # mark the starting channels for concat mode 61 | start_channels = 0 62 | for i in range(num_levels): 63 | roi_feats_t = self.roi_layers[i](feats[i], rois) 64 | end_channels = start_channels + roi_feats_t.size(1) 65 | if self.with_pre: 66 | # apply pre-processing to a RoI extracted from each layer 67 | roi_feats_t = self.pre_module(roi_feats_t) 68 | if self.aggregation == 'sum': 69 | # and sum them all 70 | roi_feats += roi_feats_t 71 | else: 72 | # and concat them along channel dimension 73 | roi_feats[:, start_channels:end_channels] = roi_feats_t 74 | # update channels starting position 75 | start_channels = end_channels 76 | # check if concat channels match at the end 77 | if self.aggregation == 'concat': 78 | assert start_channels == self.out_channels 79 | 
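        # At this point roi_feats holds either the level-wise sum or the
        # channel-wise concatenation of RoI features from all levels.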
80 | if self.with_post: 81 | # apply post-processing before returning the result 82 | roi_feats = self.post_module(roi_feats) 83 | return roi_feats 84 | -------------------------------------------------------------------------------- /mmdet/models/roi_heads/shared_heads/__init__.py: -------------------------------------------------------------------------------- 1 | from .res_layer import ResLayer 2 | 3 | __all__ = ['ResLayer'] 4 | -------------------------------------------------------------------------------- /mmdet/models/roi_heads/shared_heads/res_layer.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | from mmcv.cnn import constant_init, kaiming_init 3 | from mmcv.runner import auto_fp16, load_checkpoint 4 | 5 | from mmdet.models.backbones import ResNet 6 | from mmdet.models.builder import SHARED_HEADS 7 | from mmdet.models.utils import ResLayer as _ResLayer 8 | from mmdet.utils import get_root_logger 9 | 10 | 11 | @SHARED_HEADS.register_module() 12 | class ResLayer(nn.Module): 13 | 14 | def __init__(self, 15 | depth, 16 | stage=3, 17 | stride=2, 18 | dilation=1, 19 | style='pytorch', 20 | norm_cfg=dict(type='BN', requires_grad=True), 21 | norm_eval=True, 22 | with_cp=False, 23 | dcn=None): 24 | super(ResLayer, self).__init__() 25 | self.norm_eval = norm_eval 26 | self.norm_cfg = norm_cfg 27 | self.stage = stage 28 | self.fp16_enabled = False 29 | block, stage_blocks = ResNet.arch_settings[depth] 30 | stage_block = stage_blocks[stage] 31 | planes = 64 * 2**stage 32 | inplanes = 64 * 2**(stage - 1) * block.expansion 33 | 34 | res_layer = _ResLayer( 35 | block, 36 | inplanes, 37 | planes, 38 | stage_block, 39 | stride=stride, 40 | dilation=dilation, 41 | style=style, 42 | with_cp=with_cp, 43 | norm_cfg=self.norm_cfg, 44 | dcn=dcn) 45 | self.add_module(f'layer{stage + 1}', res_layer) 46 | 47 | def init_weights(self, pretrained=None): 48 | """Initialize the weights in the module. 49 | 50 | Args: 51 | pretrained (str, optional): Path to pre-trained weights. 52 | Defaults to None.
53 | """ 54 | if isinstance(pretrained, str): 55 | logger = get_root_logger() 56 | load_checkpoint(self, pretrained, strict=False, logger=logger) 57 | elif pretrained is None: 58 | for m in self.modules(): 59 | if isinstance(m, nn.Conv2d): 60 | kaiming_init(m) 61 | elif isinstance(m, nn.BatchNorm2d): 62 | constant_init(m, 1) 63 | else: 64 | raise TypeError('pretrained must be a str or None') 65 | 66 | @auto_fp16() 67 | def forward(self, x): 68 | res_layer = getattr(self, f'layer{self.stage + 1}') 69 | out = res_layer(x) 70 | return out 71 | 72 | def train(self, mode=True): 73 | super(ResLayer, self).train(mode) 74 | if self.norm_eval: 75 | for m in self.modules(): 76 | if isinstance(m, nn.BatchNorm2d): 77 | m.eval() 78 | -------------------------------------------------------------------------------- /mmdet/models/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .builder import build_positional_encoding, build_transformer 2 | from .gaussian_target import gaussian_radius, gen_gaussian_target 3 | from .positional_encoding import (LearnedPositionalEncoding, 4 | SinePositionalEncoding) 5 | from .res_layer import ResLayer 6 | from .transformer import (FFN, MultiheadAttention, Transformer, 7 | TransformerDecoder, TransformerDecoderLayer, 8 | TransformerEncoder, TransformerEncoderLayer) 9 | 10 | __all__ = [ 11 | 'ResLayer', 'gaussian_radius', 'gen_gaussian_target', 'MultiheadAttention', 12 | 'FFN', 'TransformerEncoderLayer', 'TransformerEncoder', 13 | 'TransformerDecoderLayer', 'TransformerDecoder', 'Transformer', 14 | 'build_transformer', 'build_positional_encoding', 'SinePositionalEncoding', 15 | 'LearnedPositionalEncoding' 16 | ] 17 | -------------------------------------------------------------------------------- /mmdet/models/utils/builder.py: -------------------------------------------------------------------------------- 1 | from mmcv.utils import Registry, build_from_cfg 2 | 3 | TRANSFORMER = Registry('Transformer') 4 | POSITIONAL_ENCODING = Registry('Position encoding') 5 | 6 | 7 | def build_transformer(cfg, default_args=None): 8 | """Builder for Transformer.""" 9 | return build_from_cfg(cfg, TRANSFORMER, default_args) 10 | 11 | 12 | def build_positional_encoding(cfg, default_args=None): 13 | """Builder for Position Encoding.""" 14 | return build_from_cfg(cfg, POSITIONAL_ENCODING, default_args) 15 | -------------------------------------------------------------------------------- /mmdet/models/utils/res_layer.py: -------------------------------------------------------------------------------- 1 | from mmcv.cnn import build_conv_layer, build_norm_layer 2 | from torch import nn as nn 3 | 4 | 5 | class ResLayer(nn.Sequential): 6 | """ResLayer to build ResNet style backbone. 7 | 8 | Args: 9 | block (nn.Module): block used to build ResLayer. 10 | inplanes (int): inplanes of block. 11 | planes (int): planes of block. 12 | num_blocks (int): number of blocks. 13 | stride (int): stride of the first block. Default: 1 14 | avg_down (bool): Use AvgPool instead of stride conv when 15 | downsampling in the bottleneck. Default: False 16 | conv_cfg (dict): dictionary to construct and config conv layer. 17 | Default: None 18 | norm_cfg (dict): dictionary to construct and config norm layer. 19 | Default: dict(type='BN') 20 | downsample_first (bool): Downsample at the first block or last block. 21 | False for Hourglass, True for ResNet. 
Default: True 22 | """ 23 | 24 | def __init__(self, 25 | block, 26 | inplanes, 27 | planes, 28 | num_blocks, 29 | stride=1, 30 | avg_down=False, 31 | conv_cfg=None, 32 | norm_cfg=dict(type='BN'), 33 | downsample_first=True, 34 | **kwargs): 35 | self.block = block 36 | 37 | downsample = None 38 | if stride != 1 or inplanes != planes * block.expansion: 39 | downsample = [] 40 | conv_stride = stride 41 | if avg_down: 42 | conv_stride = 1 43 | downsample.append( 44 | nn.AvgPool2d( 45 | kernel_size=stride, 46 | stride=stride, 47 | ceil_mode=True, 48 | count_include_pad=False)) 49 | downsample.extend([ 50 | build_conv_layer( 51 | conv_cfg, 52 | inplanes, 53 | planes * block.expansion, 54 | kernel_size=1, 55 | stride=conv_stride, 56 | bias=False), 57 | build_norm_layer(norm_cfg, planes * block.expansion)[1] 58 | ]) 59 | downsample = nn.Sequential(*downsample) 60 | 61 | layers = [] 62 | if downsample_first: 63 | layers.append( 64 | block( 65 | inplanes=inplanes, 66 | planes=planes, 67 | stride=stride, 68 | downsample=downsample, 69 | conv_cfg=conv_cfg, 70 | norm_cfg=norm_cfg, 71 | **kwargs)) 72 | inplanes = planes * block.expansion 73 | for _ in range(1, num_blocks): 74 | layers.append( 75 | block( 76 | inplanes=inplanes, 77 | planes=planes, 78 | stride=1, 79 | conv_cfg=conv_cfg, 80 | norm_cfg=norm_cfg, 81 | **kwargs)) 82 | 83 | else: # downsample_first=False is for HourglassModule 84 | for _ in range(num_blocks - 1): 85 | layers.append( 86 | block( 87 | inplanes=inplanes, 88 | planes=inplanes, 89 | stride=1, 90 | conv_cfg=conv_cfg, 91 | norm_cfg=norm_cfg, 92 | **kwargs)) 93 | layers.append( 94 | block( 95 | inplanes=inplanes, 96 | planes=planes, 97 | stride=stride, 98 | downsample=downsample, 99 | conv_cfg=conv_cfg, 100 | norm_cfg=norm_cfg, 101 | **kwargs)) 102 | super(ResLayer, self).__init__(*layers) 103 | -------------------------------------------------------------------------------- /mmdet/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .collect_env import collect_env 2 | from .logger import get_root_logger 3 | 4 | __all__ = ['get_root_logger', 'collect_env'] 5 | -------------------------------------------------------------------------------- /mmdet/utils/collect_env.py: -------------------------------------------------------------------------------- 1 | from mmcv.utils import collect_env as collect_base_env 2 | from mmcv.utils import get_git_hash 3 | 4 | import mmdet 5 | 6 | 7 | def collect_env(): 8 | """Collect the information of the running environments.""" 9 | env_info = collect_base_env() 10 | env_info['MMDetection'] = mmdet.__version__ + '+' + get_git_hash()[:7] 11 | return env_info 12 | 13 | 14 | if __name__ == '__main__': 15 | for name, val in collect_env().items(): 16 | print(f'{name}: {val}') 17 | -------------------------------------------------------------------------------- /mmdet/utils/logger.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from mmcv.utils import get_logger 4 | 5 | 6 | def get_root_logger(log_file=None, log_level=logging.INFO): 7 | """Get root logger. 8 | 9 | Args: 10 | log_file (str, optional): File path of log. Defaults to None. 11 | log_level (int, optional): The level of logger. 12 | Defaults to logging.INFO. 
13 | 14 | Returns: 15 | :obj:`logging.Logger`: The obtained logger 16 | """ 17 | logger = get_logger(name='mmdet', log_file=log_file, log_level=log_level) 18 | 19 | return logger 20 | -------------------------------------------------------------------------------- /mmdet/utils/profiling.py: -------------------------------------------------------------------------------- 1 | import contextlib 2 | import sys 3 | import time 4 | 5 | import torch 6 | 7 | if sys.version_info >= (3, 7): 8 | 9 | @contextlib.contextmanager 10 | def profile_time(trace_name, 11 | name, 12 | enabled=True, 13 | stream=None, 14 | end_stream=None): 15 | """Print time spent by CPU and GPU. 16 | 17 | Useful as a temporary context manager to find sweet spots of code 18 | suitable for async implementation. 19 | """ 20 | if (not enabled) or not torch.cuda.is_available(): 21 | yield 22 | return 23 | stream = stream if stream else torch.cuda.current_stream() 24 | end_stream = end_stream if end_stream else stream 25 | start = torch.cuda.Event(enable_timing=True) 26 | end = torch.cuda.Event(enable_timing=True) 27 | stream.record_event(start) 28 | try: 29 | cpu_start = time.monotonic() 30 | yield 31 | finally: 32 | cpu_end = time.monotonic() 33 | end_stream.record_event(end) 34 | end.synchronize() 35 | cpu_time = (cpu_end - cpu_start) * 1000 36 | gpu_time = start.elapsed_time(end) 37 | msg = f'{trace_name} {name} cpu_time {cpu_time:.2f} ms ' 38 | msg += f'gpu_time {gpu_time:.2f} ms stream {stream}' 39 | print(msg, end_stream) 40 | -------------------------------------------------------------------------------- /mmdet/version.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Open-MMLab. All rights reserved. 2 | 3 | __version__ = '2.8.0' 4 | short_version = __version__ 5 | 6 | 7 | def parse_version_info(version_str): 8 | version_info = [] 9 | for x in version_str.split('.'): 10 | if x.isdigit(): 11 | version_info.append(int(x)) 12 | elif x.find('rc') != -1: 13 | patch_version = x.split('rc') 14 | version_info.append(int(patch_version[0])) 15 | version_info.append(f'rc{patch_version[1]}') 16 | return tuple(version_info) 17 | 18 | 19 | version_info = parse_version_info(__version__) 20 | -------------------------------------------------------------------------------- /pytest.ini: -------------------------------------------------------------------------------- 1 | [pytest] 2 | addopts = --xdoctest --xdoctest-style=auto 3 | norecursedirs = .git ignore build __pycache__ data docker docs .eggs 4 | 5 | filterwarnings= default 6 | ignore:.*No cfgstr given in Cacher constructor or call.*:Warning 7 | ignore:.*Define the __nice__ method for.*:Warning 8 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | -r requirements/build.txt 2 | -r requirements/optional.txt 3 | -r requirements/runtime.txt 4 | -r requirements/tests.txt 5 | -------------------------------------------------------------------------------- /requirements/build.txt: -------------------------------------------------------------------------------- 1 | # These must be installed before building mmdetection 2 | cython 3 | numpy 4 | -------------------------------------------------------------------------------- /requirements/docs.txt: -------------------------------------------------------------------------------- 1 | recommonmark 2 | sphinx 3 | sphinx_markdown_tables 4 | sphinx_rtd_theme 5 | 
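For reference, a quick check of how ``parse_version_info`` in
``mmdet/version.py`` above handles release-candidate strings (the version
numbers here are illustrative, not the project's release history):

    >>> parse_version_info('2.8.0')
    (2, 8, 0)
    >>> parse_version_info('2.8.0rc1')
    (2, 8, 0, 'rc1')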
-------------------------------------------------------------------------------- /requirements/optional.txt: -------------------------------------------------------------------------------- 1 | albumentations>=0.3.2 2 | cityscapesscripts 3 | imagecorruptions 4 | mmlvis 5 | scipy 6 | sklearn 7 | -------------------------------------------------------------------------------- /requirements/readthedocs.txt: -------------------------------------------------------------------------------- 1 | mmcv 2 | torch 3 | torchvision 4 | -------------------------------------------------------------------------------- /requirements/runtime.txt: -------------------------------------------------------------------------------- 1 | matplotlib 2 | mmpycocotools 3 | numpy 4 | six 5 | terminaltables 6 | -------------------------------------------------------------------------------- /requirements/tests.txt: -------------------------------------------------------------------------------- 1 | asynctest 2 | codecov 3 | flake8 4 | interrogate 5 | isort==4.3.21 6 | # Note: used for kwarray.group_items, this may be ported to mmcv in the future. 7 | kwarray 8 | pytest 9 | ubelt 10 | xdoctest>=0.10.0 11 | yapf 12 | -------------------------------------------------------------------------------- /resources/coco_test_12510.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcahny/object_localization_network/300d995da000484fdfd26114e4b783c992046e9c/resources/coco_test_12510.jpg -------------------------------------------------------------------------------- /resources/corruptions_sev_3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcahny/object_localization_network/300d995da000484fdfd26114e4b783c992046e9c/resources/corruptions_sev_3.png -------------------------------------------------------------------------------- /resources/data_pipeline.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcahny/object_localization_network/300d995da000484fdfd26114e4b783c992046e9c/resources/data_pipeline.png -------------------------------------------------------------------------------- /resources/loss_curve.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcahny/object_localization_network/300d995da000484fdfd26114e4b783c992046e9c/resources/loss_curve.png -------------------------------------------------------------------------------- /resources/mmdet-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcahny/object_localization_network/300d995da000484fdfd26114e4b783c992046e9c/resources/mmdet-logo.png -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [isort] 2 | line_length = 79 3 | multi_line_output = 0 4 | known_standard_library = setuptools 5 | known_first_party = mmdet 6 | known_third_party = PIL,asynctest,cityscapesscripts,cv2,matplotlib,mmcv,numpy,onnx,onnxruntime,pycocotools,pytest,robustness_eval,seaborn,six,terminaltables,torch 7 | no_lines_before = STDLIB,LOCALFOLDER 8 | default_section = THIRDPARTY 9 | 10 | [yapf] 11 | BASED_ON_STYLE = pep8 12 | BLANK_LINE_BEFORE_NESTED_CLASS_OR_DEF = true 13 | SPLIT_BEFORE_EXPRESSION_AFTER_OPENING_PAREN = true 14 | 
-------------------------------------------------------------------------------- /tests/async_benchmark.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import os 3 | import shutil 4 | import urllib 5 | 6 | import mmcv 7 | import torch 8 | 9 | from mmdet.apis import (async_inference_detector, inference_detector, 10 | init_detector) 11 | from mmdet.utils.contextmanagers import concurrent 12 | from mmdet.utils.profiling import profile_time 13 | 14 | 15 | async def main(): 16 | """Benchmark between async and synchronous inference interfaces. 17 | 18 | Sample runs for 20 demo images on K80 GPU, model - mask_rcnn_r50_fpn_1x: 19 | 20 | async sync 21 | 22 | 7981.79 ms 9660.82 ms 23 | 8074.52 ms 9660.94 ms 24 | 7976.44 ms 9406.83 ms 25 | 26 | Async variant takes about 0.83-0.85 of the time of the synchronous 27 | interface. 28 | """ 29 | project_dir = os.path.abspath(os.path.dirname(os.path.dirname(__file__))) 30 | 31 | config_file = os.path.join( 32 | project_dir, 'configs/mask_rcnn/mask_rcnn_r50_fpn_1x_coco.py') 33 | checkpoint_file = os.path.join( 34 | project_dir, 35 | 'checkpoints/mask_rcnn_r50_fpn_1x_coco_20200205-d4b0c5d6.pth') 36 | 37 | if not os.path.exists(checkpoint_file): 38 | url = ('http://download.openmmlab.com/mmdetection/v2.0' 39 | '/mask_rcnn/mask_rcnn_r50_fpn_1x_coco' 40 | '/mask_rcnn_r50_fpn_1x_coco_20200205-d4b0c5d6.pth') 41 | print(f'Downloading {url} ...') 42 | local_filename, _ = urllib.request.urlretrieve(url) 43 | os.makedirs(os.path.dirname(checkpoint_file), exist_ok=True) 44 | shutil.move(local_filename, checkpoint_file) 45 | print(f'Saved as {checkpoint_file}') 46 | else: 47 | print(f'Using existing checkpoint {checkpoint_file}') 48 | 49 | device = 'cuda:0' 50 | model = init_detector( 51 | config_file, checkpoint=checkpoint_file, device=device) 52 | 53 | # queue is used for concurrent inference of multiple images 54 | streamqueue = asyncio.Queue() 55 | # queue size defines concurrency level 56 | streamqueue_size = 4 57 | 58 | for _ in range(streamqueue_size): 59 | streamqueue.put_nowait(torch.cuda.Stream(device=device)) 60 | 61 | # test a single image and show the results 62 | img = mmcv.imread(os.path.join(project_dir, 'demo/demo.jpg')) 63 | 64 | # warmup 65 | await async_inference_detector(model, img) 66 | 67 | async def detect(img): 68 | async with concurrent(streamqueue): 69 | return await async_inference_detector(model, img) 70 | 71 | num_of_images = 20 72 | with profile_time('benchmark', 'async'): 73 | tasks = [ 74 | asyncio.create_task(detect(img)) for _ in range(num_of_images) 75 | ] 76 | async_results = await asyncio.gather(*tasks) 77 | 78 | with torch.cuda.stream(torch.cuda.default_stream()): 79 | with profile_time('benchmark', 'sync'): 80 | sync_results = [ 81 | inference_detector(model, img) for _ in range(num_of_images) 82 | ] 83 | 84 | result_dir = os.path.join(project_dir, 'demo') 85 | model.show_result( 86 | img, 87 | async_results[0], 88 | score_thr=0.5, 89 | show=False, 90 | out_file=os.path.join(result_dir, 'result_async.jpg')) 91 | model.show_result( 92 | img, 93 | sync_results[0], 94 | score_thr=0.5, 95 | show=False, 96 | out_file=os.path.join(result_dir, 'result_sync.jpg')) 97 | 98 | 99 | if __name__ == '__main__': 100 | asyncio.run(main()) 101 | -------------------------------------------------------------------------------- /tests/test_async.py: -------------------------------------------------------------------------------- 1 | """Tests for async interface.""" 2 | 3 | import asyncio 4 | 
import os 5 | import sys 6 | 7 | import asynctest 8 | import mmcv 9 | import torch 10 | 11 | from mmdet.apis import async_inference_detector, init_detector 12 | 13 | if sys.version_info >= (3, 7): 14 | from mmdet.utils.contextmanagers import concurrent 15 | 16 | 17 | class AsyncTestCase(asynctest.TestCase): 18 | use_default_loop = False 19 | forbid_get_event_loop = True 20 | 21 | TEST_TIMEOUT = int(os.getenv('ASYNCIO_TEST_TIMEOUT', '30')) 22 | 23 | def _run_test_method(self, method): 24 | result = method() 25 | if asyncio.iscoroutine(result): 26 | self.loop.run_until_complete( 27 | asyncio.wait_for(result, timeout=self.TEST_TIMEOUT)) 28 | 29 | 30 | class MaskRCNNDetector: 31 | 32 | def __init__(self, 33 | model_config, 34 | checkpoint=None, 35 | streamqueue_size=3, 36 | device='cuda:0'): 37 | 38 | self.streamqueue_size = streamqueue_size 39 | self.device = device 40 | # build the model and load the (optional) checkpoint 41 | self.model = init_detector( 42 | model_config, checkpoint=checkpoint, device=self.device) 43 | self.streamqueue = None 44 | 45 | async def init(self): 46 | self.streamqueue = asyncio.Queue() 47 | for _ in range(self.streamqueue_size): 48 | stream = torch.cuda.Stream(device=self.device) 49 | self.streamqueue.put_nowait(stream) 50 | 51 | if sys.version_info >= (3, 7): 52 | 53 | async def apredict(self, img): 54 | if isinstance(img, str): 55 | img = mmcv.imread(img) 56 | async with concurrent(self.streamqueue): 57 | result = await async_inference_detector(self.model, img) 58 | return result 59 | 60 | 61 | class AsyncInferenceTestCase(AsyncTestCase): 62 | 63 | if sys.version_info >= (3, 7): 64 | 65 | async def test_simple_inference(self): 66 | if not torch.cuda.is_available(): 67 | import pytest 68 | 69 | pytest.skip('test requires GPU and torch+cuda') 70 | 71 | ori_grad_enabled = torch.is_grad_enabled() 72 | root_dir = os.path.dirname(os.path.dirname(__file__)) 73 | model_config = os.path.join( 74 | root_dir, 'configs/mask_rcnn/mask_rcnn_r50_fpn_1x_coco.py') 75 | detector = MaskRCNNDetector(model_config) 76 | await detector.init() 77 | img_path = os.path.join(root_dir, 'demo/demo.jpg') 78 | bboxes, _ = await detector.apredict(img_path) 79 | self.assertTrue(bboxes) 80 | # the async inference detector hacks grad_enabled, 81 | # so restore it here to keep other tests unaffected 82 | torch.set_grad_enabled(ori_grad_enabled) 83 | -------------------------------------------------------------------------------- /tests/test_coder.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from mmdet.core.bbox.coder import YOLOBBoxCoder 4 | 5 | 6 | def test_yolo_bbox_coder(): 7 | coder = YOLOBBoxCoder() 8 | bboxes = torch.Tensor([[-42., -29., 74., 61.], [-10., -29., 106., 61.], 9 | [22., -29., 138., 61.], [54., -29., 170., 61.]]) 10 | pred_bboxes = torch.Tensor([[0.4709, 0.6152, 0.1690, -0.4056], 11 | [0.5399, 0.6653, 0.1162, -0.4162], 12 | [0.4654, 0.6618, 0.1548, -0.4301], 13 | [0.4786, 0.6197, 0.1896, -0.4479]]) 14 | grid_size = 32 15 | expected_decode_bboxes = torch.Tensor( 16 | [[-53.6102, -10.3096, 83.7478, 49.6824], 17 | [-15.8700, -8.3901, 114.4236, 50.9693], 18 | [11.1822, -8.0924, 146.6034, 50.4476], 19 | [41.2068, -8.9232, 181.4236, 48.5840]]) 20 | assert expected_decode_bboxes.allclose( 21 | coder.decode(bboxes, pred_bboxes, grid_size)) 22 | -------------------------------------------------------------------------------- /tests/test_data/test_formatting.py: -------------------------------------------------------------------------------- 1
| import os.path as osp 2 | 3 | from mmcv.utils import build_from_cfg 4 | 5 | from mmdet.datasets.builder import PIPELINES 6 | 7 | 8 | def test_default_format_bundle(): 9 | results = dict( 10 | img_prefix=osp.join(osp.dirname(__file__), '../data'), 11 | img_info=dict(filename='color.jpg')) 12 | load = dict(type='LoadImageFromFile') 13 | load = build_from_cfg(load, PIPELINES) 14 | bundle = dict(type='DefaultFormatBundle') 15 | bundle = build_from_cfg(bundle, PIPELINES) 16 | results = load(results) 17 | assert 'pad_shape' not in results 18 | assert 'scale_factor' not in results 19 | assert 'img_norm_cfg' not in results 20 | results = bundle(results) 21 | assert 'pad_shape' in results 22 | assert 'scale_factor' in results 23 | assert 'img_norm_cfg' in results 24 | -------------------------------------------------------------------------------- /tests/test_data/test_loading.py: -------------------------------------------------------------------------------- 1 | import copy 2 | import os.path as osp 3 | 4 | import mmcv 5 | import numpy as np 6 | 7 | from mmdet.datasets.pipelines import (LoadImageFromFile, LoadImageFromWebcam, 8 | LoadMultiChannelImageFromFiles) 9 | 10 | 11 | class TestLoading(object): 12 | 13 | @classmethod 14 | def setup_class(cls): 15 | cls.data_prefix = osp.join(osp.dirname(__file__), '../data') 16 | 17 | def test_load_img(self): 18 | results = dict( 19 | img_prefix=self.data_prefix, img_info=dict(filename='color.jpg')) 20 | transform = LoadImageFromFile() 21 | results = transform(copy.deepcopy(results)) 22 | assert results['filename'] == osp.join(self.data_prefix, 'color.jpg') 23 | assert results['ori_filename'] == 'color.jpg' 24 | assert results['img'].shape == (288, 512, 3) 25 | assert results['img'].dtype == np.uint8 26 | assert results['img_shape'] == (288, 512, 3) 27 | assert results['ori_shape'] == (288, 512, 3) 28 | assert repr(transform) == transform.__class__.__name__ + \ 29 | "(to_float32=False, color_type='color', " + \ 30 | "file_client_args={'backend': 'disk'})" 31 | 32 | # no img_prefix 33 | results = dict( 34 | img_prefix=None, img_info=dict(filename='tests/data/color.jpg')) 35 | transform = LoadImageFromFile() 36 | results = transform(copy.deepcopy(results)) 37 | assert results['filename'] == 'tests/data/color.jpg' 38 | assert results['ori_filename'] == 'tests/data/color.jpg' 39 | assert results['img'].shape == (288, 512, 3) 40 | 41 | # to_float32 42 | transform = LoadImageFromFile(to_float32=True) 43 | results = transform(copy.deepcopy(results)) 44 | assert results['img'].dtype == np.float32 45 | 46 | # gray image 47 | results = dict( 48 | img_prefix=self.data_prefix, img_info=dict(filename='gray.jpg')) 49 | transform = LoadImageFromFile() 50 | results = transform(copy.deepcopy(results)) 51 | assert results['img'].shape == (288, 512, 3) 52 | assert results['img'].dtype == np.uint8 53 | 54 | transform = LoadImageFromFile(color_type='unchanged') 55 | results = transform(copy.deepcopy(results)) 56 | assert results['img'].shape == (288, 512) 57 | assert results['img'].dtype == np.uint8 58 | 59 | def test_load_multi_channel_img(self): 60 | results = dict( 61 | img_prefix=self.data_prefix, 62 | img_info=dict(filename=['color.jpg', 'color.jpg'])) 63 | transform = LoadMultiChannelImageFromFiles() 64 | results = transform(copy.deepcopy(results)) 65 | assert results['filename'] == [ 66 | osp.join(self.data_prefix, 'color.jpg'), 67 | osp.join(self.data_prefix, 'color.jpg') 68 | ] 69 | assert results['ori_filename'] == ['color.jpg', 'color.jpg'] 70 | assert 
results['img'].shape == (288, 512, 3, 2) 71 | assert results['img'].dtype == np.uint8 72 | assert results['img_shape'] == (288, 512, 3, 2) 73 | assert results['ori_shape'] == (288, 512, 3, 2) 74 | assert results['pad_shape'] == (288, 512, 3, 2) 75 | assert results['scale_factor'] == 1.0 76 | assert repr(transform) == transform.__class__.__name__ + \ 77 | "(to_float32=False, color_type='unchanged', " + \ 78 | "file_client_args={'backend': 'disk'})" 79 | 80 | def test_load_webcam_img(self): 81 | img = mmcv.imread(osp.join(self.data_prefix, 'color.jpg')) 82 | results = dict(img=img) 83 | transform = LoadImageFromWebcam() 84 | results = transform(copy.deepcopy(results)) 85 | assert results['filename'] is None 86 | assert results['ori_filename'] is None 87 | assert results['img'].shape == (288, 512, 3) 88 | assert results['img'].dtype == np.uint8 89 | assert results['img_shape'] == (288, 512, 3) 90 | assert results['ori_shape'] == (288, 512, 3) 91 | -------------------------------------------------------------------------------- /tests/test_data/test_utils.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from mmdet.datasets import replace_ImageToTensor 4 | 5 | 6 | def test_replace_ImageToTensor(): 7 | # with MultiScaleFlipAug 8 | pipelines = [ 9 | dict(type='LoadImageFromFile'), 10 | dict( 11 | type='MultiScaleFlipAug', 12 | img_scale=(1333, 800), 13 | flip=False, 14 | transforms=[ 15 | dict(type='Resize', keep_ratio=True), 16 | dict(type='RandomFlip'), 17 | dict(type='Normalize'), 18 | dict(type='Pad', size_divisor=32), 19 | dict(type='ImageToTensor', keys=['img']), 20 | dict(type='Collect', keys=['img']), 21 | ]) 22 | ] 23 | expected_pipelines = [ 24 | dict(type='LoadImageFromFile'), 25 | dict( 26 | type='MultiScaleFlipAug', 27 | img_scale=(1333, 800), 28 | flip=False, 29 | transforms=[ 30 | dict(type='Resize', keep_ratio=True), 31 | dict(type='RandomFlip'), 32 | dict(type='Normalize'), 33 | dict(type='Pad', size_divisor=32), 34 | dict(type='DefaultFormatBundle'), 35 | dict(type='Collect', keys=['img']), 36 | ]) 37 | ] 38 | with pytest.warns(UserWarning): 39 | assert expected_pipelines == replace_ImageToTensor(pipelines) 40 | 41 | # without MultiScaleFlipAug 42 | pipelines = [ 43 | dict(type='LoadImageFromFile'), 44 | dict(type='Resize', keep_ratio=True), 45 | dict(type='RandomFlip'), 46 | dict(type='Normalize'), 47 | dict(type='Pad', size_divisor=32), 48 | dict(type='ImageToTensor', keys=['img']), 49 | dict(type='Collect', keys=['img']), 50 | ] 51 | expected_pipelines = [ 52 | dict(type='LoadImageFromFile'), 53 | dict(type='Resize', keep_ratio=True), 54 | dict(type='RandomFlip'), 55 | dict(type='Normalize'), 56 | dict(type='Pad', size_divisor=32), 57 | dict(type='DefaultFormatBundle'), 58 | dict(type='Collect', keys=['img']), 59 | ] 60 | with pytest.warns(UserWarning): 61 | assert expected_pipelines == replace_ImageToTensor(pipelines) 62 | -------------------------------------------------------------------------------- /tests/test_misc.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pytest 3 | import torch 4 | 5 | from mmdet.core.mask.structures import BitmapMasks, PolygonMasks 6 | from mmdet.core.utils import mask2ndarray 7 | 8 | 9 | def dummy_raw_polygon_masks(size): 10 | """ 11 | Args: 12 | size (tuple): expected shape of dummy masks, (N, H, W) 13 | 14 | Return: 15 | list[list[ndarray]]: dummy mask 16 | """ 17 | num_obj, heigt, width = size 18 | polygons = 
[] 19 | for _ in range(num_obj): 20 | num_points = np.random.randint(5) * 2 + 6 21 | polygons.append([np.random.uniform(0, min(heigt, width), num_points)]) 22 | return polygons 23 | 24 | 25 | def test_mask2ndarray(): 26 | raw_masks = np.ones((3, 28, 28)) 27 | bitmap_mask = BitmapMasks(raw_masks, 28, 28) 28 | output_mask = mask2ndarray(bitmap_mask) 29 | assert np.allclose(raw_masks, output_mask) 30 | 31 | raw_masks = dummy_raw_polygon_masks((3, 28, 28)) 32 | polygon_masks = PolygonMasks(raw_masks, 28, 28) 33 | output_mask = mask2ndarray(polygon_masks) 34 | assert output_mask.shape == (3, 28, 28) 35 | 36 | raw_masks = np.ones((3, 28, 28)) 37 | output_mask = mask2ndarray(raw_masks) 38 | assert np.allclose(raw_masks, output_mask) 39 | 40 | raw_masks = torch.ones((3, 28, 28)) 41 | output_mask = mask2ndarray(raw_masks) 42 | assert np.allclose(raw_masks, output_mask) 43 | 44 | # test unsupported type 45 | raw_masks = [] 46 | with pytest.raises(TypeError): 47 | output_mask = mask2ndarray(raw_masks) 48 | -------------------------------------------------------------------------------- /tests/test_models/test_position_encoding.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import torch 3 | 4 | from mmdet.models.utils import (LearnedPositionalEncoding, 5 | SinePositionalEncoding) 6 | 7 | 8 | def test_sine_positional_encoding(num_feats=16, batch_size=2): 9 | # test invalid type of scale 10 | with pytest.raises(AssertionError): 11 | module = SinePositionalEncoding( 12 | num_feats, scale=(3., ), normalize=True) 13 | 14 | module = SinePositionalEncoding(num_feats) 15 | h, w = 10, 6 16 | mask = torch.rand(batch_size, h, w) > 0.5 17 | assert not module.normalize 18 | out = module(mask) 19 | assert out.shape == (batch_size, num_feats * 2, h, w) 20 | 21 | # set normalize 22 | module = SinePositionalEncoding(num_feats, normalize=True) 23 | assert module.normalize 24 | out = module(mask) 25 | assert out.shape == (batch_size, num_feats * 2, h, w) 26 | 27 | 28 | def test_learned_positional_encoding(num_feats=16, 29 | row_num_embed=10, 30 | col_num_embed=10, 31 | batch_size=2): 32 | module = LearnedPositionalEncoding(num_feats, row_num_embed, col_num_embed) 33 | assert module.row_embed.weight.shape == (row_num_embed, num_feats) 34 | assert module.col_embed.weight.shape == (col_num_embed, num_feats) 35 | h, w = 10, 6 36 | mask = torch.rand(batch_size, h, w) > 0.5 37 | out = module(mask) 38 | assert out.shape == (batch_size, num_feats * 2, h, w) 39 | -------------------------------------------------------------------------------- /tests/test_models/test_roi_extractor.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import torch 3 | 4 | from mmdet.models.roi_heads.roi_extractors import GenericRoIExtractor 5 | 6 | 7 | def test_groie(): 8 | # test with pre/post 9 | cfg = dict( 10 | roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=2), 11 | out_channels=256, 12 | featmap_strides=[4, 8, 16, 32], 13 | pre_cfg=dict( 14 | type='ConvModule', 15 | in_channels=256, 16 | out_channels=256, 17 | kernel_size=5, 18 | padding=2, 19 | inplace=False, 20 | ), 21 | post_cfg=dict( 22 | type='ConvModule', 23 | in_channels=256, 24 | out_channels=256, 25 | kernel_size=5, 26 | padding=2, 27 | inplace=False)) 28 | 29 | groie = GenericRoIExtractor(**cfg) 30 | 31 | feats = ( 32 | torch.rand((1, 256, 200, 336)), 33 | torch.rand((1, 256, 100, 168)), 34 | torch.rand((1, 256, 50, 84)), 35 | torch.rand((1, 256, 25, 42)), 
36 | ) 37 | 38 | rois = torch.tensor([[0.0000, 587.8285, 52.1405, 886.2484, 341.5644]]) 39 | 40 | res = groie(feats, rois) 41 | assert res.shape == torch.Size([1, 256, 7, 7]) 42 | 43 | # test w.o. pre/post 44 | cfg = dict( 45 | roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=2), 46 | out_channels=256, 47 | featmap_strides=[4, 8, 16, 32]) 48 | 49 | groie = GenericRoIExtractor(**cfg) 50 | 51 | feats = ( 52 | torch.rand((1, 256, 200, 336)), 53 | torch.rand((1, 256, 100, 168)), 54 | torch.rand((1, 256, 50, 84)), 55 | torch.rand((1, 256, 25, 42)), 56 | ) 57 | 58 | rois = torch.tensor([[0.0000, 587.8285, 52.1405, 886.2484, 341.5644]]) 59 | 60 | res = groie(feats, rois) 61 | assert res.shape == torch.Size([1, 256, 7, 7]) 62 | 63 | # test w.o. pre/post concat 64 | cfg = dict( 65 | aggregation='concat', 66 | roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=2), 67 | out_channels=256 * 4, 68 | featmap_strides=[4, 8, 16, 32]) 69 | 70 | groie = GenericRoIExtractor(**cfg) 71 | 72 | feats = ( 73 | torch.rand((1, 256, 200, 336)), 74 | torch.rand((1, 256, 100, 168)), 75 | torch.rand((1, 256, 50, 84)), 76 | torch.rand((1, 256, 25, 42)), 77 | ) 78 | 79 | rois = torch.tensor([[0.0000, 587.8285, 52.1405, 886.2484, 341.5644]]) 80 | 81 | res = groie(feats, rois) 82 | assert res.shape == torch.Size([1, 1024, 7, 7]) 83 | 84 | # test not supported aggregate method 85 | with pytest.raises(AssertionError): 86 | cfg = dict( 87 | aggregation='not support', 88 | roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=2), 89 | out_channels=1024, 90 | featmap_strides=[4, 8, 16, 32]) 91 | _ = GenericRoIExtractor(**cfg) 92 | 93 | # test concat channels number 94 | cfg = dict( 95 | aggregation='concat', 96 | roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=2), 97 | out_channels=256 * 5, # 256*5 != 256*4 98 | featmap_strides=[4, 8, 16, 32]) 99 | 100 | groie = GenericRoIExtractor(**cfg) 101 | 102 | feats = ( 103 | torch.rand((1, 256, 200, 336)), 104 | torch.rand((1, 256, 100, 168)), 105 | torch.rand((1, 256, 50, 84)), 106 | torch.rand((1, 256, 25, 42)), 107 | ) 108 | 109 | rois = torch.tensor([[0.0000, 587.8285, 52.1405, 886.2484, 341.5644]]) 110 | 111 | # out_channels does not sum of feat channels 112 | with pytest.raises(AssertionError): 113 | _ = groie(feats, rois) 114 | -------------------------------------------------------------------------------- /tests/test_version.py: -------------------------------------------------------------------------------- 1 | from mmdet import digit_version 2 | 3 | 4 | def test_version_check(): 5 | assert digit_version('1.0.5') > digit_version('1.0.5rc0') 6 | assert digit_version('1.0.5') > digit_version('1.0.4rc0') 7 | assert digit_version('1.0.5') > digit_version('1.0rc0') 8 | assert digit_version('1.0.0') > digit_version('0.6.2') 9 | assert digit_version('1.0.0') > digit_version('0.2.16') 10 | assert digit_version('1.0.5rc0') > digit_version('1.0.0rc0') 11 | assert digit_version('1.0.0rc1') > digit_version('1.0.0rc0') 12 | assert digit_version('1.0.0rc2') > digit_version('1.0.0rc0') 13 | assert digit_version('1.0.0rc2') > digit_version('1.0.0rc1') 14 | assert digit_version('1.0.1rc1') > digit_version('1.0.0rc1') 15 | assert digit_version('1.0.0') > digit_version('1.0.0rc1') 16 | -------------------------------------------------------------------------------- /tests/test_visualization.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Open-MMLab. All rights reserved. 
2 | import os 3 | import os.path as osp 4 | import tempfile 5 | 6 | import mmcv 7 | import numpy as np 8 | import pytest 9 | import torch 10 | 11 | from mmdet.core import visualization as vis 12 | 13 | 14 | def test_color(): 15 | assert vis.color_val_matplotlib(mmcv.Color.blue) == (0., 0., 1.) 16 | assert vis.color_val_matplotlib('green') == (0., 1., 0.) 17 | assert vis.color_val_matplotlib((1, 2, 3)) == (3 / 255, 2 / 255, 1 / 255) 18 | assert vis.color_val_matplotlib(100) == (100 / 255, 100 / 255, 100 / 255) 19 | assert vis.color_val_matplotlib(np.zeros(3, dtype=np.int)) == (0., 0., 0.) 20 | # forbid white color 21 | with pytest.raises(TypeError): 22 | vis.color_val_matplotlib([255, 255, 255]) 23 | # forbid float 24 | with pytest.raises(TypeError): 25 | vis.color_val_matplotlib(1.0) 26 | # overflowed 27 | with pytest.raises(AssertionError): 28 | vis.color_val_matplotlib((0, 0, 500)) 29 | 30 | 31 | def test_imshow_det_bboxes(): 32 | tmp_filename = osp.join(tempfile.gettempdir(), 'det_bboxes_image', 33 | 'image.jpg') 34 | image = np.ones((10, 10, 3), np.uint8) 35 | bbox = np.array([[2, 1, 3, 3], [3, 4, 6, 6]]) 36 | label = np.array([0, 1]) 37 | vis.imshow_det_bboxes( 38 | image, bbox, label, out_file=tmp_filename, show=False) 39 | assert osp.isfile(tmp_filename) 40 | 41 | # test shaped (0,) 42 | image = np.ones((10, 10, 3), np.uint8) 43 | bbox = np.ones((0, 4)) 44 | label = np.ones((0, )) 45 | vis.imshow_det_bboxes( 46 | image, bbox, label, out_file=tmp_filename, show=False) 47 | 48 | # test mask 49 | image = np.ones((10, 10, 3), np.uint8) 50 | bbox = np.array([[2, 1, 3, 3], [3, 4, 6, 6]]) 51 | label = np.array([0, 1]) 52 | segms = np.random.random((2, 10, 10)) > 0.5 53 | segms = np.array(segms, np.int32) 54 | vis.imshow_det_bboxes( 55 | image, bbox, label, segms, out_file=tmp_filename, show=False) 56 | assert osp.isfile(tmp_filename) 57 | 58 | os.remove(tmp_filename) 59 | 60 | # test tensor mask type error 61 | with pytest.raises(AttributeError): 62 | segms = torch.tensor(segms) 63 | vis.imshow_det_bboxes(image, bbox, label, segms, show=False) 64 | 65 | 66 | def test_imshow_gt_det_bboxes(): 67 | tmp_filename = osp.join(tempfile.gettempdir(), 'det_bboxes_image', 68 | 'image.jpg') 69 | image = np.ones((10, 10, 3), np.uint8) 70 | bbox = np.array([[2, 1, 3, 3], [3, 4, 6, 6]]) 71 | label = np.array([0, 1]) 72 | annotation = dict(gt_bboxes=bbox, gt_labels=label) 73 | det_result = np.array([[2, 1, 3, 3, 0], [3, 4, 6, 6, 1]]) 74 | result = [det_result] 75 | vis.imshow_gt_det_bboxes( 76 | image, annotation, result, out_file=tmp_filename, show=False) 77 | assert osp.isfile(tmp_filename) 78 | 79 | # test numpy mask 80 | gt_mask = np.ones((2, 10, 10)) 81 | annotation['gt_masks'] = gt_mask 82 | vis.imshow_gt_det_bboxes( 83 | image, annotation, result, out_file=tmp_filename, show=False) 84 | assert osp.isfile(tmp_filename) 85 | 86 | # test tensor mask 87 | gt_mask = torch.ones((2, 10, 10)) 88 | annotation['gt_masks'] = gt_mask 89 | vis.imshow_gt_det_bboxes( 90 | image, annotation, result, out_file=tmp_filename, show=False) 91 | assert osp.isfile(tmp_filename) 92 | 93 | os.remove(tmp_filename) 94 | 95 | # test unsupported type 96 | annotation['gt_masks'] = [] 97 | with pytest.raises(TypeError): 98 | vis.imshow_gt_det_bboxes(image, annotation, result, show=False) 99 | -------------------------------------------------------------------------------- /tools/benchmark.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import time 3 | 4 | import torch 5 
| from mmcv import Config 6 | from mmcv.cnn import fuse_conv_bn 7 | from mmcv.parallel import MMDataParallel 8 | from mmcv.runner import load_checkpoint, wrap_fp16_model 9 | 10 | from mmdet.datasets import (build_dataloader, build_dataset, 11 | replace_ImageToTensor) 12 | from mmdet.models import build_detector 13 | 14 | 15 | def parse_args(): 16 | parser = argparse.ArgumentParser(description='MMDet benchmark a model') 17 | parser.add_argument('config', help='test config file path') 18 | parser.add_argument('checkpoint', help='checkpoint file') 19 | parser.add_argument( 20 | '--log-interval', type=int, default=50, help='interval of logging') 21 | parser.add_argument( 22 | '--fuse-conv-bn', 23 | action='store_true', 24 | help='Whether to fuse conv and bn; this will slightly increase ' 25 | 'the inference speed') 26 | args = parser.parse_args() 27 | return args 28 | 29 | 30 | def main(): 31 | args = parse_args() 32 | 33 | cfg = Config.fromfile(args.config) 34 | # import modules from string list. 35 | if cfg.get('custom_imports', None): 36 | from mmcv.utils import import_modules_from_strings 37 | import_modules_from_strings(**cfg['custom_imports']) 38 | # set cudnn_benchmark 39 | if cfg.get('cudnn_benchmark', False): 40 | torch.backends.cudnn.benchmark = True 41 | cfg.model.pretrained = None 42 | cfg.data.test.test_mode = True 43 | 44 | # build the dataloader 45 | samples_per_gpu = cfg.data.test.pop('samples_per_gpu', 1) 46 | if samples_per_gpu > 1: 47 | # Replace 'ImageToTensor' with 'DefaultFormatBundle' 48 | cfg.data.test.pipeline = replace_ImageToTensor(cfg.data.test.pipeline) 49 | dataset = build_dataset(cfg.data.test) 50 | data_loader = build_dataloader( 51 | dataset, 52 | samples_per_gpu=1, 53 | workers_per_gpu=cfg.data.workers_per_gpu, 54 | dist=False, 55 | shuffle=False) 56 | 57 | # build the model and load checkpoint 58 | cfg.model.train_cfg = None 59 | model = build_detector(cfg.model, test_cfg=cfg.get('test_cfg')) 60 | fp16_cfg = cfg.get('fp16', None) 61 | if fp16_cfg is not None: 62 | wrap_fp16_model(model) 63 | load_checkpoint(model, args.checkpoint, map_location='cpu') 64 | if args.fuse_conv_bn: 65 | model = fuse_conv_bn(model) 66 | 67 | model = MMDataParallel(model, device_ids=[0]) 68 | 69 | model.eval() 70 | 71 | # the first several iterations may be very slow so skip them 72 | num_warmup = 5 73 | pure_inf_time = 0 74 | 75 | # benchmark with 2000 images and take the average 76 | for i, data in enumerate(data_loader): 77 | 78 | torch.cuda.synchronize() 79 | start_time = time.perf_counter() 80 | 81 | with torch.no_grad(): 82 | model(return_loss=False, rescale=True, **data) 83 | 84 | torch.cuda.synchronize() 85 | elapsed = time.perf_counter() - start_time 86 | 87 | if i >= num_warmup: 88 | pure_inf_time += elapsed 89 | if (i + 1) % args.log_interval == 0: 90 | fps = (i + 1 - num_warmup) / pure_inf_time 91 | print(f'Done image [{i + 1:<3}/ 2000], fps: {fps:.1f} img / s') 92 | 93 | if (i + 1) == 2000: 94 | # elapsed was already added to pure_inf_time above 95 | fps = (i + 1 - num_warmup) / pure_inf_time 96 | print(f'Overall fps: {fps:.1f} img / s') 97 | break 98 | 99 | 100 | if __name__ == '__main__': 101 | main() 102 | -------------------------------------------------------------------------------- /tools/browse_dataset.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | from pathlib import Path 4 | 5 | import mmcv 6 | from mmcv import Config 7 | 8 | from mmdet.core.utils import mask2ndarray 9 | from mmdet.core.visualization import imshow_det_bboxes
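# (Illustrative aside on tools/benchmark.py above, not part of browse_dataset.py:
#  the reported throughput is post-warmup iterations divided by accumulated pure
#  inference time. With num_warmup=5 and the default --log-interval of 50, the
#  line printed at i + 1 == 200 is fps = (200 - 5) / pure_inf_time, so 9.75 s of
#  accumulated time yields 20.0 img / s.)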
10 | from mmdet.datasets.builder import build_dataset 11 | 12 | 13 | def parse_args(): 14 | parser = argparse.ArgumentParser(description='Browse a dataset') 15 | parser.add_argument('config', help='train config file path') 16 | parser.add_argument( 17 | '--skip-type', 18 | type=str, 19 | nargs='+', 20 | default=['DefaultFormatBundle', 'Normalize', 'Collect'], 21 | help='skip some useless pipeline') 22 | parser.add_argument( 23 | '--output-dir', 24 | default=None, 25 | type=str, 26 | help='If there is no display interface, you can save it') 27 | parser.add_argument('--not-show', default=False, action='store_true') 28 | parser.add_argument( 29 | '--show-interval', 30 | type=float, 31 | default=2, 32 | help='the interval of show (s)') 33 | args = parser.parse_args() 34 | return args 35 | 36 | 37 | def retrieve_data_cfg(config_path, skip_type): 38 | cfg = Config.fromfile(config_path) 39 | train_data_cfg = cfg.data.train 40 | train_data_cfg['pipeline'] = [ 41 | x for x in train_data_cfg.pipeline if x['type'] not in skip_type 42 | ] 43 | 44 | return cfg 45 | 46 | 47 | def main(): 48 | args = parse_args() 49 | cfg = retrieve_data_cfg(args.config, args.skip_type) 50 | 51 | dataset = build_dataset(cfg.data.train) 52 | 53 | progress_bar = mmcv.ProgressBar(len(dataset)) 54 | 55 | for item in dataset: 56 | filename = os.path.join(args.output_dir, 57 | Path(item['filename']).name 58 | ) if args.output_dir is not None else None 59 | 60 | gt_masks = item.get('gt_masks', None) 61 | if gt_masks is not None: 62 | gt_masks = mask2ndarray(gt_masks) 63 | 64 | imshow_det_bboxes( 65 | item['img'], 66 | item['gt_bboxes'], 67 | item['gt_labels'], 68 | gt_masks, 69 | class_names=dataset.CLASSES, 70 | show=not args.not_show, 71 | wait_time=args.show_interval, 72 | out_file=filename, 73 | bbox_color=(255, 102, 61), 74 | text_color=(255, 102, 61)) 75 | 76 | progress_bar.update() 77 | 78 | 79 | if __name__ == '__main__': 80 | main() 81 | -------------------------------------------------------------------------------- /tools/compute_auc.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from sklearn import metrics 3 | 4 | # K (number of shots) 5 | x = np.array([1., 10., 30., 50., 100., 300., 500., 1000.]) 6 | x_log = np.log(x) / np.log(1000) 7 | # Average Recall scores 8 | y = np.array([0.0, 18.0, 26.5, 29.6, 33.4, 39.0, 41.5, 45.0]) 9 | y *= 0.01 10 | auc = metrics.auc(x_log, y) 11 | print('AUC score:', auc) -------------------------------------------------------------------------------- /tools/detectron2pytorch.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from collections import OrderedDict 3 | 4 | import mmcv 5 | import torch 6 | 7 | arch_settings = {50: (3, 4, 6, 3), 101: (3, 4, 23, 3)} 8 | 9 | 10 | def convert_bn(blobs, state_dict, caffe_name, torch_name, converted_names): 11 | # detectron replace bn with affine channel layer 12 | state_dict[torch_name + '.bias'] = torch.from_numpy(blobs[caffe_name + 13 | '_b']) 14 | state_dict[torch_name + '.weight'] = torch.from_numpy(blobs[caffe_name + 15 | '_s']) 16 | bn_size = state_dict[torch_name + '.weight'].size() 17 | state_dict[torch_name + '.running_mean'] = torch.zeros(bn_size) 18 | state_dict[torch_name + '.running_var'] = torch.ones(bn_size) 19 | converted_names.add(caffe_name + '_b') 20 | converted_names.add(caffe_name + '_s') 21 | 22 | 23 | def convert_conv_fc(blobs, state_dict, caffe_name, torch_name, 24 | converted_names): 25 | 
state_dict[torch_name + '.weight'] = torch.from_numpy(blobs[caffe_name + 26 | '_w']) 27 | converted_names.add(caffe_name + '_w') 28 | if caffe_name + '_b' in blobs: 29 | state_dict[torch_name + '.bias'] = torch.from_numpy(blobs[caffe_name + 30 | '_b']) 31 | converted_names.add(caffe_name + '_b') 32 | 33 | 34 | def convert(src, dst, depth): 35 | """Convert keys in detectron pretrained ResNet models to pytorch style.""" 36 | # load arch_settings 37 | if depth not in arch_settings: 38 | raise ValueError('Only support ResNet-50 and ResNet-101 currently') 39 | block_nums = arch_settings[depth] 40 | # load caffe model 41 | caffe_model = mmcv.load(src, encoding='latin1') 42 | blobs = caffe_model['blobs'] if 'blobs' in caffe_model else caffe_model 43 | # convert to pytorch style 44 | state_dict = OrderedDict() 45 | converted_names = set() 46 | convert_conv_fc(blobs, state_dict, 'conv1', 'conv1', converted_names) 47 | convert_bn(blobs, state_dict, 'res_conv1_bn', 'bn1', converted_names) 48 | for i in range(1, len(block_nums) + 1): 49 | for j in range(block_nums[i - 1]): 50 | if j == 0: 51 | convert_conv_fc(blobs, state_dict, f'res{i + 1}_{j}_branch1', 52 | f'layer{i}.{j}.downsample.0', converted_names) 53 | convert_bn(blobs, state_dict, f'res{i + 1}_{j}_branch1_bn', 54 | f'layer{i}.{j}.downsample.1', converted_names) 55 | for k, letter in enumerate(['a', 'b', 'c']): 56 | convert_conv_fc(blobs, state_dict, 57 | f'res{i + 1}_{j}_branch2{letter}', 58 | f'layer{i}.{j}.conv{k+1}', converted_names) 59 | convert_bn(blobs, state_dict, 60 | f'res{i + 1}_{j}_branch2{letter}_bn', 61 | f'layer{i}.{j}.bn{k + 1}', converted_names) 62 | # check if all layers are converted 63 | for key in blobs: 64 | if key not in converted_names: 65 | print(f'Not Convert: {key}') 66 | # save checkpoint 67 | checkpoint = dict() 68 | checkpoint['state_dict'] = state_dict 69 | torch.save(checkpoint, dst) 70 | 71 | 72 | def main(): 73 | parser = argparse.ArgumentParser(description='Convert model keys') 74 | parser.add_argument('src', help='src detectron model path') 75 | parser.add_argument('dst', help='save path') 76 | parser.add_argument('depth', type=int, help='ResNet model depth') 77 | args = parser.parse_args() 78 | convert(args.src, args.dst, args.depth) 79 | 80 | 81 | if __name__ == '__main__': 82 | main() 83 | -------------------------------------------------------------------------------- /tools/dist_test_bbox.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | CONFIG=$1 4 | CHECKPOINT=$2 5 | GPUS=$3 6 | PORT=${PORT:-29500} 7 | 8 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 9 | python -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \ 10 | $(dirname "$0")/test.py $CONFIG $CHECKPOINT --launcher pytorch ${@:4} \ 11 | --eval bbox -------------------------------------------------------------------------------- /tools/dist_train.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | CONFIG=$1 4 | GPUS=$2 5 | PORT=${PORT:-29500} 6 | 7 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 8 | python -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \ 9 | $(dirname "$0")/train.py $CONFIG --launcher pytorch ${@:3} --no-validate 10 | -------------------------------------------------------------------------------- /tools/dist_train_and_test_bbox.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | CONFIG=$1 4 | 
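# (Illustrative note, not in the original script: as in the launchers above,
# "${@:4}" below forwards every CLI argument from the 4th position onward, so
# extra flags such as --cfg-options reach both the train and the test run.)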
CHECKPOINT=$2 5 | GPUS=$3 6 | PORT=${PORT:-29500} 7 | 8 | 9 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 10 | python -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \ 11 | $(dirname "$0")/train.py $CONFIG --launcher pytorch ${@:4} --no-validate 12 | 13 | 14 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 15 | python -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \ 16 | $(dirname "$0")/test.py $CONFIG $CHECKPOINT --launcher pytorch ${@:4} \ 17 | --eval bbox 18 | 19 | 20 | -------------------------------------------------------------------------------- /tools/eval_metric.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | import mmcv 4 | from mmcv import Config, DictAction 5 | 6 | from mmdet.datasets import build_dataset 7 | 8 | 9 | def parse_args(): 10 | parser = argparse.ArgumentParser(description='Evaluate metric of the ' 11 | 'results saved in pkl format') 12 | parser.add_argument('config', help='Config of the model') 13 | parser.add_argument('pkl_results', help='Results in pickle format') 14 | parser.add_argument( 15 | '--format-only', 16 | action='store_true', 17 | help='Format the output results without performing evaluation. It is ' 18 | 'useful when you want to format the result to a specific format and ' 19 | 'submit it to the test server') 20 | parser.add_argument( 21 | '--eval', 22 | type=str, 23 | nargs='+', 24 | help='Evaluation metrics, which depend on the dataset, e.g., "bbox",' 25 | ' "segm", "proposal" for COCO, and "mAP", "recall" for PASCAL VOC') 26 | parser.add_argument( 27 | '--cfg-options', 28 | nargs='+', 29 | action=DictAction, 30 | help='override some settings in the used config, the key-value pair ' 31 | 'in xxx=yyy format will be merged into config file. If the value to ' 32 | 'be overwritten is a list, it should be like key="[a,b]" or key=a,b ' 33 | 'It also allows nested list/tuple values, e.g. 
key="[(a,b),(c,d)]" ' 34 | 'Note that the quotation marks are necessary and that no white space ' 35 | 'is allowed.') 36 | parser.add_argument( 37 | '--eval-options', 38 | nargs='+', 39 | action=DictAction, 40 | help='custom options for evaluation, the key-value pair in xxx=yyy ' 41 | 'format will be kwargs for dataset.evaluate() function') 42 | args = parser.parse_args() 43 | return args 44 | 45 | 46 | def main(): 47 | args = parse_args() 48 | 49 | cfg = Config.fromfile(args.config) 50 | assert args.eval or args.format_only, ( 51 | 'Please specify at least one operation (eval/format the results) with ' 52 | 'the argument "--eval", "--format-only"') 53 | if args.eval and args.format_only: 54 | raise ValueError('--eval and --format_only cannot be both specified') 55 | 56 | if args.cfg_options is not None: 57 | cfg.merge_from_dict(args.cfg_options) 58 | cfg.data.test.test_mode = True 59 | 60 | dataset = build_dataset(cfg.data.test) 61 | outputs = mmcv.load(args.pkl_results) 62 | 63 | kwargs = {} if args.eval_options is None else args.eval_options 64 | if args.format_only: 65 | dataset.format_results(outputs, **kwargs) 66 | if args.eval: 67 | eval_kwargs = cfg.get('evaluation', {}).copy() 68 | # hard-code way to remove EvalHook args 69 | for key in [ 70 | 'interval', 'tmpdir', 'start', 'gpu_collect', 'save_best', 71 | 'rule' 72 | ]: 73 | eval_kwargs.pop(key, None) 74 | eval_kwargs.update(dict(metric=args.eval, **kwargs)) 75 | print(dataset.evaluate(outputs, **eval_kwargs)) 76 | 77 | 78 | if __name__ == '__main__': 79 | main() 80 | -------------------------------------------------------------------------------- /tools/get_flops.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | import torch 4 | from mmcv import Config 5 | 6 | from mmdet.models import build_detector 7 | 8 | try: 9 | from mmcv.cnn import get_model_complexity_info 10 | except ImportError: 11 | raise ImportError('Please upgrade mmcv to >0.6.2') 12 | 13 | 14 | def parse_args(): 15 | parser = argparse.ArgumentParser(description='Train a detector') 16 | parser.add_argument('config', help='train config file path') 17 | parser.add_argument( 18 | '--shape', 19 | type=int, 20 | nargs='+', 21 | default=[1280, 800], 22 | help='input image size') 23 | args = parser.parse_args() 24 | return args 25 | 26 | 27 | def main(): 28 | 29 | args = parse_args() 30 | 31 | if len(args.shape) == 1: 32 | input_shape = (3, args.shape[0], args.shape[0]) 33 | elif len(args.shape) == 2: 34 | input_shape = (3, ) + tuple(args.shape) 35 | else: 36 | raise ValueError('invalid input shape') 37 | 38 | cfg = Config.fromfile(args.config) 39 | # import modules from string list. 40 | if cfg.get('custom_imports', None): 41 | from mmcv.utils import import_modules_from_strings 42 | import_modules_from_strings(**cfg['custom_imports']) 43 | 44 | model = build_detector( 45 | cfg.model, 46 | train_cfg=cfg.get('train_cfg'), 47 | test_cfg=cfg.get('test_cfg')) 48 | if torch.cuda.is_available(): 49 | model.cuda() 50 | model.eval() 51 | 52 | if hasattr(model, 'forward_dummy'): 53 | model.forward = model.forward_dummy 54 | else: 55 | raise NotImplementedError( 56 | 'FLOPs counter is currently not currently supported with {}'. 
57 | format(model.__class__.__name__)) 58 | 59 | flops, params = get_model_complexity_info(model, input_shape) 60 | split_line = '=' * 30 61 | print(f'{split_line}\nInput shape: {input_shape}\n' 62 | f'Flops: {flops}\nParams: {params}\n{split_line}') 63 | print('!!!Please be cautious if you use the results in papers. ' 64 | 'You may need to check if all ops are supported and verify that the ' 65 | 'flops computation is correct.') 66 | 67 | 68 | if __name__ == '__main__': 69 | main() 70 | -------------------------------------------------------------------------------- /tools/print_config.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | from mmcv import Config, DictAction 4 | 5 | 6 | def parse_args(): 7 | parser = argparse.ArgumentParser(description='Print the whole config') 8 | parser.add_argument('config', help='config file path') 9 | parser.add_argument( 10 | '--options', nargs='+', action=DictAction, help='arguments in dict') 11 | args = parser.parse_args() 12 | 13 | return args 14 | 15 | 16 | def main(): 17 | args = parse_args() 18 | 19 | cfg = Config.fromfile(args.config) 20 | if args.options is not None: 21 | cfg.merge_from_dict(args.options) 22 | print(f'Config:\n{cfg.pretty_text}') 23 | 24 | 25 | if __name__ == '__main__': 26 | main() 27 | -------------------------------------------------------------------------------- /tools/publish_model.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import subprocess 3 | 4 | import torch 5 | 6 | 7 | def parse_args(): 8 | parser = argparse.ArgumentParser( 9 | description='Process a checkpoint to be published') 10 | parser.add_argument('in_file', help='input checkpoint filename') 11 | parser.add_argument('out_file', help='output checkpoint filename') 12 | args = parser.parse_args() 13 | return args 14 | 15 | 16 | def process_checkpoint(in_file, out_file): 17 | checkpoint = torch.load(in_file, map_location='cpu') 18 | # remove optimizer for smaller file size 19 | if 'optimizer' in checkpoint: 20 | del checkpoint['optimizer'] 21 | # if it is necessary to remove some sensitive data in checkpoint['meta'], 22 | # add the code here. 
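# An illustrative sketch of that step, not part of the original file and kept
# commented out ('hook_msgs' and 'config' are assumed meta keys; adjust them to
# whatever the real checkpoint carries):
#   for key in ('hook_msgs', 'config'):
#       checkpoint.get('meta', {}).pop(key, None)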
23 | torch.save(checkpoint, out_file) 24 | sha = subprocess.check_output(['sha256sum', out_file]).decode() 25 | if out_file.endswith('.pth'): 26 | out_file_name = out_file[:-4] 27 | else: 28 | out_file_name = out_file 29 | final_file = out_file_name + f'-{sha[:8]}.pth' 30 | subprocess.Popen(['mv', out_file, final_file]) 31 | 32 | 33 | def main(): 34 | args = parse_args() 35 | process_checkpoint(args.in_file, args.out_file) 36 | 37 | 38 | if __name__ == '__main__': 39 | main() 40 | -------------------------------------------------------------------------------- /tools/regnet2mmdet.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from collections import OrderedDict 3 | 4 | import torch 5 | 6 | 7 | def convert_stem(model_key, model_weight, state_dict, converted_names): 8 | new_key = model_key.replace('stem.conv', 'conv1') 9 | new_key = new_key.replace('stem.bn', 'bn1') 10 | state_dict[new_key] = model_weight 11 | converted_names.add(model_key) 12 | print(f'Convert {model_key} to {new_key}') 13 | 14 | 15 | def convert_head(model_key, model_weight, state_dict, converted_names): 16 | new_key = model_key.replace('head.fc', 'fc') 17 | state_dict[new_key] = model_weight 18 | converted_names.add(model_key) 19 | print(f'Convert {model_key} to {new_key}') 20 | 21 | 22 | def convert_reslayer(model_key, model_weight, state_dict, converted_names): 23 | split_keys = model_key.split('.') 24 | layer, block, module = split_keys[:3] 25 | block_id = int(block[1:]) 26 | layer_name = f'layer{int(layer[1:])}' 27 | block_name = f'{block_id - 1}' 28 | 29 | if block_id == 1 and module == 'bn': 30 | new_key = f'{layer_name}.{block_name}.downsample.1.{split_keys[-1]}' 31 | elif block_id == 1 and module == 'proj': 32 | new_key = f'{layer_name}.{block_name}.downsample.0.{split_keys[-1]}' 33 | elif module == 'f': 34 | if split_keys[3] == 'a_bn': 35 | module_name = 'bn1' 36 | elif split_keys[3] == 'b_bn': 37 | module_name = 'bn2' 38 | elif split_keys[3] == 'c_bn': 39 | module_name = 'bn3' 40 | elif split_keys[3] == 'a': 41 | module_name = 'conv1' 42 | elif split_keys[3] == 'b': 43 | module_name = 'conv2' 44 | elif split_keys[3] == 'c': 45 | module_name = 'conv3' 46 | new_key = f'{layer_name}.{block_name}.{module_name}.{split_keys[-1]}' 47 | else: 48 | raise ValueError(f'Unsupported conversion of key {model_key}') 49 | print(f'Convert {model_key} to {new_key}') 50 | state_dict[new_key] = model_weight 51 | converted_names.add(model_key) 52 | 53 | 54 | def convert(src, dst): 55 | """Convert keys in pycls pretrained RegNet models to mmdet style.""" 56 | # load caffe model 57 | regnet_model = torch.load(src) 58 | blobs = regnet_model['model_state'] 59 | # convert to pytorch style 60 | state_dict = OrderedDict() 61 | converted_names = set() 62 | for key, weight in blobs.items(): 63 | if 'stem' in key: 64 | convert_stem(key, weight, state_dict, converted_names) 65 | elif 'head' in key: 66 | convert_head(key, weight, state_dict, converted_names) 67 | elif key.startswith('s'): 68 | convert_reslayer(key, weight, state_dict, converted_names) 69 | 70 | # check if all layers are converted 71 | for key in blobs: 72 | if key not in converted_names: 73 | print(f'not converted: {key}') 74 | # save checkpoint 75 | checkpoint = dict() 76 | checkpoint['state_dict'] = state_dict 77 | torch.save(checkpoint, dst) 78 | 79 | 80 | def main(): 81 | parser = argparse.ArgumentParser(description='Convert model keys') 82 | parser.add_argument('src', help='src detectron model path') 83 | 
parser.add_argument('dst', help='save path') 84 | args = parser.parse_args() 85 | convert(args.src, args.dst) 86 | 87 | 88 | if __name__ == '__main__': 89 | main() 90 | -------------------------------------------------------------------------------- /tools/slurm_test.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -x 4 | 5 | PARTITION=$1 6 | JOB_NAME=$2 7 | CONFIG=$3 8 | CHECKPOINT=$4 9 | GPUS=${GPUS:-8} 10 | GPUS_PER_NODE=${GPUS_PER_NODE:-8} 11 | CPUS_PER_TASK=${CPUS_PER_TASK:-5} 12 | PY_ARGS=${@:5} 13 | SRUN_ARGS=${SRUN_ARGS:-""} 14 | 15 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 16 | srun -p ${PARTITION} \ 17 | --job-name=${JOB_NAME} \ 18 | --gres=gpu:${GPUS_PER_NODE} \ 19 | --ntasks=${GPUS} \ 20 | --ntasks-per-node=${GPUS_PER_NODE} \ 21 | --cpus-per-task=${CPUS_PER_TASK} \ 22 | --kill-on-bad-exit=1 \ 23 | ${SRUN_ARGS} \ 24 | python -u tools/test.py ${CONFIG} ${CHECKPOINT} --launcher="slurm" ${PY_ARGS} 25 | -------------------------------------------------------------------------------- /tools/slurm_train.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -x 4 | 5 | PARTITION=$1 6 | JOB_NAME=$2 7 | CONFIG=$3 8 | WORK_DIR=$4 9 | GPUS=${GPUS:-8} 10 | GPUS_PER_NODE=${GPUS_PER_NODE:-8} 11 | CPUS_PER_TASK=${CPUS_PER_TASK:-5} 12 | SRUN_ARGS=${SRUN_ARGS:-""} 13 | PY_ARGS=${@:5} 14 | 15 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 16 | srun -p ${PARTITION} \ 17 | --job-name=${JOB_NAME} \ 18 | --gres=gpu:${GPUS_PER_NODE} \ 19 | --ntasks=${GPUS} \ 20 | --ntasks-per-node=${GPUS_PER_NODE} \ 21 | --cpus-per-task=${CPUS_PER_TASK} \ 22 | --kill-on-bad-exit=1 \ 23 | ${SRUN_ARGS} \ 24 | python -u tools/train.py ${CONFIG} --work-dir=${WORK_DIR} --launcher="slurm" ${PY_ARGS} 25 | --------------------------------------------------------------------------------
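# Example invocations of the Slurm launchers above (hypothetical partition and
# job names; the OLN config path exists under configs/oln_box/ in this repo,
# while the work dir and checkpoint name are placeholders):
#   GPUS=8 ./tools/slurm_train.sh dev oln_train configs/oln_box/oln_box.py ./work_dirs/oln_box
#   GPUS=8 ./tools/slurm_test.sh dev oln_test configs/oln_box/oln_box.py ./work_dirs/oln_box/latest.pth --eval bbox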