├── .gitignore ├── LICENSE ├── README.md ├── configs ├── _base_ │ ├── datasets │ │ ├── cityscapes_detection.py │ │ ├── cityscapes_instance.py │ │ ├── coco_detection.py │ │ ├── coco_instance.py │ │ ├── coco_instance_semantic.py │ │ ├── deepfashion.py │ │ ├── lvis_v0.5_detection.py │ │ ├── lvis_v0.5_detection_shot.py │ │ ├── lvis_v0.5_instance.py │ │ ├── lvis_v1_instance.py │ │ ├── voc0712.py │ │ └── wider_face.py │ ├── default_runtime.py │ ├── models │ │ ├── cascade_mask_rcnn_r50_fpn.py │ │ ├── cascade_rcnn_r50_fpn.py │ │ ├── fast_rcnn_r50_fpn.py │ │ ├── faster_rcnn_r50_caffe_c4.py │ │ ├── faster_rcnn_r50_caffe_dc5.py │ │ ├── faster_rcnn_r50_fpn.py │ │ ├── mask_rcnn_r50_caffe_c4.py │ │ ├── mask_rcnn_r50_fpn.py │ │ ├── retinanet_r50_fpn.py │ │ ├── rpn_r50_caffe_c4.py │ │ ├── rpn_r50_fpn.py │ │ └── ssd300.py │ └── schedules │ │ ├── schedule_1x.py │ │ ├── schedule_20e.py │ │ └── schedule_2x.py └── oln_box │ ├── class_agn_faster_rcnn.py │ └── oln_box.py ├── docker └── Dockerfile ├── docs ├── 1_exist_data_model.md ├── 2_new_data_model.md ├── 3_exist_data_new_model.md ├── Makefile ├── api.rst ├── changelog.md ├── compatibility.md ├── conf.py ├── conventions.md ├── faq.md ├── get_started.md ├── index.rst ├── make.bat ├── model_zoo.md ├── projects.md ├── robustness_benchmarking.md ├── stat.py ├── tutorials │ ├── config.md │ ├── customize_dataset.md │ ├── customize_losses.md │ ├── customize_models.md │ ├── customize_runtime.md │ ├── data_pipeline.md │ ├── finetune.md │ ├── index.rst │ └── pytorch2onnx.md └── useful_tools.md ├── images ├── epic.png └── oln_overview.png ├── mmdet ├── __init__.py ├── apis │ ├── __init__.py │ ├── inference.py │ ├── test.py │ └── train.py ├── core │ ├── __init__.py │ ├── anchor │ │ ├── __init__.py │ │ ├── anchor_generator.py │ │ ├── builder.py │ │ ├── point_generator.py │ │ └── utils.py │ ├── bbox │ │ ├── __init__.py │ │ ├── assigners │ │ │ ├── __init__.py │ │ │ ├── approx_max_iou_assigner.py │ │ │ ├── assign_result.py │ │ │ ├── atss_assigner.py │ │ │ ├── base_assigner.py │ │ │ ├── center_region_assigner.py │ │ │ ├── grid_assigner.py │ │ │ ├── hungarian_assigner.py │ │ │ ├── max_iou_assigner.py │ │ │ ├── point_assigner.py │ │ │ └── region_assigner.py │ │ ├── builder.py │ │ ├── coder │ │ │ ├── __init__.py │ │ │ ├── base_bbox_coder.py │ │ │ ├── bucketing_bbox_coder.py │ │ │ ├── delta_xywh_bbox_coder.py │ │ │ ├── legacy_delta_xywh_bbox_coder.py │ │ │ ├── pseudo_bbox_coder.py │ │ │ ├── tblr_bbox_coder.py │ │ │ └── yolo_bbox_coder.py │ │ ├── demodata.py │ │ ├── iou_calculators │ │ │ ├── __init__.py │ │ │ ├── builder.py │ │ │ └── iou2d_calculator.py │ │ ├── match_costs │ │ │ ├── __init__.py │ │ │ ├── builder.py │ │ │ └── match_cost.py │ │ ├── samplers │ │ │ ├── __init__.py │ │ │ ├── base_sampler.py │ │ │ ├── combined_sampler.py │ │ │ ├── instance_balanced_pos_sampler.py │ │ │ ├── iou_balanced_neg_sampler.py │ │ │ ├── ohem_sampler.py │ │ │ ├── pseudo_sampler.py │ │ │ ├── random_sampler.py │ │ │ ├── sampling_result.py │ │ │ └── score_hlr_sampler.py │ │ └── transforms.py │ ├── evaluation │ │ ├── __init__.py │ │ ├── bbox_overlaps.py │ │ ├── class_names.py │ │ ├── eval_hooks.py │ │ ├── mean_ap.py │ │ └── recall.py │ ├── export │ │ ├── __init__.py │ │ └── pytorch2onnx.py │ ├── fp16 │ │ ├── __init__.py │ │ └── deprecated_fp16_utils.py │ ├── mask │ │ ├── __init__.py │ │ ├── mask_target.py │ │ ├── structures.py │ │ └── utils.py │ ├── post_processing │ │ ├── __init__.py │ │ ├── bbox_nms.py │ │ └── merge_augs.py │ ├── utils │ │ ├── __init__.py │ │ ├── dist_utils.py │ │ └── misc.py │ └── 
visualization │ │ ├── __init__.py │ │ └── image.py ├── datasets │ ├── __init__.py │ ├── builder.py │ ├── coco.py │ ├── coco_split.py │ ├── cocoeval_wrappers.py │ ├── custom.py │ ├── dataset_wrappers.py │ ├── pipelines │ │ ├── __init__.py │ │ ├── auto_augment.py │ │ ├── compose.py │ │ ├── formating.py │ │ ├── instaboost.py │ │ ├── loading.py │ │ ├── test_time_aug.py │ │ └── transforms.py │ ├── samplers │ │ ├── __init__.py │ │ ├── distributed_sampler.py │ │ └── group_sampler.py │ └── utils.py ├── models │ ├── __init__.py │ ├── backbones │ │ ├── __init__.py │ │ ├── darknet.py │ │ ├── detectors_resnet.py │ │ ├── detectors_resnext.py │ │ ├── hourglass.py │ │ ├── hrnet.py │ │ ├── regnet.py │ │ ├── res2net.py │ │ ├── resnest.py │ │ ├── resnet.py │ │ ├── resnext.py │ │ ├── ssd_vgg.py │ │ └── trident_resnet.py │ ├── builder.py │ ├── dense_heads │ │ ├── __init__.py │ │ ├── anchor_head.py │ │ ├── base_dense_head.py │ │ ├── dense_test_mixins.py │ │ ├── oln_rpn_head.py │ │ ├── rpn_head.py │ │ └── rpn_test_mixin.py │ ├── detectors │ │ ├── __init__.py │ │ ├── base.py │ │ ├── faster_rcnn.py │ │ ├── mask_rcnn.py │ │ ├── rpn.py │ │ ├── rpn_detector.py │ │ └── two_stage.py │ ├── losses │ │ ├── __init__.py │ │ ├── accuracy.py │ │ ├── ae_loss.py │ │ ├── balanced_l1_loss.py │ │ ├── cross_entropy_loss.py │ │ ├── focal_loss.py │ │ ├── gaussian_focal_loss.py │ │ ├── gfocal_loss.py │ │ ├── ghm_loss.py │ │ ├── iou_loss.py │ │ ├── mse_loss.py │ │ ├── pisa_loss.py │ │ ├── smooth_l1_loss.py │ │ ├── utils.py │ │ └── varifocal_loss.py │ ├── necks │ │ ├── __init__.py │ │ └── fpn.py │ ├── roi_heads │ │ ├── __init__.py │ │ ├── base_roi_head.py │ │ ├── bbox_heads │ │ │ ├── __init__.py │ │ │ ├── bbox_head.py │ │ │ ├── convfc_bbox_head.py │ │ │ └── convfc_bbox_score_head.py │ │ ├── mask_heads │ │ │ ├── __init__.py │ │ │ ├── coarse_mask_head.py │ │ │ ├── fcn_mask_head.py │ │ │ ├── fused_semantic_head.py │ │ │ ├── grid_head.py │ │ │ ├── htc_mask_head.py │ │ │ ├── mask_point_head.py │ │ │ └── maskiou_head.py │ │ ├── oln_roi_head.py │ │ ├── roi_extractors │ │ │ ├── __init__.py │ │ │ ├── base_roi_extractor.py │ │ │ ├── generic_roi_extractor.py │ │ │ └── single_level_roi_extractor.py │ │ ├── shared_heads │ │ │ ├── __init__.py │ │ │ └── res_layer.py │ │ ├── standard_roi_head.py │ │ └── test_mixins.py │ └── utils │ │ ├── __init__.py │ │ ├── builder.py │ │ ├── gaussian_target.py │ │ ├── positional_encoding.py │ │ ├── res_layer.py │ │ └── transformer.py ├── utils │ ├── __init__.py │ ├── collect_env.py │ ├── contextmanagers.py │ ├── logger.py │ ├── profiling.py │ └── util_mixins.py └── version.py ├── pytest.ini ├── requirements.txt ├── requirements ├── build.txt ├── docs.txt ├── optional.txt ├── readthedocs.txt ├── runtime.txt └── tests.txt ├── resources ├── coco_test_12510.jpg ├── corruptions_sev_3.png ├── data_pipeline.png ├── loss_curve.png └── mmdet-logo.png ├── setup.cfg ├── setup.py ├── tests ├── async_benchmark.py ├── test_anchor.py ├── test_assigner.py ├── test_async.py ├── test_coder.py ├── test_config.py ├── test_data │ ├── test_dataset.py │ ├── test_formatting.py │ ├── test_img_augment.py │ ├── test_loading.py │ ├── test_models_aug_test.py │ ├── test_rotate.py │ ├── test_sampler.py │ ├── test_shear.py │ ├── test_transform.py │ ├── test_translate.py │ └── test_utils.py ├── test_eval_hook.py ├── test_fp16.py ├── test_iou2d_calculator.py ├── test_masks.py ├── test_misc.py ├── test_models │ ├── test_backbones.py │ ├── test_forward.py │ ├── test_heads.py │ ├── test_losses.py │ ├── test_necks.py │ ├── test_pisa_heads.py │ ├── 
test_position_encoding.py │ ├── test_roi_extractor.py │ └── test_transformer.py ├── test_version.py └── test_visualization.py └── tools ├── analyze_logs.py ├── analyze_results.py ├── benchmark.py ├── browse_dataset.py ├── coco_error_analysis.py ├── compute_auc.py ├── convert_datasets ├── cityscapes.py └── pascal_voc.py ├── detectron2pytorch.py ├── dist_test_bbox.sh ├── dist_train.sh ├── dist_train_and_test_bbox.sh ├── eval_metric.py ├── get_flops.py ├── print_config.py ├── publish_model.py ├── pytorch2onnx.py ├── regnet2mmdet.py ├── robustness_eval.py ├── slurm_test.sh ├── slurm_train.sh ├── test.py ├── test_robustness.py ├── train.py └── upgrade_model_version.py /.gitignore: --------------------------------------------------------------------------------

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
.pytest_cache/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# pyenv
.python-version

# celery beat schedule file
celerybeat-schedule

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/

data/
data
.vscode
.idea
.DS_Store

# custom
*.pkl
*.pkl.json
*.log.json
work_dirs/
run_scripts/

# PyTorch
*.pth
*.py~
*.sh~

# Token
token.txt

-------------------------------------------------------------------------------- /README.md: --------------------------------------------------------------------------------

# Learning Open-World Object Proposals without Learning to Classify

## PyTorch implementation of "Learning Open-World Object Proposals without Learning to Classify" ([RA-L and ICRA 2022](https://arxiv.org/abs/2108.06753))
[Dahun Kim](https://mcahny.github.io/), [Tsung-Yi Lin](https://scholar.google.com/citations?user=_BPdgV0AAAAJ), [Anelia Angelova](https://scholar.google.co.kr/citations?user=nkmDOPgAAAAJ), [In So Kweon](https://rcv.kaist.ac.kr), and [Weicheng Kuo](https://weichengkuo.github.io/).

```bibtex
@article{kim2021oln,
  title={Learning Open-World Object Proposals without Learning to Classify},
  author={Kim, Dahun and Lin, Tsung-Yi and Angelova, Anelia and Kweon, In So and Kuo, Weicheng},
  journal={IEEE Robotics and Automation Letters (RA-L)},
  year={2022}
}
```

## Introduction

Humans can recognize novel objects in an image despite never having seen them before. "Is it possible to learn open-world (novel) object proposals?" In this paper we propose the **Object Localization Network (OLN)**, which learns localization cues instead of foreground-vs-background classification. Although trained only on COCO, OLN can propose many novel objects (top) that Mask R-CNN misses (bottom) on an out-of-sample frame from an ego-centric video.
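To make "localization cues instead of classification" concrete, here is a minimal, self-contained sketch of one such cue: a centerness-style localization-quality target that a head can regress in place of a foreground/background label. This is our own simplification for illustration; the targets and losses this repo actually uses are defined in `mmdet/models/dense_heads/oln_rpn_head.py` and `mmdet/models/roi_heads/oln_roi_head.py` and may differ in detail.

```python
import torch

def centerness_target(locations, gt_boxes):
    """Toy localization-quality target: how centered a point is in its box.

    locations: (N, 2) tensor of (x, y) points, each matched to a box.
    gt_boxes:  (N, 4) tensor of (x1, y1, x2, y2) boxes.
    Returns an (N,) tensor in [0, 1]: 1.0 at the box center, 0.0 on the border.
    """
    left = locations[:, 0] - gt_boxes[:, 0]
    right = gt_boxes[:, 2] - locations[:, 0]
    top = locations[:, 1] - gt_boxes[:, 1]
    bottom = gt_boxes[:, 3] - locations[:, 1]
    lr = torch.stack([left, right], dim=-1)
    tb = torch.stack([top, bottom], dim=-1)
    # Ratio of shorter to longer center-to-border distance on each axis;
    # the clamp guards against points falling outside their matched box.
    ratio = (lr.min(-1).values / lr.max(-1).values).clamp(min=0) * \
            (tb.min(-1).values / tb.max(-1).values).clamp(min=0)
    return ratio.sqrt()
```

A head regressing such a target (e.g. with an L1 loss) scores "how well does this region localize an object" rather than "is this one of the K training classes", which is what allows the proposals to transfer to unseen categories.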
## Cross-category generalization on COCO

We train OLN on the COCO VOC categories and test it on the non-VOC categories. Note that our AR@k evaluation does not count proposals on the 'seen' classes toward the budget (k), to avoid evaluating recall on seen-class objects (a toy sketch of this rule is given in the appendix at the end of this README).

| Method  | AUC  | AR@10 | AR@30 | AR@100 | AR@300 | AR@1000 | Download |
|:-------:|:----:|:-----:|:-----:|:------:|:------:|:-------:|:--------:|
| OLN-Box | 24.8 | 18.0  | 26.4  | 33.4   | 39.0   | 45.0    | [model](https://drive.google.com/uc?id=1uL6TRhpSILvWeR6DZ0x9K9VywrQXQvq9) |

## Disclaimer

This repo is tested under Python 3.7, PyTorch 1.7.0, CUDA 11.0, and mmcv==1.2.5.

## Installation

This repo is built on top of [mmdetection](https://github.com/open-mmlab/mmdetection).

You can use the following commands to create a conda environment with the required dependencies.
```
conda create -n oln python=3.7 -y
conda activate oln
conda install pytorch=1.7.0 torchvision cudatoolkit=11.0 -c pytorch -y
pip install mmcv-full==1.2.7
pip install -r requirements.txt
pip install -v -e .
```
Please also refer to [get_started.md](docs/get_started.md) for more installation details.

## Prepare datasets

The COCO dataset is available from its official website. It is recommended to download and extract the dataset somewhere outside the project directory and to symlink the dataset root to `$OLN/data`, as below.
```
object_localization_network
├── mmdet
├── tools
├── configs
├── data
│   ├── coco
│   │   ├── annotations
│   │   ├── train2017
│   │   ├── val2017
│   │   ├── test2017
```

## Testing

Our trained model is available for download [here](https://drive.google.com/uc?id=1uL6TRhpSILvWeR6DZ0x9K9VywrQXQvq9). Place it at `trained_weights/latest.pth` and run the following commands to test OLN on the COCO dataset.

```
# Multi-GPU distributed testing
bash tools/dist_test_bbox.sh configs/oln_box/oln_box.py \
    trained_weights/latest.pth ${NUM_GPUS}
# OR single-GPU testing
python tools/test.py configs/oln_box/oln_box.py trained_weights/latest.pth --eval bbox
```

## Training

```
# Multi-GPU distributed training
bash tools/dist_train.sh configs/oln_box/oln_box.py ${NUM_GPUS}
```

## Contact

If you have any questions regarding the repo, please contact Dahun Kim (mcahny01@gmail.com) or create an issue.
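## Appendix: toy sketch of the cross-category AR@k rule

As a concrete, deliberately simplified illustration of the budgeting rule described in the cross-category section above, the sketch below discards every proposal that covers a seen-class (VOC) ground-truth box before the top-k budget is applied, so that recall is measured on unseen-class objects only. This is our own toy code, not the repo's evaluation; the split and evaluation the repo actually runs live in `mmdet/datasets/coco_split.py` and `mmdet/datasets/cocoeval_wrappers.py`, and details such as the matching threshold below are our assumptions.

```python
import numpy as np

def iou(a, b):
    """IoU of two boxes in (x1, y1, x2, y2) format."""
    x1, y1 = np.maximum(a[:2], b[:2])
    x2, y2 = np.minimum(a[2:], b[2:])
    inter = max(0.0, x2 - x1) * max(0.0, y2 - y1)
    area_a = (a[2] - a[0]) * (a[3] - a[1])
    area_b = (b[2] - b[0]) * (b[3] - b[1])
    return inter / (area_a + area_b - inter)

def topk_unseen_proposals(proposals, scores, seen_gt_boxes, k, iou_thr=0.5):
    """Drop proposals that cover seen-class GT, then keep the top-k of the rest.

    proposals: (N, 4) boxes, scores: (N,), seen_gt_boxes: (M, 4).
    AR@k is then computed against the unseen-class GT on this output,
    so seen-class objects never consume any of the k-proposal budget.
    """
    keep = np.array([
        all(iou(p, g) < iou_thr for g in seen_gt_boxes) for p in proposals
    ], dtype=bool)
    kept, kept_scores = proposals[keep], scores[keep]
    order = np.argsort(-kept_scores)[:k]
    return kept[order]
```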
94 | -------------------------------------------------------------------------------- /configs/_base_/datasets/cityscapes_detection.py: -------------------------------------------------------------------------------- 1 | dataset_type = 'CityscapesDataset' 2 | data_root = 'data/cityscapes/' 3 | img_norm_cfg = dict( 4 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 5 | train_pipeline = [ 6 | dict(type='LoadImageFromFile'), 7 | dict(type='LoadAnnotations', with_bbox=True), 8 | dict( 9 | type='Resize', img_scale=[(2048, 800), (2048, 1024)], keep_ratio=True), 10 | dict(type='RandomFlip', flip_ratio=0.5), 11 | dict(type='Normalize', **img_norm_cfg), 12 | dict(type='Pad', size_divisor=32), 13 | dict(type='DefaultFormatBundle'), 14 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), 15 | ] 16 | test_pipeline = [ 17 | dict(type='LoadImageFromFile'), 18 | dict( 19 | type='MultiScaleFlipAug', 20 | img_scale=(2048, 1024), 21 | flip=False, 22 | transforms=[ 23 | dict(type='Resize', keep_ratio=True), 24 | dict(type='RandomFlip'), 25 | dict(type='Normalize', **img_norm_cfg), 26 | dict(type='Pad', size_divisor=32), 27 | dict(type='ImageToTensor', keys=['img']), 28 | dict(type='Collect', keys=['img']), 29 | ]) 30 | ] 31 | data = dict( 32 | samples_per_gpu=1, 33 | workers_per_gpu=2, 34 | train=dict( 35 | type='RepeatDataset', 36 | times=8, 37 | dataset=dict( 38 | type=dataset_type, 39 | ann_file=data_root + 40 | 'annotations/instancesonly_filtered_gtFine_train.json', 41 | img_prefix=data_root + 'leftImg8bit/train/', 42 | pipeline=train_pipeline)), 43 | val=dict( 44 | type=dataset_type, 45 | ann_file=data_root + 46 | 'annotations/instancesonly_filtered_gtFine_val.json', 47 | img_prefix=data_root + 'leftImg8bit/val/', 48 | pipeline=test_pipeline), 49 | test=dict( 50 | type=dataset_type, 51 | ann_file=data_root + 52 | 'annotations/instancesonly_filtered_gtFine_test.json', 53 | img_prefix=data_root + 'leftImg8bit/test/', 54 | pipeline=test_pipeline)) 55 | evaluation = dict(interval=1, metric='bbox') 56 | -------------------------------------------------------------------------------- /configs/_base_/datasets/cityscapes_instance.py: -------------------------------------------------------------------------------- 1 | dataset_type = 'CityscapesDataset' 2 | data_root = 'data/cityscapes/' 3 | img_norm_cfg = dict( 4 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 5 | train_pipeline = [ 6 | dict(type='LoadImageFromFile'), 7 | dict(type='LoadAnnotations', with_bbox=True, with_mask=True), 8 | dict( 9 | type='Resize', img_scale=[(2048, 800), (2048, 1024)], keep_ratio=True), 10 | dict(type='RandomFlip', flip_ratio=0.5), 11 | dict(type='Normalize', **img_norm_cfg), 12 | dict(type='Pad', size_divisor=32), 13 | dict(type='DefaultFormatBundle'), 14 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), 15 | ] 16 | test_pipeline = [ 17 | dict(type='LoadImageFromFile'), 18 | dict( 19 | type='MultiScaleFlipAug', 20 | img_scale=(2048, 1024), 21 | flip=False, 22 | transforms=[ 23 | dict(type='Resize', keep_ratio=True), 24 | dict(type='RandomFlip'), 25 | dict(type='Normalize', **img_norm_cfg), 26 | dict(type='Pad', size_divisor=32), 27 | dict(type='ImageToTensor', keys=['img']), 28 | dict(type='Collect', keys=['img']), 29 | ]) 30 | ] 31 | data = dict( 32 | samples_per_gpu=1, 33 | workers_per_gpu=2, 34 | train=dict( 35 | type='RepeatDataset', 36 | times=8, 37 | dataset=dict( 38 | type=dataset_type, 39 | ann_file=data_root + 40 | 
'annotations/instancesonly_filtered_gtFine_train.json', 41 | img_prefix=data_root + 'leftImg8bit/train/', 42 | pipeline=train_pipeline)), 43 | val=dict( 44 | type=dataset_type, 45 | ann_file=data_root + 46 | 'annotations/instancesonly_filtered_gtFine_val.json', 47 | img_prefix=data_root + 'leftImg8bit/val/', 48 | pipeline=test_pipeline), 49 | test=dict( 50 | type=dataset_type, 51 | ann_file=data_root + 52 | 'annotations/instancesonly_filtered_gtFine_test.json', 53 | img_prefix=data_root + 'leftImg8bit/test/', 54 | pipeline=test_pipeline)) 55 | evaluation = dict(metric=['bbox', 'segm']) 56 | -------------------------------------------------------------------------------- /configs/_base_/datasets/coco_detection.py: -------------------------------------------------------------------------------- 1 | dataset_type = 'CocoDataset' 2 | data_root = 'data/coco/' 3 | img_norm_cfg = dict( 4 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 5 | train_pipeline = [ 6 | dict(type='LoadImageFromFile'), 7 | dict(type='LoadAnnotations', with_bbox=True), 8 | dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), 9 | dict(type='RandomFlip', flip_ratio=0.5), 10 | dict(type='Normalize', **img_norm_cfg), 11 | dict(type='Pad', size_divisor=32), 12 | dict(type='DefaultFormatBundle'), 13 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), 14 | ] 15 | test_pipeline = [ 16 | dict(type='LoadImageFromFile'), 17 | dict( 18 | type='MultiScaleFlipAug', 19 | img_scale=(1333, 800), 20 | flip=False, 21 | transforms=[ 22 | dict(type='Resize', keep_ratio=True), 23 | dict(type='RandomFlip'), 24 | dict(type='Normalize', **img_norm_cfg), 25 | dict(type='Pad', size_divisor=32), 26 | dict(type='ImageToTensor', keys=['img']), 27 | dict(type='Collect', keys=['img']), 28 | ]) 29 | ] 30 | data = dict( 31 | samples_per_gpu=2, 32 | workers_per_gpu=2, 33 | train=dict( 34 | type=dataset_type, 35 | ann_file=data_root + 'annotations/instances_train2017.json', 36 | img_prefix=data_root + 'train2017/', 37 | pipeline=train_pipeline), 38 | val=dict( 39 | type=dataset_type, 40 | ann_file=data_root + 'annotations/instances_val2017.json', 41 | img_prefix=data_root + 'val2017/', 42 | pipeline=test_pipeline), 43 | test=dict( 44 | type=dataset_type, 45 | ann_file=data_root + 'annotations/instances_val2017.json', 46 | img_prefix=data_root + 'val2017/', 47 | pipeline=test_pipeline)) 48 | evaluation = dict(interval=1, metric='bbox') 49 | -------------------------------------------------------------------------------- /configs/_base_/datasets/coco_instance.py: -------------------------------------------------------------------------------- 1 | dataset_type = 'CocoDataset' 2 | data_root = 'data/coco/' 3 | img_norm_cfg = dict( 4 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 5 | train_pipeline = [ 6 | dict(type='LoadImageFromFile'), 7 | dict(type='LoadAnnotations', with_bbox=True, with_mask=True), 8 | dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), 9 | dict(type='RandomFlip', flip_ratio=0.5), 10 | dict(type='Normalize', **img_norm_cfg), 11 | dict(type='Pad', size_divisor=32), 12 | dict(type='DefaultFormatBundle'), 13 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), 14 | ] 15 | test_pipeline = [ 16 | dict(type='LoadImageFromFile'), 17 | dict( 18 | type='MultiScaleFlipAug', 19 | img_scale=(1333, 800), 20 | flip=False, 21 | transforms=[ 22 | dict(type='Resize', keep_ratio=True), 23 | dict(type='RandomFlip'), 24 | dict(type='Normalize', 
**img_norm_cfg), 25 | dict(type='Pad', size_divisor=32), 26 | dict(type='ImageToTensor', keys=['img']), 27 | dict(type='Collect', keys=['img']), 28 | ]) 29 | ] 30 | data = dict( 31 | samples_per_gpu=2, 32 | workers_per_gpu=2, 33 | train=dict( 34 | type=dataset_type, 35 | ann_file=data_root + 'annotations/instances_train2017.json', 36 | img_prefix=data_root + 'train2017/', 37 | pipeline=train_pipeline), 38 | val=dict( 39 | type=dataset_type, 40 | ann_file=data_root + 'annotations/instances_val2017.json', 41 | img_prefix=data_root + 'val2017/', 42 | pipeline=test_pipeline), 43 | test=dict( 44 | type=dataset_type, 45 | ann_file=data_root + 'annotations/instances_val2017.json', 46 | img_prefix=data_root + 'val2017/', 47 | pipeline=test_pipeline)) 48 | evaluation = dict(metric=['bbox', 'segm']) 49 | -------------------------------------------------------------------------------- /configs/_base_/datasets/coco_instance_semantic.py: -------------------------------------------------------------------------------- 1 | dataset_type = 'CocoDataset' 2 | data_root = 'data/coco/' 3 | img_norm_cfg = dict( 4 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 5 | train_pipeline = [ 6 | dict(type='LoadImageFromFile'), 7 | dict( 8 | type='LoadAnnotations', with_bbox=True, with_mask=True, with_seg=True), 9 | dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), 10 | dict(type='RandomFlip', flip_ratio=0.5), 11 | dict(type='Normalize', **img_norm_cfg), 12 | dict(type='Pad', size_divisor=32), 13 | dict(type='SegRescale', scale_factor=1 / 8), 14 | dict(type='DefaultFormatBundle'), 15 | dict( 16 | type='Collect', 17 | keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks', 'gt_semantic_seg']), 18 | ] 19 | test_pipeline = [ 20 | dict(type='LoadImageFromFile'), 21 | dict( 22 | type='MultiScaleFlipAug', 23 | img_scale=(1333, 800), 24 | flip=False, 25 | transforms=[ 26 | dict(type='Resize', keep_ratio=True), 27 | dict(type='RandomFlip', flip_ratio=0.5), 28 | dict(type='Normalize', **img_norm_cfg), 29 | dict(type='Pad', size_divisor=32), 30 | dict(type='ImageToTensor', keys=['img']), 31 | dict(type='Collect', keys=['img']), 32 | ]) 33 | ] 34 | data = dict( 35 | samples_per_gpu=2, 36 | workers_per_gpu=2, 37 | train=dict( 38 | type=dataset_type, 39 | ann_file=data_root + 'annotations/instances_train2017.json', 40 | img_prefix=data_root + 'train2017/', 41 | seg_prefix=data_root + 'stuffthingmaps/train2017/', 42 | pipeline=train_pipeline), 43 | val=dict( 44 | type=dataset_type, 45 | ann_file=data_root + 'annotations/instances_val2017.json', 46 | img_prefix=data_root + 'val2017/', 47 | pipeline=test_pipeline), 48 | test=dict( 49 | type=dataset_type, 50 | ann_file=data_root + 'annotations/instances_val2017.json', 51 | img_prefix=data_root + 'val2017/', 52 | pipeline=test_pipeline)) 53 | evaluation = dict(metric=['bbox', 'segm']) 54 | -------------------------------------------------------------------------------- /configs/_base_/datasets/deepfashion.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'DeepFashionDataset' 3 | data_root = 'data/DeepFashion/In-shop/' 4 | img_norm_cfg = dict( 5 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 6 | train_pipeline = [ 7 | dict(type='LoadImageFromFile'), 8 | dict(type='LoadAnnotations', with_bbox=True, with_mask=True), 9 | dict(type='Resize', img_scale=(750, 1101), keep_ratio=True), 10 | dict(type='RandomFlip', flip_ratio=0.5), 11 | dict(type='Normalize', 
**img_norm_cfg), 12 | dict(type='Pad', size_divisor=32), 13 | dict(type='DefaultFormatBundle'), 14 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), 15 | ] 16 | test_pipeline = [ 17 | dict(type='LoadImageFromFile'), 18 | dict( 19 | type='MultiScaleFlipAug', 20 | img_scale=(750, 1101), 21 | flip=False, 22 | transforms=[ 23 | dict(type='Resize', keep_ratio=True), 24 | dict(type='RandomFlip'), 25 | dict(type='Normalize', **img_norm_cfg), 26 | dict(type='Pad', size_divisor=32), 27 | dict(type='ImageToTensor', keys=['img']), 28 | dict(type='Collect', keys=['img']), 29 | ]) 30 | ] 31 | data = dict( 32 | imgs_per_gpu=2, 33 | workers_per_gpu=1, 34 | train=dict( 35 | type=dataset_type, 36 | ann_file=data_root + 'annotations/DeepFashion_segmentation_query.json', 37 | img_prefix=data_root + 'Img/', 38 | pipeline=train_pipeline, 39 | data_root=data_root), 40 | val=dict( 41 | type=dataset_type, 42 | ann_file=data_root + 'annotations/DeepFashion_segmentation_query.json', 43 | img_prefix=data_root + 'Img/', 44 | pipeline=test_pipeline, 45 | data_root=data_root), 46 | test=dict( 47 | type=dataset_type, 48 | ann_file=data_root + 49 | 'annotations/DeepFashion_segmentation_gallery.json', 50 | img_prefix=data_root + 'Img/', 51 | pipeline=test_pipeline, 52 | data_root=data_root)) 53 | evaluation = dict(interval=5, metric=['bbox', 'segm']) 54 | -------------------------------------------------------------------------------- /configs/_base_/datasets/lvis_v0.5_detection.py: -------------------------------------------------------------------------------- 1 | _base_ = 'coco_detection.py' 2 | dataset_type = 'LVISV05Dataset' 3 | data_root = 'data/LVIS/' 4 | data = dict( 5 | samples_per_gpu=2, 6 | workers_per_gpu=2, 7 | train=dict( 8 | _delete_=True, 9 | type='ClassBalancedDataset', 10 | oversample_thr=1e-3, 11 | dataset=dict( 12 | type=dataset_type, 13 | ann_file=data_root + 'annotations/lvis_v0.5_train.json', 14 | # ann_file=data_root + 'annotations/lvis_v0.5_train_10.json', 15 | img_prefix=data_root + 'train2017/')), 16 | val=dict( 17 | type=dataset_type, 18 | ann_file=data_root + 'annotations/lvis_v0.5_val.json', 19 | img_prefix=data_root + 'val2017/'), 20 | test=dict( 21 | type=dataset_type, 22 | ann_file=data_root + 'annotations/lvis_v0.5_val.json', 23 | img_prefix=data_root + 'val2017/')) 24 | evaluation = dict(metric=['bbox', 'segm']) 25 | -------------------------------------------------------------------------------- /configs/_base_/datasets/lvis_v0.5_detection_shot.py: -------------------------------------------------------------------------------- 1 | _base_ = 'coco_detection.py' 2 | dataset_type = 'LVISV05Dataset' 3 | data_root = 'data/LVIS/' 4 | data = dict( 5 | samples_per_gpu=2, 6 | workers_per_gpu=2, 7 | train=dict( 8 | _delete_=True, 9 | type='ClassBalancedDataset', 10 | oversample_thr=1e-3, 11 | dataset=dict( 12 | type=dataset_type, 13 | # ann_file=data_root + 'annotations/lvis_v0.5_train.json', 14 | ann_file=data_root + 'annotations/lvis_v0.5_train_10.json', 15 | img_prefix=data_root + 'train2017/')), 16 | val=dict( 17 | type=dataset_type, 18 | ann_file=data_root + 'annotations/lvis_v0.5_val.json', 19 | img_prefix=data_root + 'val2017/'), 20 | test=dict( 21 | type=dataset_type, 22 | ann_file=data_root + 'annotations/lvis_v0.5_val.json', 23 | img_prefix=data_root + 'val2017/')) 24 | evaluation = dict(metric=['bbox', 'segm']) 25 | -------------------------------------------------------------------------------- /configs/_base_/datasets/lvis_v0.5_instance.py: 
-------------------------------------------------------------------------------- 1 | _base_ = 'coco_instance.py' 2 | dataset_type = 'LVISV05Dataset' 3 | # data_root = 'data/lvis_v0.5/' 4 | data_root = '/data2/LVIS/' 5 | data = dict( 6 | samples_per_gpu=2, 7 | workers_per_gpu=2, 8 | train=dict( 9 | _delete_=True, 10 | type='ClassBalancedDataset', 11 | oversample_thr=1e-3, 12 | dataset=dict( 13 | type=dataset_type, 14 | ann_file=data_root + 'annotations/lvis_v0.5_train.json', 15 | img_prefix=data_root + 'train2017/')), 16 | val=dict( 17 | type=dataset_type, 18 | ann_file=data_root + 'annotations/lvis_v0.5_val.json', 19 | img_prefix=data_root + 'val2017/'), 20 | test=dict( 21 | type=dataset_type, 22 | ann_file=data_root + 'annotations/lvis_v0.5_val.json', 23 | img_prefix=data_root + 'val2017/')) 24 | evaluation = dict(metric=['bbox', 'segm']) 25 | -------------------------------------------------------------------------------- /configs/_base_/datasets/lvis_v1_instance.py: -------------------------------------------------------------------------------- 1 | _base_ = 'coco_instance.py' 2 | dataset_type = 'LVISV1Dataset' 3 | data_root = 'data/lvis_v1/' 4 | data = dict( 5 | samples_per_gpu=2, 6 | workers_per_gpu=2, 7 | train=dict( 8 | _delete_=True, 9 | type='ClassBalancedDataset', 10 | oversample_thr=1e-3, 11 | dataset=dict( 12 | type=dataset_type, 13 | ann_file=data_root + 'annotations/lvis_v1_train.json', 14 | img_prefix=data_root)), 15 | val=dict( 16 | type=dataset_type, 17 | ann_file=data_root + 'annotations/lvis_v1_val.json', 18 | img_prefix=data_root), 19 | test=dict( 20 | type=dataset_type, 21 | ann_file=data_root + 'annotations/lvis_v1_val.json', 22 | img_prefix=data_root)) 23 | evaluation = dict(metric=['bbox', 'segm']) 24 | -------------------------------------------------------------------------------- /configs/_base_/datasets/voc0712.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'VOCDataset' 3 | data_root = 'data/VOCdevkit/' 4 | img_norm_cfg = dict( 5 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 6 | train_pipeline = [ 7 | dict(type='LoadImageFromFile'), 8 | dict(type='LoadAnnotations', with_bbox=True), 9 | dict(type='Resize', img_scale=(1000, 600), keep_ratio=True), 10 | dict(type='RandomFlip', flip_ratio=0.5), 11 | dict(type='Normalize', **img_norm_cfg), 12 | dict(type='Pad', size_divisor=32), 13 | dict(type='DefaultFormatBundle'), 14 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), 15 | ] 16 | test_pipeline = [ 17 | dict(type='LoadImageFromFile'), 18 | dict( 19 | type='MultiScaleFlipAug', 20 | img_scale=(1000, 600), 21 | flip=False, 22 | transforms=[ 23 | dict(type='Resize', keep_ratio=True), 24 | dict(type='RandomFlip'), 25 | dict(type='Normalize', **img_norm_cfg), 26 | dict(type='Pad', size_divisor=32), 27 | dict(type='ImageToTensor', keys=['img']), 28 | dict(type='Collect', keys=['img']), 29 | ]) 30 | ] 31 | data = dict( 32 | samples_per_gpu=2, 33 | workers_per_gpu=2, 34 | train=dict( 35 | type='RepeatDataset', 36 | times=3, 37 | dataset=dict( 38 | type=dataset_type, 39 | ann_file=[ 40 | data_root + 'VOC2007/ImageSets/Main/trainval.txt', 41 | data_root + 'VOC2012/ImageSets/Main/trainval.txt' 42 | ], 43 | img_prefix=[data_root + 'VOC2007/', data_root + 'VOC2012/'], 44 | pipeline=train_pipeline)), 45 | val=dict( 46 | type=dataset_type, 47 | ann_file=data_root + 'VOC2007/ImageSets/Main/test.txt', 48 | img_prefix=data_root + 'VOC2007/', 49 | 
pipeline=test_pipeline), 50 | test=dict( 51 | type=dataset_type, 52 | ann_file=data_root + 'VOC2007/ImageSets/Main/test.txt', 53 | img_prefix=data_root + 'VOC2007/', 54 | pipeline=test_pipeline)) 55 | evaluation = dict(interval=1, metric='mAP') 56 | -------------------------------------------------------------------------------- /configs/_base_/datasets/wider_face.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'WIDERFaceDataset' 3 | data_root = 'data/WIDERFace/' 4 | img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], std=[1, 1, 1], to_rgb=True) 5 | train_pipeline = [ 6 | dict(type='LoadImageFromFile', to_float32=True), 7 | dict(type='LoadAnnotations', with_bbox=True), 8 | dict( 9 | type='PhotoMetricDistortion', 10 | brightness_delta=32, 11 | contrast_range=(0.5, 1.5), 12 | saturation_range=(0.5, 1.5), 13 | hue_delta=18), 14 | dict( 15 | type='Expand', 16 | mean=img_norm_cfg['mean'], 17 | to_rgb=img_norm_cfg['to_rgb'], 18 | ratio_range=(1, 4)), 19 | dict( 20 | type='MinIoURandomCrop', 21 | min_ious=(0.1, 0.3, 0.5, 0.7, 0.9), 22 | min_crop_size=0.3), 23 | dict(type='Resize', img_scale=(300, 300), keep_ratio=False), 24 | dict(type='Normalize', **img_norm_cfg), 25 | dict(type='RandomFlip', flip_ratio=0.5), 26 | dict(type='DefaultFormatBundle'), 27 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), 28 | ] 29 | test_pipeline = [ 30 | dict(type='LoadImageFromFile'), 31 | dict( 32 | type='MultiScaleFlipAug', 33 | img_scale=(300, 300), 34 | flip=False, 35 | transforms=[ 36 | dict(type='Resize', keep_ratio=False), 37 | dict(type='Normalize', **img_norm_cfg), 38 | dict(type='ImageToTensor', keys=['img']), 39 | dict(type='Collect', keys=['img']), 40 | ]) 41 | ] 42 | data = dict( 43 | samples_per_gpu=60, 44 | workers_per_gpu=2, 45 | train=dict( 46 | type='RepeatDataset', 47 | times=2, 48 | dataset=dict( 49 | type=dataset_type, 50 | ann_file=data_root + 'train.txt', 51 | img_prefix=data_root + 'WIDER_train/', 52 | min_size=17, 53 | pipeline=train_pipeline)), 54 | val=dict( 55 | type=dataset_type, 56 | ann_file=data_root + 'val.txt', 57 | img_prefix=data_root + 'WIDER_val/', 58 | pipeline=test_pipeline), 59 | test=dict( 60 | type=dataset_type, 61 | ann_file=data_root + 'val.txt', 62 | img_prefix=data_root + 'WIDER_val/', 63 | pipeline=test_pipeline)) 64 | -------------------------------------------------------------------------------- /configs/_base_/default_runtime.py: -------------------------------------------------------------------------------- 1 | checkpoint_config = dict(interval=1) 2 | # yapf:disable 3 | log_config = dict( 4 | interval=50, 5 | hooks=[ 6 | dict(type='TextLoggerHook'), 7 | # dict(type='TensorboardLoggerHook') 8 | ]) 9 | # yapf:enable 10 | dist_params = dict(backend='nccl') 11 | log_level = 'INFO' 12 | load_from = None 13 | resume_from = None 14 | workflow = [('train', 1)] 15 | -------------------------------------------------------------------------------- /configs/_base_/models/fast_rcnn_r50_fpn.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='FastRCNN', 4 | pretrained='torchvision://resnet50', 5 | backbone=dict( 6 | type='ResNet', 7 | depth=50, 8 | num_stages=4, 9 | out_indices=(0, 1, 2, 3), 10 | frozen_stages=1, 11 | norm_cfg=dict(type='BN', requires_grad=True), 12 | norm_eval=True, 13 | style='pytorch'), 14 | neck=dict( 15 | type='FPN', 16 | in_channels=[256, 512, 1024, 2048], 17 | out_channels=256, 18 
| num_outs=5), 19 | roi_head=dict( 20 | type='StandardRoIHead', 21 | bbox_roi_extractor=dict( 22 | type='SingleRoIExtractor', 23 | roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0), 24 | out_channels=256, 25 | featmap_strides=[4, 8, 16, 32]), 26 | bbox_head=dict( 27 | type='Shared2FCBBoxHead', 28 | in_channels=256, 29 | fc_out_channels=1024, 30 | roi_feat_size=7, 31 | num_classes=80, 32 | bbox_coder=dict( 33 | type='DeltaXYWHBBoxCoder', 34 | target_means=[0., 0., 0., 0.], 35 | target_stds=[0.1, 0.1, 0.2, 0.2]), 36 | reg_class_agnostic=False, 37 | loss_cls=dict( 38 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), 39 | loss_bbox=dict(type='L1Loss', loss_weight=1.0))), 40 | # model training and testing settings 41 | train_cfg=dict( 42 | rcnn=dict( 43 | assigner=dict( 44 | type='MaxIoUAssigner', 45 | pos_iou_thr=0.5, 46 | neg_iou_thr=0.5, 47 | min_pos_iou=0.5, 48 | match_low_quality=False, 49 | ignore_iof_thr=-1), 50 | sampler=dict( 51 | type='RandomSampler', 52 | num=512, 53 | pos_fraction=0.25, 54 | neg_pos_ub=-1, 55 | add_gt_as_proposals=True), 56 | pos_weight=-1, 57 | debug=False)), 58 | test_cfg=dict( 59 | rcnn=dict( 60 | score_thr=0.05, 61 | nms=dict(type='nms', iou_threshold=0.5), 62 | max_per_img=100))) 63 | -------------------------------------------------------------------------------- /configs/_base_/models/faster_rcnn_r50_caffe_dc5.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='BN', requires_grad=False) 3 | model = dict( 4 | type='FasterRCNN', 5 | pretrained='open-mmlab://detectron2/resnet50_caffe', 6 | backbone=dict( 7 | type='ResNet', 8 | depth=50, 9 | num_stages=4, 10 | strides=(1, 2, 2, 1), 11 | dilations=(1, 1, 1, 2), 12 | out_indices=(3, ), 13 | frozen_stages=1, 14 | norm_cfg=norm_cfg, 15 | norm_eval=True, 16 | style='caffe'), 17 | rpn_head=dict( 18 | type='RPNHead', 19 | in_channels=2048, 20 | feat_channels=2048, 21 | anchor_generator=dict( 22 | type='AnchorGenerator', 23 | scales=[2, 4, 8, 16, 32], 24 | ratios=[0.5, 1.0, 2.0], 25 | strides=[16]), 26 | bbox_coder=dict( 27 | type='DeltaXYWHBBoxCoder', 28 | target_means=[.0, .0, .0, .0], 29 | target_stds=[1.0, 1.0, 1.0, 1.0]), 30 | loss_cls=dict( 31 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 32 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 33 | roi_head=dict( 34 | type='StandardRoIHead', 35 | bbox_roi_extractor=dict( 36 | type='SingleRoIExtractor', 37 | roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0), 38 | out_channels=2048, 39 | featmap_strides=[16]), 40 | bbox_head=dict( 41 | type='Shared2FCBBoxHead', 42 | in_channels=2048, 43 | fc_out_channels=1024, 44 | roi_feat_size=7, 45 | num_classes=80, 46 | bbox_coder=dict( 47 | type='DeltaXYWHBBoxCoder', 48 | target_means=[0., 0., 0., 0.], 49 | target_stds=[0.1, 0.1, 0.2, 0.2]), 50 | reg_class_agnostic=False, 51 | loss_cls=dict( 52 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), 53 | loss_bbox=dict(type='L1Loss', loss_weight=1.0))), 54 | # model training and testing settings 55 | train_cfg=dict( 56 | rpn=dict( 57 | assigner=dict( 58 | type='MaxIoUAssigner', 59 | pos_iou_thr=0.7, 60 | neg_iou_thr=0.3, 61 | min_pos_iou=0.3, 62 | match_low_quality=True, 63 | ignore_iof_thr=-1), 64 | sampler=dict( 65 | type='RandomSampler', 66 | num=256, 67 | pos_fraction=0.5, 68 | neg_pos_ub=-1, 69 | add_gt_as_proposals=False), 70 | allowed_border=0, 71 | pos_weight=-1, 72 | debug=False), 73 | rpn_proposal=dict( 74 | 
nms_across_levels=False, 75 | nms_pre=12000, 76 | nms_post=2000, 77 | max_num=2000, 78 | nms_thr=0.7, 79 | min_bbox_size=0), 80 | rcnn=dict( 81 | assigner=dict( 82 | type='MaxIoUAssigner', 83 | pos_iou_thr=0.5, 84 | neg_iou_thr=0.5, 85 | min_pos_iou=0.5, 86 | match_low_quality=False, 87 | ignore_iof_thr=-1), 88 | sampler=dict( 89 | type='RandomSampler', 90 | num=512, 91 | pos_fraction=0.25, 92 | neg_pos_ub=-1, 93 | add_gt_as_proposals=True), 94 | pos_weight=-1, 95 | debug=False)), 96 | test_cfg=dict( 97 | rpn=dict( 98 | nms_across_levels=False, 99 | nms_pre=6000, 100 | nms_post=1000, 101 | max_num=1000, 102 | nms_thr=0.7, 103 | min_bbox_size=0), 104 | rcnn=dict( 105 | score_thr=0.05, 106 | nms=dict(type='nms', iou_threshold=0.5), 107 | max_per_img=100))) 108 | -------------------------------------------------------------------------------- /configs/_base_/models/retinanet_r50_fpn.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='RetinaNet', 4 | pretrained='torchvision://resnet50', 5 | backbone=dict( 6 | type='ResNet', 7 | depth=50, 8 | num_stages=4, 9 | out_indices=(0, 1, 2, 3), 10 | frozen_stages=1, 11 | norm_cfg=dict(type='BN', requires_grad=True), 12 | norm_eval=True, 13 | style='pytorch'), 14 | neck=dict( 15 | type='FPN', 16 | in_channels=[256, 512, 1024, 2048], 17 | out_channels=256, 18 | start_level=1, 19 | add_extra_convs='on_input', 20 | num_outs=5), 21 | bbox_head=dict( 22 | type='RetinaHead', 23 | num_classes=80, 24 | in_channels=256, 25 | stacked_convs=4, 26 | feat_channels=256, 27 | anchor_generator=dict( 28 | type='AnchorGenerator', 29 | octave_base_scale=4, 30 | scales_per_octave=3, 31 | ratios=[0.5, 1.0, 2.0], 32 | strides=[8, 16, 32, 64, 128]), 33 | bbox_coder=dict( 34 | type='DeltaXYWHBBoxCoder', 35 | target_means=[.0, .0, .0, .0], 36 | target_stds=[1.0, 1.0, 1.0, 1.0]), 37 | loss_cls=dict( 38 | type='FocalLoss', 39 | use_sigmoid=True, 40 | gamma=2.0, 41 | alpha=0.25, 42 | loss_weight=1.0), 43 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 44 | # training and testing settings 45 | train_cfg=dict( 46 | assigner=dict( 47 | type='MaxIoUAssigner', 48 | pos_iou_thr=0.5, 49 | neg_iou_thr=0.4, 50 | min_pos_iou=0, 51 | ignore_iof_thr=-1), 52 | allowed_border=-1, 53 | pos_weight=-1, 54 | debug=False), 55 | test_cfg=dict( 56 | nms_pre=1000, 57 | min_bbox_size=0, 58 | score_thr=0.05, 59 | nms=dict(type='nms', iou_threshold=0.5), 60 | max_per_img=100)) 61 | -------------------------------------------------------------------------------- /configs/_base_/models/rpn_r50_caffe_c4.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='RPN', 4 | pretrained='open-mmlab://detectron2/resnet50_caffe', 5 | backbone=dict( 6 | type='ResNet', 7 | depth=50, 8 | num_stages=3, 9 | strides=(1, 2, 2), 10 | dilations=(1, 1, 1), 11 | out_indices=(2, ), 12 | frozen_stages=1, 13 | norm_cfg=dict(type='BN', requires_grad=False), 14 | norm_eval=True, 15 | style='caffe'), 16 | neck=None, 17 | rpn_head=dict( 18 | type='RPNHead', 19 | in_channels=1024, 20 | feat_channels=1024, 21 | anchor_generator=dict( 22 | type='AnchorGenerator', 23 | scales=[2, 4, 8, 16, 32], 24 | ratios=[0.5, 1.0, 2.0], 25 | strides=[16]), 26 | bbox_coder=dict( 27 | type='DeltaXYWHBBoxCoder', 28 | target_means=[.0, .0, .0, .0], 29 | target_stds=[1.0, 1.0, 1.0, 1.0]), 30 | loss_cls=dict( 31 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 32 | 
loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 33 | # model training and testing settings 34 | train_cfg=dict( 35 | rpn=dict( 36 | assigner=dict( 37 | type='MaxIoUAssigner', 38 | pos_iou_thr=0.7, 39 | neg_iou_thr=0.3, 40 | min_pos_iou=0.3, 41 | ignore_iof_thr=-1), 42 | sampler=dict( 43 | type='RandomSampler', 44 | num=256, 45 | pos_fraction=0.5, 46 | neg_pos_ub=-1, 47 | add_gt_as_proposals=False), 48 | allowed_border=0, 49 | pos_weight=-1, 50 | debug=False)), 51 | test_cfg=dict( 52 | rpn=dict( 53 | nms_across_levels=False, 54 | nms_pre=12000, 55 | nms_post=2000, 56 | max_num=2000, 57 | nms_thr=0.7, 58 | min_bbox_size=0))) 59 | -------------------------------------------------------------------------------- /configs/_base_/models/rpn_r50_fpn.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='RPN', 4 | pretrained='torchvision://resnet50', 5 | backbone=dict( 6 | type='ResNet', 7 | depth=50, 8 | num_stages=4, 9 | out_indices=(0, 1, 2, 3), 10 | frozen_stages=1, 11 | norm_cfg=dict(type='BN', requires_grad=True), 12 | norm_eval=True, 13 | style='pytorch'), 14 | neck=dict( 15 | type='FPN', 16 | in_channels=[256, 512, 1024, 2048], 17 | out_channels=256, 18 | num_outs=5), 19 | rpn_head=dict( 20 | type='RPNHead', 21 | in_channels=256, 22 | feat_channels=256, 23 | anchor_generator=dict( 24 | type='AnchorGenerator', 25 | scales=[8], 26 | ratios=[0.5, 1.0, 2.0], 27 | strides=[4, 8, 16, 32, 64]), 28 | bbox_coder=dict( 29 | type='DeltaXYWHBBoxCoder', 30 | target_means=[.0, .0, .0, .0], 31 | target_stds=[1.0, 1.0, 1.0, 1.0]), 32 | loss_cls=dict( 33 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 34 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 35 | # model training and testing settings 36 | train_cfg=dict( 37 | rpn=dict( 38 | assigner=dict( 39 | type='MaxIoUAssigner', 40 | pos_iou_thr=0.7, 41 | neg_iou_thr=0.3, 42 | min_pos_iou=0.3, 43 | ignore_iof_thr=-1), 44 | sampler=dict( 45 | type='RandomSampler', 46 | num=256, 47 | pos_fraction=0.5, 48 | neg_pos_ub=-1, 49 | add_gt_as_proposals=False), 50 | allowed_border=0, 51 | pos_weight=-1, 52 | debug=False)), 53 | test_cfg=dict( 54 | rpn=dict( 55 | nms_across_levels=False, 56 | nms_pre=2000, 57 | nms_post=1000, 58 | max_num=1000, 59 | nms_thr=0.7, 60 | min_bbox_size=0))) 61 | -------------------------------------------------------------------------------- /configs/_base_/models/ssd300.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | input_size = 300 3 | model = dict( 4 | type='SingleStageDetector', 5 | pretrained='open-mmlab://vgg16_caffe', 6 | backbone=dict( 7 | type='SSDVGG', 8 | input_size=input_size, 9 | depth=16, 10 | with_last_pool=False, 11 | ceil_mode=True, 12 | out_indices=(3, 4), 13 | out_feature_indices=(22, 34), 14 | l2_norm_scale=20), 15 | neck=None, 16 | bbox_head=dict( 17 | type='SSDHead', 18 | in_channels=(512, 1024, 512, 256, 256, 256), 19 | num_classes=80, 20 | anchor_generator=dict( 21 | type='SSDAnchorGenerator', 22 | scale_major=False, 23 | input_size=input_size, 24 | basesize_ratio_range=(0.15, 0.9), 25 | strides=[8, 16, 32, 64, 100, 300], 26 | ratios=[[2], [2, 3], [2, 3], [2, 3], [2], [2]]), 27 | bbox_coder=dict( 28 | type='DeltaXYWHBBoxCoder', 29 | target_means=[.0, .0, .0, .0], 30 | target_stds=[0.1, 0.1, 0.2, 0.2])), 31 | train_cfg=dict( 32 | assigner=dict( 33 | type='MaxIoUAssigner', 34 | pos_iou_thr=0.5, 35 | neg_iou_thr=0.5, 36 | min_pos_iou=0., 37 | ignore_iof_thr=-1, 38 
| gt_max_assign_all=False), 39 | smoothl1_beta=1., 40 | allowed_border=-1, 41 | pos_weight=-1, 42 | neg_pos_ratio=3, 43 | debug=False), 44 | test_cfg=dict( 45 | nms=dict(type='nms', iou_threshold=0.45), 46 | min_bbox_size=0, 47 | score_thr=0.02, 48 | max_per_img=200)) 49 | cudnn_benchmark = True 50 | -------------------------------------------------------------------------------- /configs/_base_/schedules/schedule_1x.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) 3 | optimizer_config = dict(grad_clip=None) 4 | # learning policy 5 | lr_config = dict( 6 | policy='step', 7 | warmup='linear', 8 | warmup_iters=500, 9 | warmup_ratio=0.001, 10 | step=[8, 11]) 11 | total_epochs = 12 12 | -------------------------------------------------------------------------------- /configs/_base_/schedules/schedule_20e.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) 3 | optimizer_config = dict(grad_clip=None) 4 | # learning policy 5 | lr_config = dict( 6 | policy='step', 7 | warmup='linear', 8 | warmup_iters=500, 9 | warmup_ratio=0.001, 10 | step=[16, 19]) 11 | total_epochs = 20 12 | -------------------------------------------------------------------------------- /configs/_base_/schedules/schedule_2x.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) 3 | optimizer_config = dict(grad_clip=None) 4 | # learning policy 5 | lr_config = dict( 6 | policy='step', 7 | warmup='linear', 8 | warmup_iters=500, 9 | warmup_ratio=0.001, 10 | step=[16, 22]) 11 | total_epochs = 24 12 | -------------------------------------------------------------------------------- /docker/Dockerfile: -------------------------------------------------------------------------------- 1 | ARG PYTORCH="1.6.0" 2 | ARG CUDA="10.1" 3 | ARG CUDNN="7" 4 | 5 | FROM pytorch/pytorch:${PYTORCH}-cuda${CUDA}-cudnn${CUDNN}-devel 6 | 7 | ENV TORCH_CUDA_ARCH_LIST="6.0 6.1 7.0+PTX" 8 | ENV TORCH_NVCC_FLAGS="-Xfatbin -compress-all" 9 | ENV CMAKE_PREFIX_PATH="$(dirname $(which conda))/../" 10 | 11 | RUN apt-get update && apt-get install -y ffmpeg libsm6 libxext6 git ninja-build libglib2.0-0 libsm6 libxrender-dev libxext6 \ 12 | && apt-get clean \ 13 | && rm -rf /var/lib/apt/lists/* 14 | 15 | # Install MMCV 16 | RUN pip install mmcv-full==latest+torch1.6.0+cu101 -f https://openmmlab.oss-accelerate.aliyuncs.com/mmcv/dist/index.html 17 | 18 | # Install MMDetection 19 | RUN conda clean --all 20 | RUN git clone https://github.com/open-mmlab/mmdetection.git /mmdetection 21 | WORKDIR /mmdetection 22 | ENV FORCE_CUDA="1" 23 | RUN pip install -r requirements/build.txt 24 | RUN pip install --no-cache-dir -e . 25 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 
12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /docs/api.rst: -------------------------------------------------------------------------------- 1 | API Reference 2 | ================= 3 | 4 | mmdet.apis 5 | -------------- 6 | .. automodule:: mmdet.apis 7 | :members: 8 | 9 | mmdet.core 10 | -------------- 11 | 12 | anchor 13 | ^^^^^^^^^^ 14 | .. automodule:: mmdet.core.anchor 15 | :members: 16 | 17 | bbox 18 | ^^^^^^^^^^ 19 | .. automodule:: mmdet.core.bbox 20 | :members: 21 | 22 | export 23 | ^^^^^^^^^^ 24 | .. automodule:: mmdet.core.export 25 | :members: 26 | 27 | mask 28 | ^^^^^^^^^^ 29 | .. automodule:: mmdet.core.mask 30 | :members: 31 | 32 | evaluation 33 | ^^^^^^^^^^ 34 | .. automodule:: mmdet.core.evaluation 35 | :members: 36 | 37 | post_processing 38 | ^^^^^^^^^^^^^^^ 39 | .. automodule:: mmdet.core.post_processing 40 | :members: 41 | 42 | optimizer 43 | ^^^^^^^^^^ 44 | .. automodule:: mmdet.core.optimizer 45 | :members: 46 | 47 | utils 48 | ^^^^^^^^^^ 49 | .. automodule:: mmdet.core.utils 50 | :members: 51 | 52 | mmdet.datasets 53 | -------------- 54 | 55 | datasets 56 | ^^^^^^^^^^ 57 | .. automodule:: mmdet.datasets 58 | :members: 59 | 60 | pipelines 61 | ^^^^^^^^^^ 62 | .. automodule:: mmdet.datasets.pipelines 63 | :members: 64 | 65 | mmdet.models 66 | -------------- 67 | 68 | detectors 69 | ^^^^^^^^^^ 70 | .. automodule:: mmdet.models.detectors 71 | :members: 72 | 73 | backbones 74 | ^^^^^^^^^^ 75 | .. automodule:: mmdet.models.backbones 76 | :members: 77 | 78 | necks 79 | ^^^^^^^^^^^^ 80 | .. automodule:: mmdet.models.necks 81 | :members: 82 | 83 | dense_heads 84 | ^^^^^^^^^^^^ 85 | .. automodule:: mmdet.models.dense_heads 86 | :members: 87 | 88 | roi_heads 89 | ^^^^^^^^^^ 90 | .. automodule:: mmdet.models.roi_heads 91 | :members: 92 | 93 | losses 94 | ^^^^^^^^^^ 95 | .. automodule:: mmdet.models.losses 96 | :members: 97 | 98 | utils 99 | ^^^^^^^^^^ 100 | .. automodule:: mmdet.models.utils 101 | :members: 102 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 2 | # 3 | # This file only contains a selection of the most common options. For a full 4 | # list see the documentation: 5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 6 | 7 | # -- Path setup -------------------------------------------------------------- 8 | 9 | # If extensions (or modules to document with autodoc) are in another directory, 10 | # add these directories to sys.path here. If the directory is relative to the 11 | # documentation root, use os.path.abspath to make it absolute, like shown here. 
12 | # 13 | import os 14 | import subprocess 15 | import sys 16 | 17 | sys.path.insert(0, os.path.abspath('..')) 18 | 19 | # -- Project information ----------------------------------------------------- 20 | 21 | project = 'MMDetection' 22 | copyright = '2018-2020, OpenMMLab' 23 | author = 'MMDetection Authors' 24 | version_file = '../mmdet/version.py' 25 | 26 | 27 | def get_version(): 28 | with open(version_file, 'r') as f: 29 | exec(compile(f.read(), version_file, 'exec')) 30 | return locals()['__version__'] 31 | 32 | 33 | # The full version, including alpha/beta/rc tags 34 | release = get_version() 35 | 36 | # -- General configuration --------------------------------------------------- 37 | 38 | # Add any Sphinx extension module names here, as strings. They can be 39 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 40 | # ones. 41 | extensions = [ 42 | 'sphinx.ext.autodoc', 43 | 'sphinx.ext.napoleon', 44 | 'sphinx.ext.viewcode', 45 | 'recommonmark', 46 | 'sphinx_markdown_tables', 47 | ] 48 | 49 | autodoc_mock_imports = [ 50 | 'matplotlib', 'pycocotools', 'terminaltables', 'mmdet.version', 'mmcv.ops' 51 | ] 52 | 53 | # Add any paths that contain templates here, relative to this directory. 54 | templates_path = ['_templates'] 55 | 56 | # The suffix(es) of source filenames. 57 | # You can specify multiple suffix as a list of string: 58 | # 59 | source_suffix = { 60 | '.rst': 'restructuredtext', 61 | '.md': 'markdown', 62 | } 63 | 64 | # The master toctree document. 65 | master_doc = 'index' 66 | 67 | # List of patterns, relative to source directory, that match files and 68 | # directories to ignore when looking for source files. 69 | # This pattern also affects html_static_path and html_extra_path. 70 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] 71 | 72 | # -- Options for HTML output ------------------------------------------------- 73 | 74 | # The theme to use for HTML and HTML Help pages. See the documentation for 75 | # a list of builtin themes. 76 | # 77 | html_theme = 'sphinx_rtd_theme' 78 | 79 | # Add any paths that contain custom static files (such as style sheets) here, 80 | # relative to this directory. They are copied after the builtin static files, 81 | # so a file named "default.css" will overwrite the builtin "default.css". 82 | html_static_path = ['_static'] 83 | 84 | 85 | def builder_inited_handler(app): 86 | subprocess.run(['./stat.py']) 87 | 88 | 89 | def setup(app): 90 | app.connect('builder-inited', builder_inited_handler) 91 | -------------------------------------------------------------------------------- /docs/conventions.md: -------------------------------------------------------------------------------- 1 | # Conventions 2 | 3 | Please check the following conventions if you would like to modify MMDetection as your own project. 4 | 5 | ## Loss 6 | 7 | In MMDetection, a `dict` containing losses and metrics will be returned by `model(**data)`. 8 | 9 | For example, in bbox head, 10 | 11 | ```python 12 | class BBoxHead(nn.Module): 13 | ... 14 | def loss(self, ...): 15 | losses = dict() 16 | # classification loss 17 | losses['loss_cls'] = self.loss_cls(...) 18 | # classification accuracy 19 | losses['acc'] = accuracy(...) 20 | # bbox regression loss 21 | losses['loss_bbox'] = self.loss_bbox(...) 22 | return losses 23 | ``` 24 | 25 | `bbox_head.loss()` will be called during model forward. 26 | The returned dict contains `'loss_bbox'`, `'loss_cls'`, `'acc'` . 
27 | Only `'loss_bbox'`, `'loss_cls'` will be used during back propagation, 28 | `'acc'` will only be used as a metric to monitor training process. 29 | 30 | By default, only values whose keys contain `'loss'` will be back propagated. 31 | This behavior could be changed by modifying `BaseDetector.train_step()`. 32 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | Welcome to MMDetection's documentation! 2 | ======================================= 3 | 4 | .. toctree:: 5 | :maxdepth: 2 6 | :caption: Get Started 7 | 8 | get_started.md 9 | modelzoo_statistics.md 10 | model_zoo.md 11 | 12 | .. toctree:: 13 | :maxdepth: 2 14 | :caption: Quick Run 15 | 16 | 1_exist_data_model.md 17 | 2_new_data_model.md 18 | 19 | .. toctree:: 20 | :maxdepth: 2 21 | :caption: Tutorials 22 | 23 | tutorials/index.rst 24 | 25 | .. toctree:: 26 | :maxdepth: 2 27 | :caption: Useful Tools and Scripts 28 | 29 | useful_tools.md 30 | 31 | .. toctree:: 32 | :maxdepth: 2 33 | :caption: Notes 34 | 35 | conventions.md 36 | compatibility.md 37 | projects.md 38 | changelog.md 39 | faq.md 40 | 41 | .. toctree:: 42 | :caption: API Reference 43 | 44 | api.rst 45 | 46 | Indices and tables 47 | ================== 48 | 49 | * :ref:`genindex` 50 | * :ref:`search` 51 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=. 11 | set BUILDDIR=_build 12 | 13 | if "%1" == "" goto help 14 | 15 | %SPHINXBUILD% >NUL 2>NUL 16 | if errorlevel 9009 ( 17 | echo. 18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 19 | echo.installed, then set the SPHINXBUILD environment variable to point 20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 21 | echo.may add the Sphinx directory to PATH. 22 | echo. 
23 | echo.If you don't have Sphinx installed, grab it from 24 | echo.http://sphinx-doc.org/ 25 | exit /b 1 26 | ) 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /docs/stat.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import functools as func 3 | import glob 4 | import os.path as osp 5 | import re 6 | 7 | import numpy as np 8 | 9 | url_prefix = 'https://github.com/open-mmlab/mmdetection/blob/master/' 10 | 11 | files = sorted(glob.glob('../configs/*/README.md')) 12 | 13 | stats = [] 14 | titles = [] 15 | num_ckpts = 0 16 | 17 | for f in files: 18 | url = osp.dirname(f.replace('../', url_prefix)) 19 | 20 | with open(f, 'r') as content_file: 21 | content = content_file.read() 22 | 23 | title = content.split('\n')[0].replace('# ', '').strip() 24 | ckpts = set(x.lower().strip() 25 | for x in re.findall(r'\[model\]\((https?.*)\)', content)) 26 | 27 | if len(ckpts) == 0: 28 | continue 29 | 30 | _papertype = [x for x in re.findall(r'\[([A-Z]+)\]', content)] 31 | assert len(_papertype) > 0 32 | papertype = _papertype[0] 33 | 34 | paper = set([(papertype, title)]) 35 | 36 | titles.append(title) 37 | num_ckpts += len(ckpts) 38 | 39 | statsmsg = f""" 40 | \t* [{papertype}] [{title}]({url}) ({len(ckpts)} ckpts) 41 | """ 42 | stats.append((paper, ckpts, statsmsg)) 43 | 44 | allpapers = func.reduce(lambda a, b: a.union(b), [p for p, _, _ in stats]) 45 | msglist = '\n'.join(x for _, _, x in stats) 46 | 47 | papertypes, papercounts = np.unique([t for t, _ in allpapers], 48 | return_counts=True) 49 | countstr = '\n'.join( 50 | [f' - {t}: {c}' for t, c in zip(papertypes, papercounts)]) 51 | 52 | modelzoo = f""" 53 | # Model Zoo Statistics 54 | 55 | * Number of papers: {len(set(titles))} 56 | {countstr} 57 | 58 | * Number of checkpoints: {num_ckpts} 59 | 60 | {msglist} 61 | """ 62 | 63 | with open('modelzoo_statistics.md', 'w') as f: 64 | f.write(modelzoo) 65 | -------------------------------------------------------------------------------- /docs/tutorials/index.rst: -------------------------------------------------------------------------------- 1 | .. 
toctree:: 2 | :maxdepth: 2 3 | 4 | config.md 5 | customize_dataset.md 6 | data_pipeline.md 7 | customize_models.md 8 | customize_runtime.md 9 | customize_losses.md 10 | finetune.md 11 | pytorch2onnx.md 12 | -------------------------------------------------------------------------------- /images/epic.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcahny/object_localization_network/300d995da000484fdfd26114e4b783c992046e9c/images/epic.png -------------------------------------------------------------------------------- /images/oln_overview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcahny/object_localization_network/300d995da000484fdfd26114e4b783c992046e9c/images/oln_overview.png -------------------------------------------------------------------------------- /mmdet/__init__.py: -------------------------------------------------------------------------------- 1 | import mmcv 2 | 3 | from .version import __version__, short_version 4 | 5 | 6 | def digit_version(version_str): 7 | digit_version = [] 8 | for x in version_str.split('.'): 9 | if x.isdigit(): 10 | digit_version.append(int(x)) 11 | elif x.find('rc') != -1: 12 | patch_version = x.split('rc') 13 | digit_version.append(int(patch_version[0]) - 1) 14 | digit_version.append(int(patch_version[1])) 15 | return digit_version 16 | 17 | 18 | mmcv_minimum_version = '1.2.4' 19 | mmcv_maximum_version = '1.3' 20 | mmcv_version = digit_version(mmcv.__version__) 21 | 22 | 23 | assert (mmcv_version >= digit_version(mmcv_minimum_version) 24 | and mmcv_version <= digit_version(mmcv_maximum_version)), \ 25 | f'MMCV=={mmcv.__version__} is used but incompatible. ' \ 26 | f'Please install mmcv>={mmcv_minimum_version}, <={mmcv_maximum_version}.' 
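# Illustrative sanity checks of digit_version (the values are chosen by the
# editor and are not part of the original file). Release candidates sort
# below the final release when the resulting lists are compared
# lexicographically:
#
#     digit_version('1.2.4')  -> [1, 2, 4]
#     digit_version('1.3')    -> [1, 3]
#     digit_version('1.3rc1') -> [1, 2, 1]   # 'rc' decrements the preceding
#                                            # component and appends the rc number
#
# so digit_version('1.3rc1') < digit_version('1.3'), which is what the
# version assert above relies on.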
27 | 28 | __all__ = ['__version__', 'short_version'] 29 | -------------------------------------------------------------------------------- /mmdet/apis/__init__.py: -------------------------------------------------------------------------------- 1 | from .inference import (async_inference_detector, inference_detector, 2 | init_detector, show_result_pyplot) 3 | from .test import multi_gpu_test, single_gpu_test 4 | from .train import get_root_logger, set_random_seed, train_detector 5 | 6 | __all__ = [ 7 | 'get_root_logger', 'set_random_seed', 'train_detector', 'init_detector', 8 | 'async_inference_detector', 'inference_detector', 'show_result_pyplot', 9 | 'multi_gpu_test', 'single_gpu_test' 10 | ] 11 | -------------------------------------------------------------------------------- /mmdet/core/__init__.py: -------------------------------------------------------------------------------- 1 | from .anchor import * # noqa: F401, F403 2 | from .bbox import * # noqa: F401, F403 3 | from .evaluation import * # noqa: F401, F403 4 | from .export import * # noqa: F401, F403 5 | from .fp16 import * # noqa: F401, F403 6 | from .mask import * # noqa: F401, F403 7 | from .post_processing import * # noqa: F401, F403 8 | from .utils import * # noqa: F401, F403 9 | -------------------------------------------------------------------------------- /mmdet/core/anchor/__init__.py: -------------------------------------------------------------------------------- 1 | from .anchor_generator import (AnchorGenerator, LegacyAnchorGenerator, 2 | YOLOAnchorGenerator) 3 | from .builder import ANCHOR_GENERATORS, build_anchor_generator 4 | from .point_generator import PointGenerator 5 | from .utils import anchor_inside_flags, calc_region, images_to_levels 6 | 7 | __all__ = [ 8 | 'AnchorGenerator', 'LegacyAnchorGenerator', 'anchor_inside_flags', 9 | 'PointGenerator', 'images_to_levels', 'calc_region', 10 | 'build_anchor_generator', 'ANCHOR_GENERATORS', 'YOLOAnchorGenerator' 11 | ] 12 | -------------------------------------------------------------------------------- /mmdet/core/anchor/builder.py: -------------------------------------------------------------------------------- 1 | from mmcv.utils import Registry, build_from_cfg 2 | 3 | ANCHOR_GENERATORS = Registry('Anchor generator') 4 | 5 | 6 | def build_anchor_generator(cfg, default_args=None): 7 | return build_from_cfg(cfg, ANCHOR_GENERATORS, default_args) 8 | -------------------------------------------------------------------------------- /mmdet/core/anchor/point_generator.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from .builder import ANCHOR_GENERATORS 4 | 5 | 6 | @ANCHOR_GENERATORS.register_module() 7 | class PointGenerator(object): 8 | 9 | def _meshgrid(self, x, y, row_major=True): 10 | xx = x.repeat(len(y)) 11 | yy = y.view(-1, 1).repeat(1, len(x)).view(-1) 12 | if row_major: 13 | return xx, yy 14 | else: 15 | return yy, xx 16 | 17 | def grid_points(self, featmap_size, stride=16, device='cuda'): 18 | feat_h, feat_w = featmap_size 19 | shift_x = torch.arange(0., feat_w, device=device) * stride 20 | shift_y = torch.arange(0., feat_h, device=device) * stride 21 | shift_xx, shift_yy = self._meshgrid(shift_x, shift_y) 22 | stride = shift_x.new_full((shift_xx.shape[0], ), stride) 23 | shifts = torch.stack([shift_xx, shift_yy, stride], dim=-1) 24 | all_points = shifts.to(device) 25 | return all_points 26 | 27 | def valid_flags(self, featmap_size, valid_size, device='cuda'): 28 | feat_h, feat_w = featmap_size 29 
| valid_h, valid_w = valid_size
30 |         assert valid_h <= feat_h and valid_w <= feat_w
31 |         valid_x = torch.zeros(feat_w, dtype=torch.bool, device=device)
32 |         valid_y = torch.zeros(feat_h, dtype=torch.bool, device=device)
33 |         valid_x[:valid_w] = 1
34 |         valid_y[:valid_h] = 1
35 |         valid_xx, valid_yy = self._meshgrid(valid_x, valid_y)
36 |         valid = valid_xx & valid_yy
37 |         return valid
38 | 
--------------------------------------------------------------------------------
/mmdet/core/anchor/utils.py:
--------------------------------------------------------------------------------
1 | import torch
2 | 
3 | 
4 | def images_to_levels(target, num_levels):
5 |     """Convert targets by image to targets by feature level.
6 | 
7 |     [target_img0, target_img1] -> [target_level0, target_level1, ...]
8 |     """
9 |     target = torch.stack(target, 0)
10 |     level_targets = []
11 |     start = 0
12 |     for n in num_levels:
13 |         end = start + n
14 |         # level_targets.append(target[:, start:end].squeeze(0))
15 |         level_targets.append(target[:, start:end])
16 |         start = end
17 |     return level_targets
18 | 
19 | 
20 | def anchor_inside_flags(flat_anchors,
21 |                         valid_flags,
22 |                         img_shape,
23 |                         allowed_border=0):
24 |     """Check whether the anchors are inside the border.
25 | 
26 |     Args:
27 |         flat_anchors (torch.Tensor): Flattened anchors, shape (n, 4).
28 |         valid_flags (torch.Tensor): Existing valid flags of the anchors.
29 |         img_shape (tuple(int)): Shape of the current image.
30 |         allowed_border (int, optional): The border to allow the valid anchor.
31 |             Defaults to 0.
32 | 
33 |     Returns:
34 |         torch.Tensor: Flags indicating whether the anchors are inside a \
35 |             valid range.
36 |     """
37 |     img_h, img_w = img_shape[:2]
38 |     if allowed_border >= 0:
39 |         inside_flags = valid_flags & \
40 |             (flat_anchors[:, 0] >= -allowed_border) & \
41 |             (flat_anchors[:, 1] >= -allowed_border) & \
42 |             (flat_anchors[:, 2] < img_w + allowed_border) & \
43 |             (flat_anchors[:, 3] < img_h + allowed_border)
44 |     else:
45 |         inside_flags = valid_flags
46 |     return inside_flags
47 | 
48 | 
49 | def calc_region(bbox, ratio, featmap_size=None):
50 |     """Calculate a proportional bbox region.
51 | 
52 |     The bbox center is fixed and the new h' and w' are h * ratio and w * ratio.
53 | 
54 |     Args:
55 |         bbox (Tensor): Bboxes to calculate regions, shape (n, 4).
56 |         ratio (float): Ratio of the output region.
57 |         featmap_size (tuple): Feature map size used for clipping the boundary.
58 | 59 | Returns: 60 | tuple: x1, y1, x2, y2 61 | """ 62 | x1 = torch.round((1 - ratio) * bbox[0] + ratio * bbox[2]).long() 63 | y1 = torch.round((1 - ratio) * bbox[1] + ratio * bbox[3]).long() 64 | x2 = torch.round(ratio * bbox[0] + (1 - ratio) * bbox[2]).long() 65 | y2 = torch.round(ratio * bbox[1] + (1 - ratio) * bbox[3]).long() 66 | if featmap_size is not None: 67 | x1 = x1.clamp(min=0, max=featmap_size[1]) 68 | y1 = y1.clamp(min=0, max=featmap_size[0]) 69 | x2 = x2.clamp(min=0, max=featmap_size[1]) 70 | y2 = y2.clamp(min=0, max=featmap_size[0]) 71 | return (x1, y1, x2, y2) 72 | -------------------------------------------------------------------------------- /mmdet/core/bbox/__init__.py: -------------------------------------------------------------------------------- 1 | from .assigners import (AssignResult, BaseAssigner, CenterRegionAssigner, 2 | MaxIoUAssigner, RegionAssigner) 3 | from .builder import build_assigner, build_bbox_coder, build_sampler 4 | from .coder import (BaseBBoxCoder, DeltaXYWHBBoxCoder, PseudoBBoxCoder, 5 | TBLRBBoxCoder) 6 | from .iou_calculators import BboxOverlaps2D, bbox_overlaps 7 | from .samplers import (BaseSampler, CombinedSampler, 8 | InstanceBalancedPosSampler, IoUBalancedNegSampler, 9 | OHEMSampler, PseudoSampler, RandomSampler, 10 | SamplingResult, ScoreHLRSampler) 11 | from .transforms import (bbox2distance, bbox2result, bbox2roi, 12 | bbox_cxcywh_to_xyxy, bbox_flip, bbox_mapping, 13 | bbox_mapping_back, bbox_rescale, bbox_xyxy_to_cxcywh, 14 | distance2bbox, roi2bbox) 15 | 16 | __all__ = [ 17 | 'bbox_overlaps', 'BboxOverlaps2D', 'BaseAssigner', 'MaxIoUAssigner', 18 | 'AssignResult', 'BaseSampler', 'PseudoSampler', 'RandomSampler', 19 | 'InstanceBalancedPosSampler', 'IoUBalancedNegSampler', 'CombinedSampler', 20 | 'OHEMSampler', 'SamplingResult', 'ScoreHLRSampler', 'build_assigner', 21 | 'build_sampler', 'bbox_flip', 'bbox_mapping', 'bbox_mapping_back', 22 | 'bbox2roi', 'roi2bbox', 'bbox2result', 'distance2bbox', 'bbox2distance', 23 | 'build_bbox_coder', 'BaseBBoxCoder', 'PseudoBBoxCoder', 24 | 'DeltaXYWHBBoxCoder', 'TBLRBBoxCoder', 'CenterRegionAssigner', 25 | 'bbox_rescale', 'bbox_cxcywh_to_xyxy', 'bbox_xyxy_to_cxcywh', 26 | 'RegionAssigner' 27 | ] 28 | -------------------------------------------------------------------------------- /mmdet/core/bbox/assigners/__init__.py: -------------------------------------------------------------------------------- 1 | from .approx_max_iou_assigner import ApproxMaxIoUAssigner 2 | from .assign_result import AssignResult 3 | from .atss_assigner import ATSSAssigner 4 | from .base_assigner import BaseAssigner 5 | from .center_region_assigner import CenterRegionAssigner 6 | from .grid_assigner import GridAssigner 7 | from .hungarian_assigner import HungarianAssigner 8 | from .max_iou_assigner import MaxIoUAssigner 9 | from .point_assigner import PointAssigner 10 | from .region_assigner import RegionAssigner 11 | 12 | __all__ = [ 13 | 'BaseAssigner', 'MaxIoUAssigner', 'ApproxMaxIoUAssigner', 'AssignResult', 14 | 'PointAssigner', 'ATSSAssigner', 'CenterRegionAssigner', 'GridAssigner', 15 | 'HungarianAssigner', 'RegionAssigner' 16 | ] 17 | -------------------------------------------------------------------------------- /mmdet/core/bbox/assigners/base_assigner.py: -------------------------------------------------------------------------------- 1 | from abc import ABCMeta, abstractmethod 2 | 3 | 4 | class BaseAssigner(metaclass=ABCMeta): 5 | """Base assigner that assigns boxes to ground truth boxes.""" 6 | 7 | 
@abstractmethod
8 |     def assign(self, bboxes, gt_bboxes, gt_bboxes_ignore=None, gt_labels=None):
9 |         """Assign boxes to either a ground truth box or a negative box."""
10 |         pass
11 | 
--------------------------------------------------------------------------------
/mmdet/core/bbox/builder.py:
--------------------------------------------------------------------------------
1 | from mmcv.utils import Registry, build_from_cfg
2 | 
3 | BBOX_ASSIGNERS = Registry('bbox_assigner')
4 | BBOX_SAMPLERS = Registry('bbox_sampler')
5 | BBOX_CODERS = Registry('bbox_coder')
6 | 
7 | 
8 | def build_assigner(cfg, **default_args):
9 |     """Builder of box assigner."""
10 |     return build_from_cfg(cfg, BBOX_ASSIGNERS, default_args)
11 | 
12 | 
13 | def build_sampler(cfg, **default_args):
14 |     """Builder of box sampler."""
15 |     return build_from_cfg(cfg, BBOX_SAMPLERS, default_args)
16 | 
17 | 
18 | def build_bbox_coder(cfg, **default_args):
19 |     """Builder of box coder."""
20 |     return build_from_cfg(cfg, BBOX_CODERS, default_args)
21 | 
--------------------------------------------------------------------------------
/mmdet/core/bbox/coder/__init__.py:
--------------------------------------------------------------------------------
1 | from .base_bbox_coder import BaseBBoxCoder
2 | from .bucketing_bbox_coder import BucketingBBoxCoder
3 | from .delta_xywh_bbox_coder import DeltaXYWHBBoxCoder
4 | from .legacy_delta_xywh_bbox_coder import LegacyDeltaXYWHBBoxCoder
5 | from .pseudo_bbox_coder import PseudoBBoxCoder
6 | from .tblr_bbox_coder import TBLRBBoxCoder
7 | from .yolo_bbox_coder import YOLOBBoxCoder
8 | 
9 | __all__ = [
10 |     'BaseBBoxCoder', 'PseudoBBoxCoder', 'DeltaXYWHBBoxCoder',
11 |     'LegacyDeltaXYWHBBoxCoder', 'TBLRBBoxCoder', 'YOLOBBoxCoder',
12 |     'BucketingBBoxCoder'
13 | ]
14 | 
--------------------------------------------------------------------------------
/mmdet/core/bbox/coder/base_bbox_coder.py:
--------------------------------------------------------------------------------
1 | from abc import ABCMeta, abstractmethod
2 | 
3 | 
4 | class BaseBBoxCoder(metaclass=ABCMeta):
5 |     """Base bounding box coder."""
6 | 
7 |     def __init__(self, **kwargs):
8 |         pass
9 | 
10 |     @abstractmethod
11 |     def encode(self, bboxes, gt_bboxes):
12 |         """Encode deltas between bboxes and ground truth boxes."""
13 |         pass
14 | 
15 |     @abstractmethod
16 |     def decode(self, bboxes, bboxes_pred):
17 |         """Decode the predicted bboxes according to prediction and base
18 |         boxes."""
19 |         pass
20 | 
--------------------------------------------------------------------------------
/mmdet/core/bbox/coder/pseudo_bbox_coder.py:
--------------------------------------------------------------------------------
1 | from ..builder import BBOX_CODERS
2 | from .base_bbox_coder import BaseBBoxCoder
3 | 
4 | 
5 | @BBOX_CODERS.register_module()
6 | class PseudoBBoxCoder(BaseBBoxCoder):
7 |     """Pseudo bounding box coder."""
8 | 
9 |     def __init__(self, **kwargs):
10 |         super(PseudoBBoxCoder, self).__init__(**kwargs)
11 | 
12 |     def encode(self, bboxes, gt_bboxes):
13 |         """torch.Tensor: return the given ``gt_bboxes``"""
14 |         return gt_bboxes
15 | 
16 |     def decode(self, bboxes, pred_bboxes):
17 |         """torch.Tensor: return the given ``pred_bboxes``"""
18 |         return pred_bboxes
19 | 
--------------------------------------------------------------------------------
/mmdet/core/bbox/coder/yolo_bbox_coder.py:
--------------------------------------------------------------------------------
1 | import torch
2 | 
3 | from ..builder import BBOX_CODERS
4 | from .base_bbox_coder import BaseBBoxCoder
5 | 
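# A quick round-trip sanity check of the coder defined below (the values are
# illustrative, chosen by the editor, and this snippet is not part of the
# original file; it relies on the `torch` import above):
#
#     coder = YOLOBBoxCoder()
#     anchors = torch.tensor([[0., 0., 32., 32.]])
#     gts = torch.tensor([[4., 4., 28., 36.]])
#     deltas = coder.encode(anchors, gts, stride=32)
#     decoded = coder.decode(anchors, deltas, stride=32)
#     # `decoded` reproduces `gts` up to the clamping by `eps`.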
6 | 
7 | @BBOX_CODERS.register_module()
8 | class YOLOBBoxCoder(BaseBBoxCoder):
9 |     """YOLO BBox coder.
10 | 
11 |     Following `YOLO <https://arxiv.org/abs/1506.02640>`_, this coder divides
12 |     the image into grids, and encodes bbox (x1, y1, x2, y2) into (cx, cy, dw, dh).
13 |     cx, cy in [0., 1.], denote the relative center position w.r.t. the center
14 |     of bboxes. dw, dh are the same as :obj:`DeltaXYWHBBoxCoder`.
15 | 
16 |     Args:
17 |         eps (float): Min value of cx, cy when encoding.
18 |     """
19 | 
20 |     def __init__(self, eps=1e-6):
21 |         super(YOLOBBoxCoder, self).__init__()
22 |         self.eps = eps
23 | 
24 |     def encode(self, bboxes, gt_bboxes, stride):
25 |         """Get box regression transformation deltas that can be used to
26 |         transform the ``bboxes`` into the ``gt_bboxes``.
27 | 
28 |         Args:
29 |             bboxes (torch.Tensor): Source boxes, e.g., anchors.
30 |             gt_bboxes (torch.Tensor): Target of the transformation, e.g.,
31 |                 ground-truth boxes.
32 |             stride (torch.Tensor | int): Stride of bboxes.
33 | 
34 |         Returns:
35 |             torch.Tensor: Box transformation deltas
36 |         """
37 | 
38 |         assert bboxes.size(0) == gt_bboxes.size(0)
39 |         assert bboxes.size(-1) == gt_bboxes.size(-1) == 4
40 |         x_center_gt = (gt_bboxes[..., 0] + gt_bboxes[..., 2]) * 0.5
41 |         y_center_gt = (gt_bboxes[..., 1] + gt_bboxes[..., 3]) * 0.5
42 |         w_gt = gt_bboxes[..., 2] - gt_bboxes[..., 0]
43 |         h_gt = gt_bboxes[..., 3] - gt_bboxes[..., 1]
44 |         x_center = (bboxes[..., 0] + bboxes[..., 2]) * 0.5
45 |         y_center = (bboxes[..., 1] + bboxes[..., 3]) * 0.5
46 |         w = bboxes[..., 2] - bboxes[..., 0]
47 |         h = bboxes[..., 3] - bboxes[..., 1]
48 |         w_target = torch.log((w_gt / w).clamp(min=self.eps))
49 |         h_target = torch.log((h_gt / h).clamp(min=self.eps))
50 |         x_center_target = ((x_center_gt - x_center) / stride + 0.5).clamp(
51 |             self.eps, 1 - self.eps)
52 |         y_center_target = ((y_center_gt - y_center) / stride + 0.5).clamp(
53 |             self.eps, 1 - self.eps)
54 |         encoded_bboxes = torch.stack(
55 |             [x_center_target, y_center_target, w_target, h_target], dim=-1)
56 |         return encoded_bboxes
57 | 
58 |     def decode(self, bboxes, pred_bboxes, stride):
59 |         """Apply transformation `pred_bboxes` to `bboxes`.
60 | 
61 |         Args:
62 |             bboxes (torch.Tensor): Basic boxes, e.g., anchors.
63 |             pred_bboxes (torch.Tensor): Encoded boxes with shape (N, 4).
64 |             stride (torch.Tensor | int): Strides of bboxes.
65 | 
66 |         Returns:
67 |             torch.Tensor: Decoded boxes.
68 | """ 69 | assert pred_bboxes.size(0) == bboxes.size(0) 70 | assert pred_bboxes.size(-1) == bboxes.size(-1) == 4 71 | x_center = (bboxes[..., 0] + bboxes[..., 2]) * 0.5 72 | y_center = (bboxes[..., 1] + bboxes[..., 3]) * 0.5 73 | w = bboxes[..., 2] - bboxes[..., 0] 74 | h = bboxes[..., 3] - bboxes[..., 1] 75 | # Get outputs x, y 76 | x_center_pred = (pred_bboxes[..., 0] - 0.5) * stride + x_center 77 | y_center_pred = (pred_bboxes[..., 1] - 0.5) * stride + y_center 78 | w_pred = torch.exp(pred_bboxes[..., 2]) * w 79 | h_pred = torch.exp(pred_bboxes[..., 3]) * h 80 | 81 | decoded_bboxes = torch.stack( 82 | (x_center_pred - w_pred / 2, y_center_pred - h_pred / 2, 83 | x_center_pred + w_pred / 2, y_center_pred + h_pred / 2), 84 | dim=-1) 85 | 86 | return decoded_bboxes 87 | -------------------------------------------------------------------------------- /mmdet/core/bbox/demodata.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | 5 | def ensure_rng(rng=None): 6 | """Simple version of the ``kwarray.ensure_rng`` 7 | 8 | Args: 9 | rng (int | numpy.random.RandomState | None): 10 | if None, then defaults to the global rng. Otherwise this can be an 11 | integer or a RandomState class 12 | Returns: 13 | (numpy.random.RandomState) : rng - 14 | a numpy random number generator 15 | 16 | References: 17 | https://gitlab.kitware.com/computer-vision/kwarray/blob/master/kwarray/util_random.py#L270 18 | """ 19 | 20 | if rng is None: 21 | rng = np.random.mtrand._rand 22 | elif isinstance(rng, int): 23 | rng = np.random.RandomState(rng) 24 | else: 25 | rng = rng 26 | return rng 27 | 28 | 29 | def random_boxes(num=1, scale=1, rng=None): 30 | """Simple version of ``kwimage.Boxes.random`` 31 | 32 | Returns: 33 | Tensor: shape (n, 4) in x1, y1, x2, y2 format. 
34 | 
35 |     References:
36 |         https://gitlab.kitware.com/computer-vision/kwimage/blob/master/kwimage/structs/boxes.py#L1390
37 | 
38 |     Example:
39 |         >>> num = 3
40 |         >>> scale = 512
41 |         >>> rng = 0
42 |         >>> boxes = random_boxes(num, scale, rng)
43 |         >>> print(boxes)
44 |         tensor([[280.9925, 278.9802, 308.6148, 366.1769],
45 |                 [216.9113, 330.6978, 224.0446, 456.5878],
46 |                 [405.3632, 196.3221, 493.3953, 270.7942]])
47 |     """
48 |     rng = ensure_rng(rng)
49 | 
50 |     tlbr = rng.rand(num, 4).astype(np.float32)
51 | 
52 |     tl_x = np.minimum(tlbr[:, 0], tlbr[:, 2])
53 |     tl_y = np.minimum(tlbr[:, 1], tlbr[:, 3])
54 |     br_x = np.maximum(tlbr[:, 0], tlbr[:, 2])
55 |     br_y = np.maximum(tlbr[:, 1], tlbr[:, 3])
56 | 
57 |     tlbr[:, 0] = tl_x * scale
58 |     tlbr[:, 1] = tl_y * scale
59 |     tlbr[:, 2] = br_x * scale
60 |     tlbr[:, 3] = br_y * scale
61 | 
62 |     boxes = torch.from_numpy(tlbr)
63 |     return boxes
64 | 
--------------------------------------------------------------------------------
/mmdet/core/bbox/iou_calculators/__init__.py:
--------------------------------------------------------------------------------
1 | from .builder import build_iou_calculator
2 | from .iou2d_calculator import BboxOverlaps2D, bbox_overlaps
3 | 
4 | __all__ = ['build_iou_calculator', 'BboxOverlaps2D', 'bbox_overlaps']
5 | 
--------------------------------------------------------------------------------
/mmdet/core/bbox/iou_calculators/builder.py:
--------------------------------------------------------------------------------
1 | from mmcv.utils import Registry, build_from_cfg
2 | 
3 | IOU_CALCULATORS = Registry('IoU calculator')
4 | 
5 | 
6 | def build_iou_calculator(cfg, default_args=None):
7 |     """Builder of IoU calculator."""
8 |     return build_from_cfg(cfg, IOU_CALCULATORS, default_args)
9 | 
--------------------------------------------------------------------------------
/mmdet/core/bbox/match_costs/__init__.py:
--------------------------------------------------------------------------------
1 | from .builder import build_match_cost
2 | from .match_cost import BBoxL1Cost, ClassificationCost, FocalLossCost, IoUCost
3 | 
4 | __all__ = [
5 |     'build_match_cost', 'ClassificationCost', 'BBoxL1Cost', 'IoUCost',
6 |     'FocalLossCost'
7 | ]
8 | 
--------------------------------------------------------------------------------
/mmdet/core/bbox/match_costs/builder.py:
--------------------------------------------------------------------------------
1 | from mmcv.utils import Registry, build_from_cfg
2 | 
3 | MATCH_COST = Registry('Match Cost')
4 | 
5 | 
6 | def build_match_cost(cfg, default_args=None):
7 |     """Builder of match cost."""
8 |     return build_from_cfg(cfg, MATCH_COST, default_args)
9 | 
--------------------------------------------------------------------------------
/mmdet/core/bbox/samplers/__init__.py:
--------------------------------------------------------------------------------
1 | from .base_sampler import BaseSampler
2 | from .combined_sampler import CombinedSampler
3 | from .instance_balanced_pos_sampler import InstanceBalancedPosSampler
4 | from .iou_balanced_neg_sampler import IoUBalancedNegSampler
5 | from .ohem_sampler import OHEMSampler
6 | from .pseudo_sampler import PseudoSampler
7 | from .random_sampler import RandomSampler
8 | from .sampling_result import SamplingResult
9 | from .score_hlr_sampler import ScoreHLRSampler
10 | 
11 | __all__ = [
12 |     'BaseSampler', 'PseudoSampler', 'RandomSampler',
13 |     'InstanceBalancedPosSampler', 'IoUBalancedNegSampler', 'CombinedSampler',
14 |     'OHEMSampler', 'SamplingResult', 'ScoreHLRSampler'
15 | ]
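# A minimal usage sketch (editor's addition; the config values below are
# illustrative, not defaults). Samplers registered here are normally
# instantiated from a config dict via `build_sampler`:
#
#     from mmdet.core.bbox import build_sampler
#     sampler = build_sampler(
#         dict(type='RandomSampler', num=256, pos_fraction=0.5))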
16 | -------------------------------------------------------------------------------- /mmdet/core/bbox/samplers/combined_sampler.py: -------------------------------------------------------------------------------- 1 | from ..builder import BBOX_SAMPLERS, build_sampler 2 | from .base_sampler import BaseSampler 3 | 4 | 5 | @BBOX_SAMPLERS.register_module() 6 | class CombinedSampler(BaseSampler): 7 | """A sampler that combines positive sampler and negative sampler.""" 8 | 9 | def __init__(self, pos_sampler, neg_sampler, **kwargs): 10 | super(CombinedSampler, self).__init__(**kwargs) 11 | self.pos_sampler = build_sampler(pos_sampler, **kwargs) 12 | self.neg_sampler = build_sampler(neg_sampler, **kwargs) 13 | 14 | def _sample_pos(self, **kwargs): 15 | """Sample positive samples.""" 16 | raise NotImplementedError 17 | 18 | def _sample_neg(self, **kwargs): 19 | """Sample negative samples.""" 20 | raise NotImplementedError 21 | -------------------------------------------------------------------------------- /mmdet/core/bbox/samplers/instance_balanced_pos_sampler.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | from ..builder import BBOX_SAMPLERS 5 | from .random_sampler import RandomSampler 6 | 7 | 8 | @BBOX_SAMPLERS.register_module() 9 | class InstanceBalancedPosSampler(RandomSampler): 10 | """Instance balanced sampler that samples equal number of positive samples 11 | for each instance.""" 12 | 13 | def _sample_pos(self, assign_result, num_expected, **kwargs): 14 | """Sample positive boxes. 15 | 16 | Args: 17 | assign_result (:obj:`AssignResult`): The assigned results of boxes. 18 | num_expected (int): The number of expected positive samples 19 | 20 | Returns: 21 | Tensor or ndarray: sampled indices. 
22 |         """
23 |         pos_inds = torch.nonzero(assign_result.gt_inds > 0, as_tuple=False)
24 |         if pos_inds.numel() != 0:
25 |             pos_inds = pos_inds.squeeze(1)
26 |         if pos_inds.numel() <= num_expected:
27 |             return pos_inds
28 |         else:
29 |             unique_gt_inds = assign_result.gt_inds[pos_inds].unique()
30 |             num_gts = len(unique_gt_inds)
31 |             num_per_gt = int(round(num_expected / float(num_gts)) + 1)
32 |             sampled_inds = []
33 |             for i in unique_gt_inds:
34 |                 inds = torch.nonzero(
35 |                     assign_result.gt_inds == i.item(), as_tuple=False)
36 |                 if inds.numel() != 0:
37 |                     inds = inds.squeeze(1)
38 |                 else:
39 |                     continue
40 |                 if len(inds) > num_per_gt:
41 |                     inds = self.random_choice(inds, num_per_gt)
42 |                 sampled_inds.append(inds)
43 |             sampled_inds = torch.cat(sampled_inds)
44 |             if len(sampled_inds) < num_expected:
45 |                 num_extra = num_expected - len(sampled_inds)
46 |                 extra_inds = np.array(
47 |                     list(set(pos_inds.cpu()) - set(sampled_inds.cpu())))
48 |                 if len(extra_inds) > num_extra:
49 |                     extra_inds = self.random_choice(extra_inds, num_extra)
50 |                 extra_inds = torch.from_numpy(extra_inds).to(
51 |                     assign_result.gt_inds.device).long()
52 |                 sampled_inds = torch.cat([sampled_inds, extra_inds])
53 |             elif len(sampled_inds) > num_expected:
54 |                 sampled_inds = self.random_choice(sampled_inds, num_expected)
55 |             return sampled_inds
56 | 
--------------------------------------------------------------------------------
/mmdet/core/bbox/samplers/pseudo_sampler.py:
--------------------------------------------------------------------------------
1 | import torch
2 | 
3 | from ..builder import BBOX_SAMPLERS
4 | from .base_sampler import BaseSampler
5 | from .sampling_result import SamplingResult
6 | 
7 | 
8 | @BBOX_SAMPLERS.register_module()
9 | class PseudoSampler(BaseSampler):
10 |     """A pseudo sampler that does not actually do sampling."""
11 | 
12 |     def __init__(self, **kwargs):
13 |         pass
14 | 
15 |     def _sample_pos(self, **kwargs):
16 |         """Sample positive samples."""
17 |         raise NotImplementedError
18 | 
19 |     def _sample_neg(self, **kwargs):
20 |         """Sample negative samples."""
21 |         raise NotImplementedError
22 | 
23 |     def sample(self, assign_result, bboxes, gt_bboxes, **kwargs):
24 |         """Directly returns the positive and negative indices of samples.
25 | 
26 |         Args:
27 |             assign_result (:obj:`AssignResult`): Assigned results
28 |             bboxes (torch.Tensor): Bounding boxes
29 |             gt_bboxes (torch.Tensor): Ground truth boxes
30 | 
31 |         Returns:
32 |             :obj:`SamplingResult`: sampler results
33 |         """
34 |         pos_inds = torch.nonzero(
35 |             assign_result.gt_inds > 0, as_tuple=False).squeeze(-1).unique()
36 |         neg_inds = torch.nonzero(
37 |             assign_result.gt_inds == 0, as_tuple=False).squeeze(-1).unique()
38 |         gt_flags = bboxes.new_zeros(bboxes.shape[0], dtype=torch.uint8)
39 |         sampling_result = SamplingResult(pos_inds, neg_inds, bboxes, gt_bboxes,
40 |                                          assign_result, gt_flags)
41 |         return sampling_result
42 | 
--------------------------------------------------------------------------------
/mmdet/core/bbox/samplers/random_sampler.py:
--------------------------------------------------------------------------------
1 | import torch
2 | 
3 | from ..builder import BBOX_SAMPLERS
4 | from .base_sampler import BaseSampler
5 | 
6 | 
7 | @BBOX_SAMPLERS.register_module()
8 | class RandomSampler(BaseSampler):
9 |     """Random sampler.
10 | 
11 |     Args:
12 |         num (int): Number of samples
13 |         pos_fraction (float): Fraction of positive samples
14 |         neg_pos_ub (int, optional): Upper bound number of negative and
15 |             positive samples. Defaults to -1.
16 | add_gt_as_proposals (bool, optional): Whether to add ground truth 17 | boxes as proposals. Defaults to True. 18 | """ 19 | 20 | def __init__(self, 21 | num, 22 | pos_fraction, 23 | neg_pos_ub=-1, 24 | add_gt_as_proposals=True, 25 | **kwargs): 26 | from mmdet.core.bbox import demodata 27 | super(RandomSampler, self).__init__(num, pos_fraction, neg_pos_ub, 28 | add_gt_as_proposals) 29 | self.rng = demodata.ensure_rng(kwargs.get('rng', None)) 30 | 31 | def random_choice(self, gallery, num): 32 | """Random select some elements from the gallery. 33 | 34 | If `gallery` is a Tensor, the returned indices will be a Tensor; 35 | If `gallery` is a ndarray or list, the returned indices will be a 36 | ndarray. 37 | 38 | Args: 39 | gallery (Tensor | ndarray | list): indices pool. 40 | num (int): expected sample num. 41 | 42 | Returns: 43 | Tensor or ndarray: sampled indices. 44 | """ 45 | assert len(gallery) >= num 46 | 47 | is_tensor = isinstance(gallery, torch.Tensor) 48 | if not is_tensor: 49 | if torch.cuda.is_available(): 50 | device = torch.cuda.current_device() 51 | else: 52 | device = 'cpu' 53 | gallery = torch.tensor(gallery, dtype=torch.long, device=device) 54 | perm = torch.randperm(gallery.numel(), device=gallery.device)[:num] 55 | rand_inds = gallery[perm] 56 | if not is_tensor: 57 | rand_inds = rand_inds.cpu().numpy() 58 | return rand_inds 59 | 60 | def _sample_pos(self, assign_result, num_expected, **kwargs): 61 | """Randomly sample some positive samples.""" 62 | pos_inds = torch.nonzero(assign_result.gt_inds > 0, as_tuple=False) 63 | if pos_inds.numel() != 0: 64 | pos_inds = pos_inds.squeeze(1) 65 | if pos_inds.numel() <= num_expected: 66 | return pos_inds 67 | else: 68 | return self.random_choice(pos_inds, num_expected) 69 | 70 | def _sample_neg(self, assign_result, num_expected, **kwargs): 71 | """Randomly sample some negative samples.""" 72 | neg_inds = torch.nonzero(assign_result.gt_inds == 0, as_tuple=False) 73 | if neg_inds.numel() != 0: 74 | neg_inds = neg_inds.squeeze(1) 75 | if len(neg_inds) <= num_expected: 76 | return neg_inds 77 | else: 78 | return self.random_choice(neg_inds, num_expected) 79 | -------------------------------------------------------------------------------- /mmdet/core/evaluation/__init__.py: -------------------------------------------------------------------------------- 1 | from .class_names import (cityscapes_classes, coco_classes, dataset_aliases, 2 | get_classes, imagenet_det_classes, 3 | imagenet_vid_classes, voc_classes) 4 | from .eval_hooks import DistEvalHook, EvalHook 5 | from .mean_ap import average_precision, eval_map, print_map_summary 6 | from .recall import (eval_recalls, plot_iou_recall, plot_num_recall, 7 | print_recall_summary) 8 | 9 | __all__ = [ 10 | 'voc_classes', 'imagenet_det_classes', 'imagenet_vid_classes', 11 | 'coco_classes', 'cityscapes_classes', 'dataset_aliases', 'get_classes', 12 | 'DistEvalHook', 'EvalHook', 'average_precision', 'eval_map', 13 | 'print_map_summary', 'eval_recalls', 'print_recall_summary', 14 | 'plot_num_recall', 'plot_iou_recall' 15 | ] 16 | -------------------------------------------------------------------------------- /mmdet/core/evaluation/bbox_overlaps.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def bbox_overlaps(bboxes1, bboxes2, mode='iou', eps=1e-6): 5 | """Calculate the ious between each bbox of bboxes1 and bboxes2. 
6 | 
7 |     Args:
8 |         bboxes1(ndarray): shape (n, 4)
9 |         bboxes2(ndarray): shape (k, 4)
10 |         mode(str): iou (intersection over union) or iof (intersection
11 |             over foreground)
12 | 
13 |     Returns:
14 |         ious(ndarray): shape (n, k)
15 |     """
16 | 
17 |     assert mode in ['iou', 'iof']
18 | 
19 |     bboxes1 = bboxes1.astype(np.float32)
20 |     bboxes2 = bboxes2.astype(np.float32)
21 |     rows = bboxes1.shape[0]
22 |     cols = bboxes2.shape[0]
23 |     ious = np.zeros((rows, cols), dtype=np.float32)
24 |     if rows * cols == 0:
25 |         return ious
26 |     exchange = False
27 |     if bboxes1.shape[0] > bboxes2.shape[0]:
28 |         bboxes1, bboxes2 = bboxes2, bboxes1
29 |         ious = np.zeros((cols, rows), dtype=np.float32)
30 |         exchange = True
31 |     area1 = (bboxes1[:, 2] - bboxes1[:, 0]) * (bboxes1[:, 3] - bboxes1[:, 1])
32 |     area2 = (bboxes2[:, 2] - bboxes2[:, 0]) * (bboxes2[:, 3] - bboxes2[:, 1])
33 |     for i in range(bboxes1.shape[0]):
34 |         x_start = np.maximum(bboxes1[i, 0], bboxes2[:, 0])
35 |         y_start = np.maximum(bboxes1[i, 1], bboxes2[:, 1])
36 |         x_end = np.minimum(bboxes1[i, 2], bboxes2[:, 2])
37 |         y_end = np.minimum(bboxes1[i, 3], bboxes2[:, 3])
38 |         overlap = np.maximum(x_end - x_start, 0) * np.maximum(
39 |             y_end - y_start, 0)
40 |         if mode == 'iou':
41 |             union = area1[i] + area2 - overlap
42 |         else:
43 |             union = area1[i] if not exchange else area2
44 |         union = np.maximum(union, eps)
45 |         ious[i, :] = overlap / union
46 |     if exchange:
47 |         ious = ious.T
48 |     return ious
49 | 
--------------------------------------------------------------------------------
/mmdet/core/export/__init__.py:
--------------------------------------------------------------------------------
1 | from .pytorch2onnx import (build_model_from_cfg,
2 |                            generate_inputs_and_wrap_model,
3 |                            preprocess_example_input)
4 | 
5 | __all__ = [
6 |     'build_model_from_cfg', 'generate_inputs_and_wrap_model',
7 |     'preprocess_example_input'
8 | ]
9 | 
--------------------------------------------------------------------------------
/mmdet/core/fp16/__init__.py:
--------------------------------------------------------------------------------
1 | from .deprecated_fp16_utils import \
2 |     DeprecatedFp16OptimizerHook as Fp16OptimizerHook
3 | from .deprecated_fp16_utils import deprecated_auto_fp16 as auto_fp16
4 | from .deprecated_fp16_utils import deprecated_force_fp32 as force_fp32
5 | from .deprecated_fp16_utils import \
6 |     deprecated_wrap_fp16_model as wrap_fp16_model
7 | 
8 | __all__ = ['auto_fp16', 'force_fp32', 'Fp16OptimizerHook', 'wrap_fp16_model']
9 | 
--------------------------------------------------------------------------------
/mmdet/core/fp16/deprecated_fp16_utils.py:
--------------------------------------------------------------------------------
1 | import warnings
2 | 
3 | from mmcv.runner import (Fp16OptimizerHook, auto_fp16, force_fp32,
4 |                          wrap_fp16_model)
5 | 
6 | 
7 | class DeprecatedFp16OptimizerHook(Fp16OptimizerHook):
8 |     """A wrapper class for the FP16 optimizer hook. This class wraps
9 |     :class:`Fp16OptimizerHook` in `mmcv.runner` and shows a warning that the
10 |     :class:`Fp16OptimizerHook` from `mmdet.core` will be deprecated.
11 | 
12 |     Refer to :class:`Fp16OptimizerHook` in `mmcv.runner` for more details.
13 | 
14 |     Args:
15 |         loss_scale (float): Scale factor multiplied with loss.
16 |     """
17 | 
18 |     def __init__(self, *args, **kwargs):
19 |         super().__init__(*args, **kwargs)
20 |         warnings.warn(
21 |             'Importing Fp16OptimizerHook from "mmdet.core" will be '
22 |             'deprecated in the future. Please import it from "mmcv.runner" '
23 |             'instead')
24 | 
25 | 
26 | def deprecated_auto_fp16(*args, **kwargs):
27 |     warnings.warn(
28 |         'Importing auto_fp16 from "mmdet.core" will be '
29 |         'deprecated in the future. Please import it from "mmcv.runner" '
30 |         'instead')
31 |     return auto_fp16(*args, **kwargs)
32 | 
33 | 
34 | def deprecated_force_fp32(*args, **kwargs):
35 |     warnings.warn(
36 |         'Importing force_fp32 from "mmdet.core" will be '
37 |         'deprecated in the future. Please import it from "mmcv.runner" '
38 |         'instead')
39 |     return force_fp32(*args, **kwargs)
40 | 
41 | 
42 | def deprecated_wrap_fp16_model(*args, **kwargs):
43 |     warnings.warn(
44 |         'Importing wrap_fp16_model from "mmdet.core" will be '
45 |         'deprecated in the future. Please import it from "mmcv.runner" '
46 |         'instead')
47 |     wrap_fp16_model(*args, **kwargs)
48 | 
--------------------------------------------------------------------------------
/mmdet/core/mask/__init__.py:
--------------------------------------------------------------------------------
1 | from .mask_target import mask_target
2 | from .structures import BaseInstanceMasks, BitmapMasks, PolygonMasks
3 | from .utils import encode_mask_results, split_combined_polys
4 | 
5 | __all__ = [
6 |     'split_combined_polys', 'mask_target', 'BaseInstanceMasks', 'BitmapMasks',
7 |     'PolygonMasks', 'encode_mask_results'
8 | ]
9 | 
--------------------------------------------------------------------------------
/mmdet/core/mask/mask_target.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import torch
3 | from torch.nn.modules.utils import _pair
4 | 
5 | 
6 | def mask_target(pos_proposals_list, pos_assigned_gt_inds_list, gt_masks_list,
7 |                 cfg):
8 |     """Compute mask targets for positive proposals in multiple images.
9 | 
10 |     Args:
11 |         pos_proposals_list (list[Tensor]): Positive proposals in multiple
12 |             images.
13 |         pos_assigned_gt_inds_list (list[Tensor]): Assigned GT indices for the
14 |             positive proposals.
15 |         gt_masks_list (list[:obj:`BaseInstanceMasks`]): Ground truth masks of
16 |             each image.
17 |         cfg (dict): Config dict that specifies the mask size.
18 | 
19 |     Returns:
20 |         list[Tensor]: Mask target of each image.
21 |     """
22 |     cfg_list = [cfg for _ in range(len(pos_proposals_list))]
23 |     mask_targets = map(mask_target_single, pos_proposals_list,
24 |                        pos_assigned_gt_inds_list, gt_masks_list, cfg_list)
25 |     mask_targets = list(mask_targets)
26 |     if len(mask_targets) > 0:
27 |         mask_targets = torch.cat(mask_targets)
28 |     return mask_targets
29 | 
30 | 
31 | def mask_target_single(pos_proposals, pos_assigned_gt_inds, gt_masks, cfg):
32 |     """Compute mask targets for each positive proposal in the image.
33 | 
34 |     Args:
35 |         pos_proposals (Tensor): Positive proposals.
36 |         pos_assigned_gt_inds (Tensor): Assigned GT inds of positive proposals.
37 |         gt_masks (:obj:`BaseInstanceMasks`): GT masks in the format of Bitmap
38 |             or Polygon.
39 |         cfg (dict): Config dict that indicates the mask size.
40 | 
41 |     Returns:
42 |         Tensor: Mask target of each positive proposal in the image.
43 | """ 44 | device = pos_proposals.device 45 | mask_size = _pair(cfg.mask_size) 46 | num_pos = pos_proposals.size(0) 47 | if num_pos > 0: 48 | proposals_np = pos_proposals.cpu().numpy() 49 | maxh, maxw = gt_masks.height, gt_masks.width 50 | proposals_np[:, [0, 2]] = np.clip(proposals_np[:, [0, 2]], 0, maxw) 51 | proposals_np[:, [1, 3]] = np.clip(proposals_np[:, [1, 3]], 0, maxh) 52 | pos_assigned_gt_inds = pos_assigned_gt_inds.cpu().numpy() 53 | 54 | mask_targets = gt_masks.crop_and_resize( 55 | proposals_np, mask_size, device=device, 56 | inds=pos_assigned_gt_inds).to_ndarray() 57 | 58 | mask_targets = torch.from_numpy(mask_targets).float().to(device) 59 | else: 60 | mask_targets = pos_proposals.new_zeros((0, ) + mask_size) 61 | 62 | return mask_targets 63 | -------------------------------------------------------------------------------- /mmdet/core/mask/utils.py: -------------------------------------------------------------------------------- 1 | import mmcv 2 | import numpy as np 3 | import pycocotools.mask as mask_util 4 | 5 | 6 | def split_combined_polys(polys, poly_lens, polys_per_mask): 7 | """Split the combined 1-D polys into masks. 8 | 9 | A mask is represented as a list of polys, and a poly is represented as 10 | a 1-D array. In dataset, all masks are concatenated into a single 1-D 11 | tensor. Here we need to split the tensor into original representations. 12 | 13 | Args: 14 | polys (list): a list (length = image num) of 1-D tensors 15 | poly_lens (list): a list (length = image num) of poly length 16 | polys_per_mask (list): a list (length = image num) of poly number 17 | of each mask 18 | 19 | Returns: 20 | list: a list (length = image num) of list (length = mask num) of \ 21 | list (length = poly num) of numpy array. 22 | """ 23 | mask_polys_list = [] 24 | for img_id in range(len(polys)): 25 | polys_single = polys[img_id] 26 | polys_lens_single = poly_lens[img_id].tolist() 27 | polys_per_mask_single = polys_per_mask[img_id].tolist() 28 | 29 | split_polys = mmcv.slice_list(polys_single, polys_lens_single) 30 | mask_polys = mmcv.slice_list(split_polys, polys_per_mask_single) 31 | mask_polys_list.append(mask_polys) 32 | return mask_polys_list 33 | 34 | 35 | # TODO: move this function to more proper place 36 | def encode_mask_results(mask_results): 37 | """Encode bitmap mask to RLE code. 38 | 39 | Args: 40 | mask_results (list | tuple[list]): bitmap mask results. 41 | In mask scoring rcnn, mask_results is a tuple of (segm_results, 42 | segm_cls_score). 43 | 44 | Returns: 45 | list | tuple: RLE encoded mask. 
46 | """ 47 | if isinstance(mask_results, tuple): # mask scoring 48 | cls_segms, cls_mask_scores = mask_results 49 | else: 50 | cls_segms = mask_results 51 | num_classes = len(cls_segms) 52 | encoded_mask_results = [[] for _ in range(num_classes)] 53 | for i in range(len(cls_segms)): 54 | for cls_segm in cls_segms[i]: 55 | encoded_mask_results[i].append( 56 | mask_util.encode( 57 | np.array( 58 | cls_segm[:, :, np.newaxis], order='F', 59 | dtype='uint8'))[0]) # encoded with RLE 60 | if isinstance(mask_results, tuple): 61 | return encoded_mask_results, cls_mask_scores 62 | else: 63 | return encoded_mask_results 64 | -------------------------------------------------------------------------------- /mmdet/core/post_processing/__init__.py: -------------------------------------------------------------------------------- 1 | from .bbox_nms import fast_nms, multiclass_nms 2 | from .merge_augs import (merge_aug_bboxes, merge_aug_masks, 3 | merge_aug_proposals, merge_aug_scores) 4 | 5 | __all__ = [ 6 | 'multiclass_nms', 'merge_aug_proposals', 'merge_aug_bboxes', 7 | 'merge_aug_scores', 'merge_aug_masks', 'fast_nms' 8 | ] 9 | -------------------------------------------------------------------------------- /mmdet/core/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .dist_utils import DistOptimizerHook, allreduce_grads, reduce_mean 2 | from .misc import mask2ndarray, multi_apply, unmap 3 | 4 | __all__ = [ 5 | 'allreduce_grads', 'DistOptimizerHook', 'reduce_mean', 'multi_apply', 6 | 'unmap', 'mask2ndarray' 7 | ] 8 | -------------------------------------------------------------------------------- /mmdet/core/utils/dist_utils.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | from collections import OrderedDict 3 | 4 | import torch.distributed as dist 5 | from mmcv.runner import OptimizerHook 6 | from torch._utils import (_flatten_dense_tensors, _take_tensors, 7 | _unflatten_dense_tensors) 8 | 9 | 10 | def _allreduce_coalesced(tensors, world_size, bucket_size_mb=-1): 11 | if bucket_size_mb > 0: 12 | bucket_size_bytes = bucket_size_mb * 1024 * 1024 13 | buckets = _take_tensors(tensors, bucket_size_bytes) 14 | else: 15 | buckets = OrderedDict() 16 | for tensor in tensors: 17 | tp = tensor.type() 18 | if tp not in buckets: 19 | buckets[tp] = [] 20 | buckets[tp].append(tensor) 21 | buckets = buckets.values() 22 | 23 | for bucket in buckets: 24 | flat_tensors = _flatten_dense_tensors(bucket) 25 | dist.all_reduce(flat_tensors) 26 | flat_tensors.div_(world_size) 27 | for tensor, synced in zip( 28 | bucket, _unflatten_dense_tensors(flat_tensors, bucket)): 29 | tensor.copy_(synced) 30 | 31 | 32 | def allreduce_grads(params, coalesce=True, bucket_size_mb=-1): 33 | """Allreduce gradients. 34 | 35 | Args: 36 | params (list[torch.Parameters]): List of parameters of a model 37 | coalesce (bool, optional): Whether allreduce parameters as a whole. 38 | Defaults to True. 39 | bucket_size_mb (int, optional): Size of bucket, the unit is MB. 40 | Defaults to -1. 
41 |     """
42 |     grads = [
43 |         param.grad.data for param in params
44 |         if param.requires_grad and param.grad is not None
45 |     ]
46 |     world_size = dist.get_world_size()
47 |     if coalesce:
48 |         _allreduce_coalesced(grads, world_size, bucket_size_mb)
49 |     else:
50 |         for tensor in grads:
51 |             dist.all_reduce(tensor.div_(world_size))
52 | 
53 | 
54 | class DistOptimizerHook(OptimizerHook):
55 |     """Deprecated optimizer hook for distributed training."""
56 | 
57 |     def __init__(self, *args, **kwargs):
58 |         warnings.warn('"DistOptimizerHook" is deprecated, please switch to '
59 |                       '"mmcv.runner.OptimizerHook".')
60 |         super().__init__(*args, **kwargs)
61 | 
62 | 
63 | def reduce_mean(tensor):
64 |     """Obtain the mean of a tensor on different GPUs."""
65 |     if not (dist.is_available() and dist.is_initialized()):
66 |         return tensor
67 |     tensor = tensor.clone()
68 |     dist.all_reduce(tensor.div_(dist.get_world_size()), op=dist.ReduceOp.SUM)
69 |     return tensor
70 | 
--------------------------------------------------------------------------------
/mmdet/core/utils/misc.py:
--------------------------------------------------------------------------------
1 | from functools import partial
2 | 
3 | import numpy as np
4 | import torch
5 | from six.moves import map, zip
6 | 
7 | from ..mask.structures import BitmapMasks, PolygonMasks
8 | 
9 | 
10 | def multi_apply(func, *args, **kwargs):
11 |     """Apply function to a list of arguments.
12 | 
13 |     Note:
14 |         This function applies the ``func`` to multiple inputs and
15 |         maps the multiple outputs of the ``func`` into different
16 |         lists. Each list contains the same type of outputs corresponding
17 |         to different inputs.
18 | 
19 |     Args:
20 |         func (Function): A function that will be applied to a list of
21 |             arguments
22 | 
23 |     Returns:
24 |         tuple(list): A tuple containing multiple lists, each of which \
25 |             contains one kind of result returned by the function
26 |     """
27 |     pfunc = partial(func, **kwargs) if kwargs else func
28 |     map_results = map(pfunc, *args)
29 |     return tuple(map(list, zip(*map_results)))
30 | 
31 | 
32 | def unmap(data, count, inds, fill=0):
33 |     """Unmap a subset of items (data) back to the original set of items (of
34 |     size count)."""
35 |     if data.dim() == 1:
36 |         ret = data.new_full((count, ), fill)
37 |         ret[inds.type(torch.bool)] = data
38 |     else:
39 |         new_size = (count, ) + data.size()[1:]
40 |         ret = data.new_full(new_size, fill)
41 |         ret[inds.type(torch.bool), :] = data
42 |     return ret
43 | 
44 | 
45 | def mask2ndarray(mask):
46 |     """Convert mask to ndarray.
47 | 
48 |     Args:
49 |         mask (:obj:`BitmapMasks` or :obj:`PolygonMasks` or
50 |             torch.Tensor or np.ndarray): The mask to be converted.
51 | 
52 |     Returns:
53 |         np.ndarray: The converted mask of shape (n, h, w).
54 |     """
55 |     if isinstance(mask, (BitmapMasks, PolygonMasks)):
56 |         mask = mask.to_ndarray()
57 |     elif isinstance(mask, torch.Tensor):
58 |         mask = mask.detach().cpu().numpy()
59 |     elif not isinstance(mask, np.ndarray):
60 |         raise TypeError(f'Unsupported {type(mask)} data type')
61 |     return mask
62 | 
--------------------------------------------------------------------------------
/mmdet/core/visualization/__init__.py:
--------------------------------------------------------------------------------
1 | from .image import (color_val_matplotlib, imshow_det_bboxes,
2 |                     imshow_gt_det_bboxes)
3 | 
4 | __all__ = ['imshow_det_bboxes', 'imshow_gt_det_bboxes', 'color_val_matplotlib']
5 | 
--------------------------------------------------------------------------------
/mmdet/datasets/__init__.py:
--------------------------------------------------------------------------------
1 | from .builder import DATASETS, PIPELINES, build_dataloader, build_dataset
2 | from .coco import CocoDataset
3 | from .coco_split import CocoSplitDataset
4 | from .dataset_wrappers import (ClassBalancedDataset, ConcatDataset,
5 |                                RepeatDataset)
6 | from .samplers import DistributedGroupSampler, DistributedSampler, GroupSampler
7 | from .utils import get_loading_pipeline, replace_ImageToTensor
8 | from .custom import CustomDataset
9 | __all__ = [
10 |     'CustomDataset', 'CocoDataset', 'GroupSampler', 'DistributedGroupSampler',
11 |     'DistributedSampler', 'build_dataloader', 'ConcatDataset', 'RepeatDataset',
12 |     'ClassBalancedDataset', 'DATASETS', 'PIPELINES',
13 |     'build_dataset', 'replace_ImageToTensor', 'get_loading_pipeline',
14 |     'CocoSplitDataset'
15 | ]
16 | 
--------------------------------------------------------------------------------
/mmdet/datasets/pipelines/__init__.py:
--------------------------------------------------------------------------------
1 | from .auto_augment import (AutoAugment, BrightnessTransform, ColorTransform,
2 |                            ContrastTransform, EqualizeTransform, Rotate, Shear,
3 |                            Translate)
4 | from .compose import Compose
5 | from .formating import (Collect, DefaultFormatBundle, ImageToTensor,
6 |                         ToDataContainer, ToTensor, Transpose, to_tensor)
7 | from .instaboost import InstaBoost
8 | from .loading import (LoadAnnotations, LoadImageFromFile, LoadImageFromWebcam,
9 |                       LoadMultiChannelImageFromFiles, LoadProposals)
10 | from .test_time_aug import MultiScaleFlipAug
11 | from .transforms import (Albu, CutOut, Expand, MinIoURandomCrop, Normalize,
12 |                          Pad, PhotoMetricDistortion, RandomCenterCropPad,
13 |                          RandomCrop, RandomFlip, Resize, SegRescale)
14 | 
15 | __all__ = [
16 |     'Compose', 'to_tensor', 'ToTensor', 'ImageToTensor', 'ToDataContainer',
17 |     'Transpose', 'Collect', 'DefaultFormatBundle', 'LoadAnnotations',
18 |     'LoadImageFromFile', 'LoadImageFromWebcam',
19 |     'LoadMultiChannelImageFromFiles', 'LoadProposals', 'MultiScaleFlipAug',
20 |     'Resize', 'RandomFlip', 'Pad', 'RandomCrop', 'Normalize', 'SegRescale',
21 |     'MinIoURandomCrop', 'Expand', 'PhotoMetricDistortion', 'Albu',
22 |     'InstaBoost', 'RandomCenterCropPad', 'AutoAugment', 'CutOut', 'Shear',
23 |     'Rotate', 'ColorTransform', 'EqualizeTransform', 'BrightnessTransform',
24 |     'ContrastTransform', 'Translate'
25 | ]
26 | 
--------------------------------------------------------------------------------
/mmdet/datasets/pipelines/compose.py:
--------------------------------------------------------------------------------
1 | import collections
2 | 
3 | from mmcv.utils import build_from_cfg
4 | 
5 | from ..builder import PIPELINES
6 | 
7 | 
8 | @PIPELINES.register_module()
9 | class Compose(object):
10 |     """Compose multiple transforms sequentially.
11 | 
12 |     Args:
13 |         transforms (Sequence[dict | callable]): Sequence of transform objects
14 |             or config dicts to be composed.
15 |     """
16 | 
17 |     def __init__(self, transforms):
18 |         assert isinstance(transforms, collections.abc.Sequence)
19 |         self.transforms = []
20 |         for transform in transforms:
21 |             if isinstance(transform, dict):
22 |                 transform = build_from_cfg(transform, PIPELINES)
23 |                 self.transforms.append(transform)
24 |             elif callable(transform):
25 |                 self.transforms.append(transform)
26 |             else:
27 |                 raise TypeError('transform must be callable or a dict')
28 | 
29 |     def __call__(self, data):
30 |         """Call function to apply transforms sequentially.
31 | 
32 |         Args:
33 |             data (dict): A result dict containing the data to transform.
34 | 
35 |         Returns:
36 |            dict: Transformed data.
37 |         """
38 | 
39 |         for t in self.transforms:
40 |             data = t(data)
41 |             if data is None:
42 |                 return None
43 |         return data
44 | 
45 |     def __repr__(self):
46 |         format_string = self.__class__.__name__ + '('
47 |         for t in self.transforms:
48 |             format_string += '\n'
49 |             format_string += f'    {t}'
50 |         format_string += '\n)'
51 |         return format_string
52 | 
--------------------------------------------------------------------------------
/mmdet/datasets/pipelines/instaboost.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | 
3 | from ..builder import PIPELINES
4 | 
5 | 
6 | @PIPELINES.register_module()
7 | class InstaBoost(object):
8 |     r"""Data augmentation method in `InstaBoost: Boosting Instance
9 |     Segmentation Via Probability Map Guided Copy-Pasting
10 |     <https://arxiv.org/abs/1908.07801>`_.
11 | 
12 |     Refer to https://github.com/GothicAi/Instaboost for implementation details.
13 | """ 14 | 15 | def __init__(self, 16 | action_candidate=('normal', 'horizontal', 'skip'), 17 | action_prob=(1, 0, 0), 18 | scale=(0.8, 1.2), 19 | dx=15, 20 | dy=15, 21 | theta=(-1, 1), 22 | color_prob=0.5, 23 | hflag=False, 24 | aug_ratio=0.5): 25 | try: 26 | import instaboostfast as instaboost 27 | except ImportError: 28 | raise ImportError( 29 | 'Please run "pip install instaboostfast" ' 30 | 'to install instaboostfast first for instaboost augmentation.') 31 | self.cfg = instaboost.InstaBoostConfig(action_candidate, action_prob, 32 | scale, dx, dy, theta, 33 | color_prob, hflag) 34 | self.aug_ratio = aug_ratio 35 | 36 | def _load_anns(self, results): 37 | labels = results['ann_info']['labels'] 38 | masks = results['ann_info']['masks'] 39 | bboxes = results['ann_info']['bboxes'] 40 | n = len(labels) 41 | 42 | anns = [] 43 | for i in range(n): 44 | label = labels[i] 45 | bbox = bboxes[i] 46 | mask = masks[i] 47 | x1, y1, x2, y2 = bbox 48 | # assert (x2 - x1) >= 1 and (y2 - y1) >= 1 49 | bbox = [x1, y1, x2 - x1, y2 - y1] 50 | anns.append({ 51 | 'category_id': label, 52 | 'segmentation': mask, 53 | 'bbox': bbox 54 | }) 55 | 56 | return anns 57 | 58 | def _parse_anns(self, results, anns, img): 59 | gt_bboxes = [] 60 | gt_labels = [] 61 | gt_masks_ann = [] 62 | for ann in anns: 63 | x1, y1, w, h = ann['bbox'] 64 | # TODO: more essential bug need to be fixed in instaboost 65 | if w <= 0 or h <= 0: 66 | continue 67 | bbox = [x1, y1, x1 + w, y1 + h] 68 | gt_bboxes.append(bbox) 69 | gt_labels.append(ann['category_id']) 70 | gt_masks_ann.append(ann['segmentation']) 71 | gt_bboxes = np.array(gt_bboxes, dtype=np.float32) 72 | gt_labels = np.array(gt_labels, dtype=np.int64) 73 | results['ann_info']['labels'] = gt_labels 74 | results['ann_info']['bboxes'] = gt_bboxes 75 | results['ann_info']['masks'] = gt_masks_ann 76 | results['img'] = img 77 | return results 78 | 79 | def __call__(self, results): 80 | img = results['img'] 81 | orig_type = img.dtype 82 | anns = self._load_anns(results) 83 | if np.random.choice([0, 1], p=[1 - self.aug_ratio, self.aug_ratio]): 84 | try: 85 | import instaboostfast as instaboost 86 | except ImportError: 87 | raise ImportError('Please run "pip install instaboostfast" ' 88 | 'to install instaboostfast first.') 89 | anns, img = instaboost.get_new_data( 90 | anns, img.astype(np.uint8), self.cfg, background=None) 91 | 92 | results = self._parse_anns(results, anns, img.astype(orig_type)) 93 | return results 94 | 95 | def __repr__(self): 96 | repr_str = self.__class__.__name__ 97 | repr_str += f'(cfg={self.cfg}, aug_ratio={self.aug_ratio})' 98 | return repr_str 99 | -------------------------------------------------------------------------------- /mmdet/datasets/samplers/__init__.py: -------------------------------------------------------------------------------- 1 | from .distributed_sampler import DistributedSampler 2 | from .group_sampler import DistributedGroupSampler, GroupSampler 3 | 4 | __all__ = ['DistributedSampler', 'DistributedGroupSampler', 'GroupSampler'] 5 | -------------------------------------------------------------------------------- /mmdet/datasets/samplers/distributed_sampler.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | import torch 4 | from torch.utils.data import DistributedSampler as _DistributedSampler 5 | 6 | 7 | class DistributedSampler(_DistributedSampler): 8 | 9 | def __init__(self, dataset, num_replicas=None, rank=None, shuffle=True): 10 | super().__init__(dataset, 
num_replicas=num_replicas, rank=rank) 11 | self.shuffle = shuffle 12 | 13 | def __iter__(self): 14 | # deterministically shuffle based on epoch 15 | if self.shuffle: 16 | g = torch.Generator() 17 | g.manual_seed(self.epoch) 18 | indices = torch.randperm(len(self.dataset), generator=g).tolist() 19 | else: 20 | indices = torch.arange(len(self.dataset)).tolist() 21 | 22 | # add extra samples to make it evenly divisible 23 | # in case indices is shorter than total_size 24 | indices = (indices * 25 | math.ceil(self.total_size / len(indices)))[:self.total_size] 26 | assert len(indices) == self.total_size 27 | 28 | # subsample 29 | indices = indices[self.rank:self.total_size:self.num_replicas] 30 | assert len(indices) == self.num_samples 31 | 32 | return iter(indices) 33 | -------------------------------------------------------------------------------- /mmdet/models/__init__.py: -------------------------------------------------------------------------------- 1 | from .backbones import * # noqa: F401,F403 2 | from .builder import (BACKBONES, DETECTORS, HEADS, LOSSES, NECKS, 3 | ROI_EXTRACTORS, SHARED_HEADS, build_backbone, 4 | build_detector, build_head, build_loss, build_neck, 5 | build_roi_extractor, build_shared_head) 6 | from .dense_heads import * # noqa: F401,F403 7 | from .detectors import * # noqa: F401,F403 8 | from .losses import * # noqa: F401,F403 9 | from .necks import * # noqa: F401,F403 10 | from .roi_heads import * # noqa: F401,F403 11 | 12 | __all__ = [ 13 | 'BACKBONES', 'NECKS', 'ROI_EXTRACTORS', 'SHARED_HEADS', 'HEADS', 'LOSSES', 14 | 'DETECTORS', 'build_backbone', 'build_neck', 'build_roi_extractor', 15 | 'build_shared_head', 'build_head', 'build_loss', 'build_detector' 16 | ] 17 | -------------------------------------------------------------------------------- /mmdet/models/backbones/__init__.py: -------------------------------------------------------------------------------- 1 | from .darknet import Darknet 2 | from .detectors_resnet import DetectoRS_ResNet 3 | from .detectors_resnext import DetectoRS_ResNeXt 4 | from .hourglass import HourglassNet 5 | from .hrnet import HRNet 6 | from .regnet import RegNet 7 | from .res2net import Res2Net 8 | from .resnest import ResNeSt 9 | from .resnet import ResNet, ResNetV1d 10 | from .resnext import ResNeXt 11 | from .ssd_vgg import SSDVGG 12 | from .trident_resnet import TridentResNet 13 | 14 | __all__ = [ 15 | 'RegNet', 'ResNet', 'ResNetV1d', 'ResNeXt', 'SSDVGG', 'HRNet', 'Res2Net', 16 | 'HourglassNet', 'DetectoRS_ResNet', 'DetectoRS_ResNeXt', 'Darknet', 17 | 'ResNeSt', 'TridentResNet' 18 | ] 19 | -------------------------------------------------------------------------------- /mmdet/models/builder.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | 3 | from mmcv.utils import Registry, build_from_cfg 4 | from torch import nn 5 | 6 | BACKBONES = Registry('backbone') 7 | NECKS = Registry('neck') 8 | ROI_EXTRACTORS = Registry('roi_extractor') 9 | SHARED_HEADS = Registry('shared_head') 10 | HEADS = Registry('head') 11 | LOSSES = Registry('loss') 12 | DETECTORS = Registry('detector') 13 | 14 | 15 | def build(cfg, registry, default_args=None): 16 | """Build a module. 17 | 18 | Args: 19 | cfg (dict, list[dict]): The config of modules; it is either a dict 20 | or a list of configs. 21 | registry (:obj:`Registry`): A registry the module belongs to. 22 | default_args (dict, optional): Default arguments to build the module. 23 | Defaults to None.
24 | 25 | Returns: 26 | nn.Module: A built nn module. 27 | """ 28 | if isinstance(cfg, list): 29 | modules = [ 30 | build_from_cfg(cfg_, registry, default_args) for cfg_ in cfg 31 | ] 32 | return nn.Sequential(*modules) 33 | else: 34 | return build_from_cfg(cfg, registry, default_args) 35 | 36 | 37 | def build_backbone(cfg): 38 | """Build backbone.""" 39 | return build(cfg, BACKBONES) 40 | 41 | 42 | def build_neck(cfg): 43 | """Build neck.""" 44 | return build(cfg, NECKS) 45 | 46 | 47 | def build_roi_extractor(cfg): 48 | """Build roi extractor.""" 49 | return build(cfg, ROI_EXTRACTORS) 50 | 51 | 52 | def build_shared_head(cfg): 53 | """Build shared head.""" 54 | return build(cfg, SHARED_HEADS) 55 | 56 | 57 | def build_head(cfg): 58 | """Build head.""" 59 | return build(cfg, HEADS) 60 | 61 | 62 | def build_loss(cfg): 63 | """Build loss.""" 64 | return build(cfg, LOSSES) 65 | 66 | 67 | def build_detector(cfg, train_cfg=None, test_cfg=None): 68 | """Build detector.""" 69 | if train_cfg is not None or test_cfg is not None: 70 | warnings.warn( 71 | 'train_cfg and test_cfg are deprecated, ' 72 | 'please specify them in model', UserWarning) 73 | assert cfg.get('train_cfg') is None or train_cfg is None, \ 74 | 'train_cfg specified in both outer field and model field ' 75 | assert cfg.get('test_cfg') is None or test_cfg is None, \ 76 | 'test_cfg specified in both outer field and model field ' 77 | return build(cfg, DETECTORS, dict(train_cfg=train_cfg, test_cfg=test_cfg)) 78 | -------------------------------------------------------------------------------- /mmdet/models/dense_heads/__init__.py: -------------------------------------------------------------------------------- 1 | from .anchor_head import AnchorHead 2 | from .rpn_head import RPNHead 3 | from .oln_rpn_head import OlnRPNHead 4 | 5 | 6 | __all__ = [ 7 | 'AnchorHead', 'RPNHead', 'OlnRPNHead', 8 | ] 9 | -------------------------------------------------------------------------------- /mmdet/models/dense_heads/base_dense_head.py: -------------------------------------------------------------------------------- 1 | from abc import ABCMeta, abstractmethod 2 | 3 | import torch.nn as nn 4 | 5 | 6 | class BaseDenseHead(nn.Module, metaclass=ABCMeta): 7 | """Base class for DenseHeads.""" 8 | 9 | def __init__(self): 10 | super(BaseDenseHead, self).__init__() 11 | 12 | @abstractmethod 13 | def loss(self, **kwargs): 14 | """Compute losses of the head.""" 15 | pass 16 | 17 | @abstractmethod 18 | def get_bboxes(self, **kwargs): 19 | """Transform network output for a batch into bbox predictions.""" 20 | pass 21 | 22 | def forward_train(self, 23 | x, 24 | img_metas, 25 | gt_bboxes, 26 | gt_labels=None, 27 | gt_bboxes_ignore=None, 28 | proposal_cfg=None, 29 | **kwargs): 30 | """ 31 | Args: 32 | x (list[Tensor]): Features from FPN. 33 | img_metas (list[dict]): Meta information of each image, e.g., 34 | image size, scaling factor, etc. 35 | gt_bboxes (Tensor): Ground truth bboxes of the image, 36 | shape (num_gts, 4). 37 | gt_labels (Tensor): Ground truth labels of each box, 38 | shape (num_gts,). 39 | gt_bboxes_ignore (Tensor): Ground truth bboxes to be 40 | ignored, shape (num_ignored_gts, 4). 41 | proposal_cfg (mmcv.Config): Test / postprocessing configuration; 42 | if None, test_cfg is used. 43 | 44 | Returns: 45 | tuple: 46 | losses (dict[str, Tensor]): A dictionary of loss components. 47 | proposal_list (list[Tensor]): Proposals of each image.
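        A minimal usage sketch (hypothetical variable names; this mirrors how
        two-stage detectors typically drive an RPN-style head):

            >>> losses = head.forward_train(x, img_metas, gt_bboxes)
            >>> losses, proposals = head.forward_train(
            ...     x, img_metas, gt_bboxes, proposal_cfg=proposal_cfg)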
48 | """ 49 | outs = self(x) 50 | if gt_labels is None: 51 | loss_inputs = outs + (gt_bboxes, img_metas) 52 | else: 53 | loss_inputs = outs + (gt_bboxes, gt_labels, img_metas) 54 | losses = self.loss(*loss_inputs, gt_bboxes_ignore=gt_bboxes_ignore) 55 | if proposal_cfg is None: 56 | return losses 57 | else: 58 | proposal_list = self.get_bboxes(*outs, img_metas, cfg=proposal_cfg) 59 | return losses, proposal_list 60 | -------------------------------------------------------------------------------- /mmdet/models/dense_heads/rpn_test_mixin.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | from mmdet.core import merge_aug_proposals 4 | 5 | if sys.version_info >= (3, 7): 6 | from mmdet.utils.contextmanagers import completed 7 | 8 | 9 | class RPNTestMixin(object): 10 | """Test methods of RPN.""" 11 | 12 | if sys.version_info >= (3, 7): 13 | 14 | async def async_simple_test_rpn(self, x, img_metas): 15 | sleep_interval = self.test_cfg.pop('async_sleep_interval', 0.025) 16 | async with completed( 17 | __name__, 'rpn_head_forward', 18 | sleep_interval=sleep_interval): 19 | rpn_outs = self(x) 20 | 21 | proposal_list = self.get_bboxes(*rpn_outs, img_metas) 22 | return proposal_list 23 | 24 | def simple_test_rpn(self, x, img_metas): 25 | """Test without augmentation. 26 | 27 | Args: 28 | x (tuple[Tensor]): Features from the upstream network, each is 29 | a 4D-tensor. 30 | img_metas (list[dict]): Meta info of each image. 31 | 32 | Returns: 33 | list[Tensor]: Proposals of each image. 34 | """ 35 | rpn_outs = self(x) 36 | proposal_list = self.get_bboxes(*rpn_outs, img_metas) 37 | return proposal_list 38 | 39 | def aug_test_rpn(self, feats, img_metas): 40 | samples_per_gpu = len(img_metas[0]) 41 | aug_proposals = [[] for _ in range(samples_per_gpu)] 42 | for x, img_meta in zip(feats, img_metas): 43 | proposal_list = self.simple_test_rpn(x, img_meta) 44 | for i, proposals in enumerate(proposal_list): 45 | aug_proposals[i].append(proposals) 46 | # reorganize the order of 'img_metas' to match the dimensions 47 | # of 'aug_proposals' 48 | aug_img_metas = [] 49 | for i in range(samples_per_gpu): 50 | aug_img_meta = [] 51 | for j in range(len(img_metas)): 52 | aug_img_meta.append(img_metas[j][i]) 53 | aug_img_metas.append(aug_img_meta) 54 | # after merging, proposals will be rescaled to the original image size 55 | merged_proposals = [ 56 | merge_aug_proposals(proposals, aug_img_meta, self.test_cfg) 57 | for proposals, aug_img_meta in zip(aug_proposals, aug_img_metas) 58 | ] 59 | return merged_proposals 60 | -------------------------------------------------------------------------------- /mmdet/models/detectors/__init__.py: -------------------------------------------------------------------------------- 1 | from .base import BaseDetector 2 | from .faster_rcnn import FasterRCNN 3 | from .mask_rcnn import MaskRCNN 4 | from .rpn import RPN 5 | from .two_stage import TwoStageDetector 6 | # 7 | from .rpn_detector import RPNDetector 8 | __all__ = [ 9 | 'BaseDetector', 'TwoStageDetector', 'RPN', 'FasterRCNN', 'MaskRCNN', 10 | 'RPNDetector' 11 | ] 12 | -------------------------------------------------------------------------------- /mmdet/models/detectors/faster_rcnn.py: -------------------------------------------------------------------------------- 1 | from ..builder import DETECTORS 2 | from .two_stage import TwoStageDetector 3 | 4 | 5 | @DETECTORS.register_module() 6 | class FasterRCNN(TwoStageDetector): 7 | """Implementation of `Faster R-CNN <https://arxiv.org/abs/1506.01497>`_""" 8 | 9 | 
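    # A detector like this is normally instantiated through the registry
    # rather than constructed directly; an illustrative sketch, assuming
    # ``cfg`` is a loaded mmcv Config:
    #
    #     from mmdet.models import build_detector
    #     model = build_detector(cfg.model)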
def __init__(self, 10 | backbone, 11 | rpn_head, 12 | roi_head, 13 | train_cfg, 14 | test_cfg, 15 | neck=None, 16 | pretrained=None): 17 | super(FasterRCNN, self).__init__( 18 | backbone=backbone, 19 | neck=neck, 20 | rpn_head=rpn_head, 21 | roi_head=roi_head, 22 | train_cfg=train_cfg, 23 | test_cfg=test_cfg, 24 | pretrained=pretrained) 25 | -------------------------------------------------------------------------------- /mmdet/models/detectors/mask_rcnn.py: -------------------------------------------------------------------------------- 1 | from ..builder import DETECTORS 2 | from .two_stage import TwoStageDetector 3 | 4 | 5 | @DETECTORS.register_module() 6 | class MaskRCNN(TwoStageDetector): 7 | """Implementation of `Mask R-CNN <https://arxiv.org/abs/1703.06870>`_""" 8 | 9 | def __init__(self, 10 | backbone, 11 | rpn_head, 12 | roi_head, 13 | train_cfg, 14 | test_cfg, 15 | neck=None, 16 | pretrained=None): 17 | super(MaskRCNN, self).__init__( 18 | backbone=backbone, 19 | neck=neck, 20 | rpn_head=rpn_head, 21 | roi_head=roi_head, 22 | train_cfg=train_cfg, 23 | test_cfg=test_cfg, 24 | pretrained=pretrained) 25 | -------------------------------------------------------------------------------- /mmdet/models/detectors/rpn_detector.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from mmdet.core import bbox2result 4 | from ..builder import DETECTORS 5 | from .rpn import RPN 6 | 7 | 8 | @DETECTORS.register_module() 9 | class RPNDetector(RPN): 10 | 11 | def simple_test(self, img, img_metas, rescale=False): 12 | """Test function without test time augmentation. 13 | 14 | Args: 15 | img (torch.Tensor): Input images. 16 | img_metas (list[dict]): List of image information. 17 | rescale (bool, optional): Whether to rescale the results. 18 | Defaults to False. 19 | 20 | Returns: 21 | list[np.ndarray]: proposals 22 | """ 23 | x = self.extract_feat(img) 24 | proposal_list = self.rpn_head.simple_test_rpn(x, img_metas) 25 | if rescale: 26 | for proposals, meta in zip(proposal_list, img_metas): 27 | proposals[:, :4] /= proposals.new_tensor(meta['scale_factor']) 28 | 29 | # Convert the rpn-proposals into bbox results format. 
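        # bbox2result groups detections by class label; the proposals here
        # are class-agnostic, so each box below is given the dummy label 0
        # and reported under a single foreground class.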
30 | # proposal_list[0].shape = [200,5] 31 | bbox_results = [] 32 | for det_bboxes in proposal_list: 33 | det_labels = torch.zeros((det_bboxes.size(0))).to( 34 | det_bboxes.device) 35 | bbox_results.append( 36 | bbox2result(det_bboxes, det_labels, num_classes=1)) 37 | 38 | return bbox_results 39 | -------------------------------------------------------------------------------- /mmdet/models/losses/__init__.py: -------------------------------------------------------------------------------- 1 | from .accuracy import Accuracy, accuracy 2 | from .ae_loss import AssociativeEmbeddingLoss 3 | from .balanced_l1_loss import BalancedL1Loss, balanced_l1_loss 4 | from .cross_entropy_loss import (CrossEntropyLoss, binary_cross_entropy, 5 | cross_entropy, mask_cross_entropy) 6 | from .focal_loss import FocalLoss, sigmoid_focal_loss 7 | from .gaussian_focal_loss import GaussianFocalLoss 8 | from .gfocal_loss import DistributionFocalLoss, QualityFocalLoss 9 | from .ghm_loss import GHMC, GHMR 10 | from .iou_loss import (BoundedIoULoss, CIoULoss, DIoULoss, GIoULoss, IoULoss, 11 | bounded_iou_loss, iou_loss) 12 | from .mse_loss import MSELoss, mse_loss 13 | from .pisa_loss import carl_loss, isr_p 14 | from .smooth_l1_loss import L1Loss, SmoothL1Loss, l1_loss, smooth_l1_loss 15 | from .utils import reduce_loss, weight_reduce_loss, weighted_loss 16 | from .varifocal_loss import VarifocalLoss 17 | 18 | __all__ = [ 19 | 'accuracy', 'Accuracy', 'cross_entropy', 'binary_cross_entropy', 20 | 'mask_cross_entropy', 'CrossEntropyLoss', 'sigmoid_focal_loss', 21 | 'FocalLoss', 'smooth_l1_loss', 'SmoothL1Loss', 'balanced_l1_loss', 22 | 'BalancedL1Loss', 'mse_loss', 'MSELoss', 'iou_loss', 'bounded_iou_loss', 23 | 'IoULoss', 'BoundedIoULoss', 'GIoULoss', 'DIoULoss', 'CIoULoss', 'GHMC', 24 | 'GHMR', 'reduce_loss', 'weight_reduce_loss', 'weighted_loss', 'L1Loss', 25 | 'l1_loss', 'isr_p', 'carl_loss', 'AssociativeEmbeddingLoss', 26 | 'GaussianFocalLoss', 'QualityFocalLoss', 'DistributionFocalLoss', 27 | 'VarifocalLoss' 28 | ] 29 | -------------------------------------------------------------------------------- /mmdet/models/losses/accuracy.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | 4 | def accuracy(pred, target, topk=1, thresh=None): 5 | """Calculate accuracy according to the prediction and target. 6 | 7 | Args: 8 | pred (torch.Tensor): The model prediction, shape (N, num_class) 9 | target (torch.Tensor): The target of each prediction, shape (N, ) 10 | topk (int | tuple[int], optional): If the predictions in ``topk`` 11 | match the target, the predictions will be regarded as 12 | correct ones. Defaults to 1. 13 | thresh (float, optional): If not None, predictions with scores under 14 | this threshold are considered incorrect. Defaults to None. 15 | 16 | Returns: 17 | float | tuple[float]: If the input ``topk`` is a single integer, 18 | the function will return a single float as accuracy. If 19 | ``topk`` is a tuple containing multiple integers, the 20 | function will return a tuple containing accuracies of 21 | each ``topk`` number. 22 | """ 23 | assert isinstance(topk, (int, tuple)) 24 | if isinstance(topk, int): 25 | topk = (topk, ) 26 | return_single = True 27 | else: 28 | return_single = False 29 | 30 | maxk = max(topk) 31 | if pred.size(0) == 0: 32 | accu = [pred.new_tensor(0.)
for i in range(len(topk))] 33 | return accu[0] if return_single else accu 34 | assert pred.ndim == 2 and target.ndim == 1 35 | assert pred.size(0) == target.size(0) 36 | assert maxk <= pred.size(1), \ 37 | f'maxk {maxk} exceeds pred dimension {pred.size(1)}' 38 | pred_value, pred_label = pred.topk(maxk, dim=1) 39 | pred_label = pred_label.t() # transpose to shape (maxk, N) 40 | correct = pred_label.eq(target.view(1, -1).expand_as(pred_label)) 41 | if thresh is not None: 42 | # Only prediction values larger than thresh are counted as correct 43 | correct = correct & (pred_value > thresh).t() 44 | res = [] 45 | for k in topk: 46 | correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True) 47 | res.append(correct_k.mul_(100.0 / pred.size(0))) 48 | return res[0] if return_single else res 49 | 50 | 51 | class Accuracy(nn.Module): 52 | 53 | def __init__(self, topk=(1, ), thresh=None): 54 | """Module to calculate the accuracy. 55 | 56 | Args: 57 | topk (tuple, optional): The criterion used to calculate the 58 | accuracy. Defaults to (1,). 59 | thresh (float, optional): If not None, predictions with scores 60 | under this threshold are considered incorrect. Defaults to None. 61 | """ 62 | super().__init__() 63 | self.topk = topk 64 | self.thresh = thresh 65 | 66 | def forward(self, pred, target): 67 | """Forward function to calculate accuracy. 68 | 69 | Args: 70 | pred (torch.Tensor): Prediction of models. 71 | target (torch.Tensor): Target for each prediction. 72 | 73 | Returns: 74 | tuple[float]: The accuracies under different topk criterions. 75 | """ 76 | return accuracy(pred, target, self.topk, self.thresh) 77 | -------------------------------------------------------------------------------- /mmdet/models/losses/gaussian_focal_loss.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | from ..builder import LOSSES 4 | from .utils import weighted_loss 5 | 6 | 7 | @weighted_loss 8 | def gaussian_focal_loss(pred, gaussian_target, alpha=2.0, gamma=4.0): 9 | """`Focal Loss <https://arxiv.org/abs/1708.02002>`_ for targets in gaussian 10 | distribution. 11 | 12 | Args: 13 | pred (torch.Tensor): The prediction. 14 | gaussian_target (torch.Tensor): The learning target of the prediction 15 | in gaussian distribution. 16 | alpha (float, optional): A balanced form for Focal Loss. 17 | Defaults to 2.0. 18 | gamma (float, optional): The gamma for calculating the modulating 19 | factor. Defaults to 4.0. 20 | """ 21 | eps = 1e-12 22 | pos_weights = gaussian_target.eq(1) 23 | neg_weights = (1 - gaussian_target).pow(gamma) 24 | pos_loss = -(pred + eps).log() * (1 - pred).pow(alpha) * pos_weights 25 | neg_loss = -(1 - pred + eps).log() * pred.pow(alpha) * neg_weights 26 | return pos_loss + neg_loss 27 | 28 | 29 | @LOSSES.register_module() 30 | class GaussianFocalLoss(nn.Module): 31 | """GaussianFocalLoss is a variant of focal loss. 32 | 33 | More details can be found in the `paper 34 | <https://arxiv.org/abs/1808.01244>`_ 35 | Code is modified from `kp_utils.py 36 | <https://github.com/princeton-vl/CornerNet/blob/master/models/py_utils/kp_utils.py>`_ # noqa: E501 37 | Please notice that the target in GaussianFocalLoss is a gaussian heatmap, 38 | not a 0/1 binary target. 39 | 40 | Args: 41 | alpha (float): Power of prediction. 42 | gamma (float): Power of target for negative samples. 43 | reduction (str): Options are "none", "mean" and "sum". 44 | loss_weight (float): Loss weight of current loss.
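        A small usage sketch (illustrative values only):

            >>> import torch
            >>> loss_fn = GaussianFocalLoss(alpha=2.0, gamma=4.0)
            >>> pred = torch.tensor([0.9, 0.1])    # predicted heatmap
            >>> target = torch.tensor([1.0, 0.0])  # gaussian target heatmap
            >>> loss = loss_fn(pred, target)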
45 | """ 46 | 47 | def __init__(self, 48 | alpha=2.0, 49 | gamma=4.0, 50 | reduction='mean', 51 | loss_weight=1.0): 52 | super(GaussianFocalLoss, self).__init__() 53 | self.alpha = alpha 54 | self.gamma = gamma 55 | self.reduction = reduction 56 | self.loss_weight = loss_weight 57 | 58 | def forward(self, 59 | pred, 60 | target, 61 | weight=None, 62 | avg_factor=None, 63 | reduction_override=None): 64 | """Forward function. 65 | 66 | Args: 67 | pred (torch.Tensor): The prediction. 68 | target (torch.Tensor): The learning target of the prediction 69 | in gaussian distribution. 70 | weight (torch.Tensor, optional): The weight of loss for each 71 | prediction. Defaults to None. 72 | avg_factor (int, optional): Average factor that is used to average 73 | the loss. Defaults to None. 74 | reduction_override (str, optional): The reduction method used to 75 | override the original reduction method of the loss. 76 | Defaults to None. 77 | """ 78 | assert reduction_override in (None, 'none', 'mean', 'sum') 79 | reduction = ( 80 | reduction_override if reduction_override else self.reduction) 81 | loss_reg = self.loss_weight * gaussian_focal_loss( 82 | pred, 83 | target, 84 | weight, 85 | alpha=self.alpha, 86 | gamma=self.gamma, 87 | reduction=reduction, 88 | avg_factor=avg_factor) 89 | return loss_reg 90 | -------------------------------------------------------------------------------- /mmdet/models/losses/mse_loss.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch.nn.functional as F 3 | 4 | from ..builder import LOSSES 5 | from .utils import weighted_loss 6 | 7 | 8 | @weighted_loss 9 | def mse_loss(pred, target): 10 | """Wrapper of mse loss.""" 11 | return F.mse_loss(pred, target, reduction='none') 12 | 13 | 14 | @LOSSES.register_module() 15 | class MSELoss(nn.Module): 16 | """MSELoss. 17 | 18 | Args: 19 | reduction (str, optional): The method that reduces the loss to a 20 | scalar. Options are "none", "mean" and "sum". 21 | loss_weight (float, optional): The weight of the loss. Defaults to 1.0. 22 | """ 23 | 24 | def __init__(self, reduction='mean', loss_weight=1.0): 25 | super().__init__() 26 | self.reduction = reduction 27 | self.loss_weight = loss_weight 28 | 29 | def forward(self, pred, target, weight=None, avg_factor=None): 30 | """Forward function of loss. 31 | 32 | Args: 33 | pred (torch.Tensor): The prediction. 34 | target (torch.Tensor): The learning target of the prediction. 35 | weight (torch.Tensor, optional): Weight of the loss for each 36 | prediction. Defaults to None. 37 | avg_factor (int, optional): Average factor that is used to average 38 | the loss. Defaults to None. 39 | 40 | Returns: 41 | torch.Tensor: The calculated loss. 42 | """ 43 | loss = self.loss_weight * mse_loss( 44 | pred, 45 | target, 46 | weight, 47 | reduction=self.reduction, 48 | avg_factor=avg_factor) 49 | return loss 50 | -------------------------------------------------------------------------------- /mmdet/models/losses/utils.py: -------------------------------------------------------------------------------- 1 | import functools 2 | 3 | import torch.nn.functional as F 4 | 5 | 6 | def reduce_loss(loss, reduction): 7 | """Reduce loss as specified. 8 | 9 | Args: 10 | loss (Tensor): Elementwise loss tensor. 11 | reduction (str): Options are "none", "mean" and "sum". 12 | 13 | Returns: 14 | Tensor: Reduced loss tensor.
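        Example (worked numbers): ``reduce_loss(torch.tensor([1., 3.]), 'mean')``
        returns ``tensor(2.)``, while ``'sum'`` gives ``tensor(4.)`` and
        ``'none'`` returns the input unchanged.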
15 | """ 16 | reduction_enum = F._Reduction.get_enum(reduction) 17 | # none: 0, elementwise_mean:1, sum: 2 18 | if reduction_enum == 0: 19 | return loss 20 | elif reduction_enum == 1: 21 | return loss.mean() 22 | elif reduction_enum == 2: 23 | return loss.sum() 24 | 25 | 26 | def weight_reduce_loss(loss, weight=None, reduction='mean', avg_factor=None): 27 | """Apply element-wise weight and reduce loss. 28 | 29 | Args: 30 | loss (Tensor): Element-wise loss. 31 | weight (Tensor): Element-wise weights. 32 | reduction (str): Same as built-in losses of PyTorch. 33 | avg_factor (float): Average factor when computing the mean of losses. 34 | 35 | Returns: 36 | Tensor: Processed loss values. 37 | """ 38 | # if weight is specified, apply element-wise weight 39 | if weight is not None: 40 | loss = loss * weight 41 | 42 | # if avg_factor is not specified, just reduce the loss 43 | if avg_factor is None: 44 | loss = reduce_loss(loss, reduction) 45 | else: 46 | # if reduction is mean, then average the loss by avg_factor 47 | if reduction == 'mean': 48 | loss = loss.sum() / avg_factor 49 | # if reduction is 'none', then do nothing, otherwise raise an error 50 | elif reduction != 'none': 51 | raise ValueError('avg_factor cannot be used with reduction="sum"') 52 | return loss 53 | 54 | 55 | def weighted_loss(loss_func): 56 | """Create a weighted version of a given loss function. 57 | 58 | To use this decorator, the loss function must have the signature like 59 | `loss_func(pred, target, **kwargs)`. The function only needs to compute 60 | element-wise loss without any reduction. This decorator will add weight 61 | and reduction arguments to the function. The decorated function will have 62 | the signature like `loss_func(pred, target, weight=None, reduction='mean', 63 | avg_factor=None, **kwargs)`. 64 | 65 | :Example: 66 | 67 | >>> import torch 68 | >>> @weighted_loss 69 | >>> def l1_loss(pred, target): 70 | >>> return (pred - target).abs() 71 | 72 | >>> pred = torch.Tensor([0, 2, 3]) 73 | >>> target = torch.Tensor([1, 1, 1]) 74 | >>> weight = torch.Tensor([1, 0, 1]) 75 | 76 | >>> l1_loss(pred, target) 77 | tensor(1.3333) 78 | >>> l1_loss(pred, target, weight) 79 | tensor(1.)
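    >>> # the 'sum' reduction over the same inputs, for completeness
    >>> l1_loss(pred, target, reduction='sum')
    tensor(4.)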
80 | >>> l1_loss(pred, target, reduction='none') 81 | tensor([1., 1., 2.]) 82 | >>> l1_loss(pred, target, weight, avg_factor=2) 83 | tensor(1.5000) 84 | """ 85 | 86 | @functools.wraps(loss_func) 87 | def wrapper(pred, 88 | target, 89 | weight=None, 90 | reduction='mean', 91 | avg_factor=None, 92 | **kwargs): 93 | # get element-wise loss 94 | loss = loss_func(pred, target, **kwargs) 95 | loss = weight_reduce_loss(loss, weight, reduction, avg_factor) 96 | return loss 97 | 98 | return wrapper 99 | -------------------------------------------------------------------------------- /mmdet/models/necks/__init__.py: -------------------------------------------------------------------------------- 1 | from .fpn import FPN 2 | 3 | __all__ = [ 4 | 'FPN', 5 | ] 6 | -------------------------------------------------------------------------------- /mmdet/models/roi_heads/__init__.py: -------------------------------------------------------------------------------- 1 | from .base_roi_head import BaseRoIHead 2 | from .bbox_heads import (BBoxHead, ConvFCBBoxHead, Shared2FCBBoxHead, 3 | Shared4Conv1FCBBoxHead) 4 | from .mask_heads import (CoarseMaskHead, FCNMaskHead, FusedSemanticHead, 5 | GridHead, HTCMaskHead, MaskIoUHead, MaskPointHead) 6 | from .roi_extractors import SingleRoIExtractor 7 | from .shared_heads import ResLayer 8 | from .standard_roi_head import StandardRoIHead 9 | 10 | from .oln_roi_head import OlnRoIHead 11 | 12 | __all__ = [ 13 | 'BaseRoIHead', 'ResLayer', 'BBoxHead', 14 | 'ConvFCBBoxHead', 'Shared2FCBBoxHead', 'StandardRoIHead', 15 | 'Shared4Conv1FCBBoxHead', 'FCNMaskHead', 16 | 'SingleRoIExtractor', 'OlnRoIHead' 17 | ] 18 | -------------------------------------------------------------------------------- /mmdet/models/roi_heads/base_roi_head.py: -------------------------------------------------------------------------------- 1 | from abc import ABCMeta, abstractmethod 2 | 3 | import torch.nn as nn 4 | 5 | from ..builder import build_shared_head 6 | 7 | 8 | class BaseRoIHead(nn.Module, metaclass=ABCMeta): 9 | """Base class for RoIHeads.""" 10 | 11 | def __init__(self, 12 | bbox_roi_extractor=None, 13 | bbox_head=None, 14 | mask_roi_extractor=None, 15 | mask_head=None, 16 | shared_head=None, 17 | train_cfg=None, 18 | test_cfg=None): 19 | super(BaseRoIHead, self).__init__() 20 | self.train_cfg = train_cfg 21 | self.test_cfg = test_cfg 22 | if shared_head is not None: 23 | self.shared_head = build_shared_head(shared_head) 24 | 25 | if bbox_head is not None: 26 | self.init_bbox_head(bbox_roi_extractor, bbox_head) 27 | 28 | if mask_head is not None: 29 | self.init_mask_head(mask_roi_extractor, mask_head) 30 | 31 | self.init_assigner_sampler() 32 | 33 | @property 34 | def with_bbox(self): 35 | """bool: whether the RoI head contains a `bbox_head`""" 36 | return hasattr(self, 'bbox_head') and self.bbox_head is not None 37 | 38 | @property 39 | def with_mask(self): 40 | """bool: whether the RoI head contains a `mask_head`""" 41 | return hasattr(self, 'mask_head') and self.mask_head is not None 42 | 43 | @property 44 | def with_shared_head(self): 45 | """bool: whether the RoI head contains a `shared_head`""" 46 | return hasattr(self, 'shared_head') and self.shared_head is not None 47 | 48 | @abstractmethod 49 | def init_weights(self, pretrained): 50 | """Initialize the weights in head. 51 | 52 | Args: 53 | pretrained (str, optional): Path to pre-trained weights. 54 | Defaults to None. 
55 | """ 56 | pass 57 | 58 | @abstractmethod 59 | def init_bbox_head(self): 60 | """Initialize ``bbox_head``""" 61 | pass 62 | 63 | @abstractmethod 64 | def init_mask_head(self): 65 | """Initialize ``mask_head``""" 66 | pass 67 | 68 | @abstractmethod 69 | def init_assigner_sampler(self): 70 | """Initialize assigner and sampler.""" 71 | pass 72 | 73 | @abstractmethod 74 | def forward_train(self, 75 | x, 76 | img_meta, 77 | proposal_list, 78 | gt_bboxes, 79 | gt_labels, 80 | gt_bboxes_ignore=None, 81 | gt_masks=None, 82 | **kwargs): 83 | """Forward function during training.""" 84 | pass 85 | 86 | async def async_simple_test(self, x, img_meta, **kwargs): 87 | """Asynchronous test function.""" 88 | raise NotImplementedError 89 | 90 | def simple_test(self, 91 | x, 92 | proposal_list, 93 | img_meta, 94 | proposals=None, 95 | rescale=False, 96 | **kwargs): 97 | """Test without augmentation.""" 98 | pass 99 | 100 | def aug_test(self, x, proposal_list, img_metas, rescale=False, **kwargs): 101 | """Test with augmentations. 102 | 103 | If rescale is False, then returned bboxes and masks will fit the scale 104 | of imgs[0]. 105 | """ 106 | pass 107 | -------------------------------------------------------------------------------- /mmdet/models/roi_heads/bbox_heads/__init__.py: -------------------------------------------------------------------------------- 1 | from .bbox_head import BBoxHead 2 | from .convfc_bbox_head import (ConvFCBBoxHead, Shared2FCBBoxHead, 3 | Shared4Conv1FCBBoxHead) 4 | from .convfc_bbox_score_head import (ConvFCBBoxScoreHead, 5 | Shared2FCBBoxScoreHead) 6 | __all__ = [ 7 | 'BBoxHead', 'ConvFCBBoxHead', 'Shared2FCBBoxHead', 8 | 'Shared4Conv1FCBBoxHead', 'ConvFCBBoxScoreHead', 'Shared2FCBBoxScoreHead' 9 | ] 10 | -------------------------------------------------------------------------------- /mmdet/models/roi_heads/mask_heads/__init__.py: -------------------------------------------------------------------------------- 1 | from .coarse_mask_head import CoarseMaskHead 2 | from .fcn_mask_head import FCNMaskHead 3 | from .fused_semantic_head import FusedSemanticHead 4 | from .grid_head import GridHead 5 | from .htc_mask_head import HTCMaskHead 6 | from .mask_point_head import MaskPointHead 7 | from .maskiou_head import MaskIoUHead 8 | 9 | __all__ = [ 10 | 'FCNMaskHead', 'HTCMaskHead', 'FusedSemanticHead', 'GridHead', 11 | 'MaskIoUHead', 'CoarseMaskHead', 'MaskPointHead' 12 | ] 13 | -------------------------------------------------------------------------------- /mmdet/models/roi_heads/mask_heads/coarse_mask_head.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | from mmcv.cnn import ConvModule, Linear, constant_init, xavier_init 3 | from mmcv.runner import auto_fp16 4 | 5 | from mmdet.models.builder import HEADS 6 | from .fcn_mask_head import FCNMaskHead 7 | 8 | 9 | @HEADS.register_module() 10 | class CoarseMaskHead(FCNMaskHead): 11 | """Coarse mask head used in PointRend. 12 | 13 | Compared with standard ``FCNMaskHead``, ``CoarseMaskHead`` will downsample 14 | the input feature map instead of upsampling it. 15 | 16 | Args: 17 | num_convs (int): Number of conv layers in the head. Default: 0. 18 | num_fcs (int): Number of fc layers in the head. Default: 2. 19 | fc_out_channels (int): Number of output channels of fc layer. 20 | Default: 1024. 21 | downsample_factor (int): The factor by which the feature map is 22 | downsampled. Default: 2.
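        Shape sketch (assuming the inherited ``FCNMaskHead`` default of
        ``roi_feat_size=14``): with ``downsample_factor=2`` the FC logits are
        reshaped to ``num_classes`` coarse masks of spatial size 7x7.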
23 | """ 24 | 25 | def __init__(self, 26 | num_convs=0, 27 | num_fcs=2, 28 | fc_out_channels=1024, 29 | downsample_factor=2, 30 | *arg, 31 | **kwarg): 32 | super(CoarseMaskHead, self).__init__( 33 | *arg, num_convs=num_convs, upsample_cfg=dict(type=None), **kwarg) 34 | self.num_fcs = num_fcs 35 | assert self.num_fcs > 0 36 | self.fc_out_channels = fc_out_channels 37 | self.downsample_factor = downsample_factor 38 | assert self.downsample_factor >= 1 39 | # remove conv_logit 40 | delattr(self, 'conv_logits') 41 | 42 | if downsample_factor > 1: 43 | downsample_in_channels = ( 44 | self.conv_out_channels 45 | if self.num_convs > 0 else self.in_channels) 46 | self.downsample_conv = ConvModule( 47 | downsample_in_channels, 48 | self.conv_out_channels, 49 | kernel_size=downsample_factor, 50 | stride=downsample_factor, 51 | padding=0, 52 | conv_cfg=self.conv_cfg, 53 | norm_cfg=self.norm_cfg) 54 | else: 55 | self.downsample_conv = None 56 | 57 | self.output_size = (self.roi_feat_size[0] // downsample_factor, 58 | self.roi_feat_size[1] // downsample_factor) 59 | self.output_area = self.output_size[0] * self.output_size[1] 60 | 61 | last_layer_dim = self.conv_out_channels * self.output_area 62 | 63 | self.fcs = nn.ModuleList() 64 | for i in range(num_fcs): 65 | fc_in_channels = ( 66 | last_layer_dim if i == 0 else self.fc_out_channels) 67 | self.fcs.append(Linear(fc_in_channels, self.fc_out_channels)) 68 | last_layer_dim = self.fc_out_channels 69 | output_channels = self.num_classes * self.output_area 70 | self.fc_logits = Linear(last_layer_dim, output_channels) 71 | 72 | def init_weights(self): 73 | for m in self.fcs.modules(): 74 | if isinstance(m, nn.Linear): 75 | xavier_init(m) 76 | constant_init(self.fc_logits, 0.001) 77 | 78 | @auto_fp16() 79 | def forward(self, x): 80 | for conv in self.convs: 81 | x = conv(x) 82 | 83 | if self.downsample_conv is not None: 84 | x = self.downsample_conv(x) 85 | 86 | x = x.flatten(1) 87 | for fc in self.fcs: 88 | x = self.relu(fc(x)) 89 | mask_pred = self.fc_logits(x).view( 90 | x.size(0), self.num_classes, *self.output_size) 91 | return mask_pred 92 | -------------------------------------------------------------------------------- /mmdet/models/roi_heads/mask_heads/htc_mask_head.py: -------------------------------------------------------------------------------- 1 | from mmcv.cnn import ConvModule 2 | 3 | from mmdet.models.builder import HEADS 4 | from .fcn_mask_head import FCNMaskHead 5 | 6 | 7 | @HEADS.register_module() 8 | class HTCMaskHead(FCNMaskHead): 9 | 10 | def __init__(self, with_conv_res=True, *args, **kwargs): 11 | super(HTCMaskHead, self).__init__(*args, **kwargs) 12 | self.with_conv_res = with_conv_res 13 | if self.with_conv_res: 14 | self.conv_res = ConvModule( 15 | self.conv_out_channels, 16 | self.conv_out_channels, 17 | 1, 18 | conv_cfg=self.conv_cfg, 19 | norm_cfg=self.norm_cfg) 20 | 21 | def init_weights(self): 22 | super(HTCMaskHead, self).init_weights() 23 | if self.with_conv_res: 24 | self.conv_res.init_weights() 25 | 26 | def forward(self, x, res_feat=None, return_logits=True, return_feat=True): 27 | if res_feat is not None: 28 | assert self.with_conv_res 29 | res_feat = self.conv_res(res_feat) 30 | x = x + res_feat 31 | for conv in self.convs: 32 | x = conv(x) 33 | res_feat = x 34 | outs = [] 35 | if return_logits: 36 | x = self.upsample(x) 37 | if self.upsample_method == 'deconv': 38 | x = self.relu(x) 39 | mask_pred = self.conv_logits(x) 40 | outs.append(mask_pred) 41 | if return_feat: 42 | outs.append(res_feat) 43 | return outs if 
len(outs) > 1 else outs[0] 44 | -------------------------------------------------------------------------------- /mmdet/models/roi_heads/roi_extractors/__init__.py: -------------------------------------------------------------------------------- 1 | from .generic_roi_extractor import GenericRoIExtractor 2 | from .single_level_roi_extractor import SingleRoIExtractor 3 | 4 | __all__ = [ 5 | 'SingleRoIExtractor', 6 | 'GenericRoIExtractor', 7 | ] 8 | -------------------------------------------------------------------------------- /mmdet/models/roi_heads/roi_extractors/base_roi_extractor.py: -------------------------------------------------------------------------------- 1 | from abc import ABCMeta, abstractmethod 2 | 3 | import torch 4 | import torch.nn as nn 5 | from mmcv import ops 6 | 7 | 8 | class BaseRoIExtractor(nn.Module, metaclass=ABCMeta): 9 | """Base class for RoI extractor. 10 | 11 | Args: 12 | roi_layer (dict): Specify RoI layer type and arguments. 13 | out_channels (int): Output channels of RoI layers. 14 | featmap_strides (List[int]): Strides of input feature maps. 15 | """ 16 | 17 | def __init__(self, roi_layer, out_channels, featmap_strides): 18 | super(BaseRoIExtractor, self).__init__() 19 | self.roi_layers = self.build_roi_layers(roi_layer, featmap_strides) 20 | self.out_channels = out_channels 21 | self.featmap_strides = featmap_strides 22 | self.fp16_enabled = False 23 | 24 | @property 25 | def num_inputs(self): 26 | """int: Number of input feature maps.""" 27 | return len(self.featmap_strides) 28 | 29 | def init_weights(self): 30 | pass 31 | 32 | def build_roi_layers(self, layer_cfg, featmap_strides): 33 | """Build RoI operator to extract feature from each level feature map. 34 | 35 | Args: 36 | layer_cfg (dict): Dictionary to construct and config RoI layer 37 | operation. Options are modules under ``mmcv/ops`` such as 38 | ``RoIAlign``. 39 | featmap_strides (List[int]): The stride of input feature map w.r.t. the 40 | original image size, which would be used to scale RoI 41 | coordinate (original image coordinate system) to feature 42 | coordinate system. 43 | 44 | Returns: 45 | nn.ModuleList: The RoI extractor modules for each level feature 46 | map. 47 | """ 48 | 49 | cfg = layer_cfg.copy() 50 | layer_type = cfg.pop('type') 51 | assert hasattr(ops, layer_type) 52 | layer_cls = getattr(ops, layer_type) 53 | roi_layers = nn.ModuleList( 54 | [layer_cls(spatial_scale=1 / s, **cfg) for s in featmap_strides]) 55 | return roi_layers 56 | 57 | def roi_rescale(self, rois, scale_factor): 58 | """Scale RoI coordinates by scale factor. 59 | 60 | Args: 61 | rois (torch.Tensor): RoI (Region of Interest), shape (n, 5) 62 | scale_factor (float): Scale factor that RoI will be multiplied by. 63 | 64 | Returns: 65 | torch.Tensor: Scaled RoI.
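        Worked example: scaling the RoI ``(batch_idx, 0, 0, 10, 10)`` by
        ``scale_factor=2.0`` keeps the centre at (5, 5) and yields
        ``(batch_idx, -5, -5, 15, 15)``.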
66 | """ 67 | 68 | cx = (rois[:, 1] + rois[:, 3]) * 0.5 69 | cy = (rois[:, 2] + rois[:, 4]) * 0.5 70 | w = rois[:, 3] - rois[:, 1] 71 | h = rois[:, 4] - rois[:, 2] 72 | new_w = w * scale_factor 73 | new_h = h * scale_factor 74 | x1 = cx - new_w * 0.5 75 | x2 = cx + new_w * 0.5 76 | y1 = cy - new_h * 0.5 77 | y2 = cy + new_h * 0.5 78 | new_rois = torch.stack((rois[:, 0], x1, y1, x2, y2), dim=-1) 79 | return new_rois 80 | 81 | @abstractmethod 82 | def forward(self, feats, rois, roi_scale_factor=None): 83 | pass 84 | -------------------------------------------------------------------------------- /mmdet/models/roi_heads/roi_extractors/generic_roi_extractor.py: -------------------------------------------------------------------------------- 1 | from mmcv.cnn.bricks import build_plugin_layer 2 | from mmcv.runner import force_fp32 3 | 4 | from mmdet.models.builder import ROI_EXTRACTORS 5 | from .base_roi_extractor import BaseRoIExtractor 6 | 7 | 8 | @ROI_EXTRACTORS.register_module() 9 | class GenericRoIExtractor(BaseRoIExtractor): 10 | """Extract RoI features from all feature map levels. 11 | 12 | This is the implementation of `A novel Region of Interest Extraction Layer 13 | for Instance Segmentation <https://arxiv.org/abs/2004.13665>`_. 14 | 15 | Args: 16 | aggregation (str): The method to aggregate multiple feature maps. 17 | Options are 'sum', 'concat'. Default: 'sum'. 18 | pre_cfg (dict | None): Specify pre-processing modules. Default: None. 19 | post_cfg (dict | None): Specify post-processing modules. Default: None. 20 | kwargs (keyword arguments): Arguments that are the same 21 | as :class:`BaseRoIExtractor`. 22 | """ 23 | 24 | def __init__(self, 25 | aggregation='sum', 26 | pre_cfg=None, 27 | post_cfg=None, 28 | **kwargs): 29 | super(GenericRoIExtractor, self).__init__(**kwargs) 30 | 31 | assert aggregation in ['sum', 'concat'] 32 | 33 | self.aggregation = aggregation 34 | self.with_post = post_cfg is not None 35 | self.with_pre = pre_cfg is not None 36 | # build pre/post processing modules 37 | if self.with_post: 38 | self.post_module = build_plugin_layer(post_cfg, '_post_module')[1] 39 | if self.with_pre: 40 | self.pre_module = build_plugin_layer(pre_cfg, '_pre_module')[1] 41 | 42 | @force_fp32(apply_to=('feats', ), out_fp16=True) 43 | def forward(self, feats, rois, roi_scale_factor=None): 44 | """Forward function.""" 45 | if len(feats) == 1: 46 | return self.roi_layers[0](feats[0], rois) 47 | 48 | out_size = self.roi_layers[0].output_size 49 | num_levels = len(feats) 50 | roi_feats = feats[0].new_zeros( 51 | rois.size(0), self.out_channels, *out_size) 52 | 53 | # sometimes rois is an empty tensor 54 | if roi_feats.shape[0] == 0: 55 | return roi_feats 56 | 57 | if roi_scale_factor is not None: 58 | rois = self.roi_rescale(rois, roi_scale_factor) 59 | 60 | # mark the starting channels for concat mode 61 | start_channels = 0 62 | for i in range(num_levels): 63 | roi_feats_t = self.roi_layers[i](feats[i], rois) 64 | end_channels = start_channels + roi_feats_t.size(1) 65 | if self.with_pre: 66 | # apply pre-processing to a RoI extracted from each layer 67 | roi_feats_t = self.pre_module(roi_feats_t) 68 | if self.aggregation == 'sum': 69 | # and sum them all 70 | roi_feats += roi_feats_t 71 | else: 72 | # and concat them along channel dimension 73 | roi_feats[:, start_channels:end_channels] = roi_feats_t 74 | # update channels starting position 75 | start_channels = end_channels 76 | # check if concat channels match at the end 77 | if self.aggregation == 'concat': 78 | assert start_channels == self.out_channels 79 | 
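        # At this point roi_feats holds either the level-wise sum or the
        # channel-wise concatenation of RoI features from all levels.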
80 | if self.with_post: 81 | # apply post-processing before returning the result 82 | roi_feats = self.post_module(roi_feats) 83 | return roi_feats 84 | -------------------------------------------------------------------------------- /mmdet/models/roi_heads/shared_heads/__init__.py: -------------------------------------------------------------------------------- 1 | from .res_layer import ResLayer 2 | 3 | __all__ = ['ResLayer'] 4 | -------------------------------------------------------------------------------- /mmdet/models/roi_heads/shared_heads/res_layer.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | from mmcv.cnn import constant_init, kaiming_init 3 | from mmcv.runner import auto_fp16, load_checkpoint 4 | 5 | from mmdet.models.backbones import ResNet 6 | from mmdet.models.builder import SHARED_HEADS 7 | from mmdet.models.utils import ResLayer as _ResLayer 8 | from mmdet.utils import get_root_logger 9 | 10 | 11 | @SHARED_HEADS.register_module() 12 | class ResLayer(nn.Module): 13 | 14 | def __init__(self, 15 | depth, 16 | stage=3, 17 | stride=2, 18 | dilation=1, 19 | style='pytorch', 20 | norm_cfg=dict(type='BN', requires_grad=True), 21 | norm_eval=True, 22 | with_cp=False, 23 | dcn=None): 24 | super(ResLayer, self).__init__() 25 | self.norm_eval = norm_eval 26 | self.norm_cfg = norm_cfg 27 | self.stage = stage 28 | self.fp16_enabled = False 29 | block, stage_blocks = ResNet.arch_settings[depth] 30 | stage_block = stage_blocks[stage] 31 | planes = 64 * 2**stage 32 | inplanes = 64 * 2**(stage - 1) * block.expansion 33 | 34 | res_layer = _ResLayer( 35 | block, 36 | inplanes, 37 | planes, 38 | stage_block, 39 | stride=stride, 40 | dilation=dilation, 41 | style=style, 42 | with_cp=with_cp, 43 | norm_cfg=self.norm_cfg, 44 | dcn=dcn) 45 | self.add_module(f'layer{stage + 1}', res_layer) 46 | 47 | def init_weights(self, pretrained=None): 48 | """Initialize the weights in the module. 49 | 50 | Args: 51 | pretrained (str, optional): Path to pre-trained weights. 52 | Defaults to None.
53 | """ 54 | if isinstance(pretrained, str): 55 | logger = get_root_logger() 56 | load_checkpoint(self, pretrained, strict=False, logger=logger) 57 | elif pretrained is None: 58 | for m in self.modules(): 59 | if isinstance(m, nn.Conv2d): 60 | kaiming_init(m) 61 | elif isinstance(m, nn.BatchNorm2d): 62 | constant_init(m, 1) 63 | else: 64 | raise TypeError('pretrained must be a str or None') 65 | 66 | @auto_fp16() 67 | def forward(self, x): 68 | res_layer = getattr(self, f'layer{self.stage + 1}') 69 | out = res_layer(x) 70 | return out 71 | 72 | def train(self, mode=True): 73 | super(ResLayer, self).train(mode) 74 | if self.norm_eval: 75 | for m in self.modules(): 76 | if isinstance(m, nn.BatchNorm2d): 77 | m.eval() 78 | -------------------------------------------------------------------------------- /mmdet/models/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .builder import build_positional_encoding, build_transformer 2 | from .gaussian_target import gaussian_radius, gen_gaussian_target 3 | from .positional_encoding import (LearnedPositionalEncoding, 4 | SinePositionalEncoding) 5 | from .res_layer import ResLayer 6 | from .transformer import (FFN, MultiheadAttention, Transformer, 7 | TransformerDecoder, TransformerDecoderLayer, 8 | TransformerEncoder, TransformerEncoderLayer) 9 | 10 | __all__ = [ 11 | 'ResLayer', 'gaussian_radius', 'gen_gaussian_target', 'MultiheadAttention', 12 | 'FFN', 'TransformerEncoderLayer', 'TransformerEncoder', 13 | 'TransformerDecoderLayer', 'TransformerDecoder', 'Transformer', 14 | 'build_transformer', 'build_positional_encoding', 'SinePositionalEncoding', 15 | 'LearnedPositionalEncoding' 16 | ] 17 | -------------------------------------------------------------------------------- /mmdet/models/utils/builder.py: -------------------------------------------------------------------------------- 1 | from mmcv.utils import Registry, build_from_cfg 2 | 3 | TRANSFORMER = Registry('Transformer') 4 | POSITIONAL_ENCODING = Registry('Position encoding') 5 | 6 | 7 | def build_transformer(cfg, default_args=None): 8 | """Builder for Transformer.""" 9 | return build_from_cfg(cfg, TRANSFORMER, default_args) 10 | 11 | 12 | def build_positional_encoding(cfg, default_args=None): 13 | """Builder for Position Encoding.""" 14 | return build_from_cfg(cfg, POSITIONAL_ENCODING, default_args) 15 | -------------------------------------------------------------------------------- /mmdet/models/utils/res_layer.py: -------------------------------------------------------------------------------- 1 | from mmcv.cnn import build_conv_layer, build_norm_layer 2 | from torch import nn as nn 3 | 4 | 5 | class ResLayer(nn.Sequential): 6 | """ResLayer to build ResNet style backbone. 7 | 8 | Args: 9 | block (nn.Module): block used to build ResLayer. 10 | inplanes (int): inplanes of block. 11 | planes (int): planes of block. 12 | num_blocks (int): number of blocks. 13 | stride (int): stride of the first block. Default: 1 14 | avg_down (bool): Use AvgPool instead of stride conv when 15 | downsampling in the bottleneck. Default: False 16 | conv_cfg (dict): dictionary to construct and config conv layer. 17 | Default: None 18 | norm_cfg (dict): dictionary to construct and config norm layer. 19 | Default: dict(type='BN') 20 | downsample_first (bool): Downsample at the first block or last block. 21 | False for Hourglass, True for ResNet. 
Default: True 22 | """ 23 | 24 | def __init__(self, 25 | block, 26 | inplanes, 27 | planes, 28 | num_blocks, 29 | stride=1, 30 | avg_down=False, 31 | conv_cfg=None, 32 | norm_cfg=dict(type='BN'), 33 | downsample_first=True, 34 | **kwargs): 35 | self.block = block 36 | 37 | downsample = None 38 | if stride != 1 or inplanes != planes * block.expansion: 39 | downsample = [] 40 | conv_stride = stride 41 | if avg_down: 42 | conv_stride = 1 43 | downsample.append( 44 | nn.AvgPool2d( 45 | kernel_size=stride, 46 | stride=stride, 47 | ceil_mode=True, 48 | count_include_pad=False)) 49 | downsample.extend([ 50 | build_conv_layer( 51 | conv_cfg, 52 | inplanes, 53 | planes * block.expansion, 54 | kernel_size=1, 55 | stride=conv_stride, 56 | bias=False), 57 | build_norm_layer(norm_cfg, planes * block.expansion)[1] 58 | ]) 59 | downsample = nn.Sequential(*downsample) 60 | 61 | layers = [] 62 | if downsample_first: 63 | layers.append( 64 | block( 65 | inplanes=inplanes, 66 | planes=planes, 67 | stride=stride, 68 | downsample=downsample, 69 | conv_cfg=conv_cfg, 70 | norm_cfg=norm_cfg, 71 | **kwargs)) 72 | inplanes = planes * block.expansion 73 | for _ in range(1, num_blocks): 74 | layers.append( 75 | block( 76 | inplanes=inplanes, 77 | planes=planes, 78 | stride=1, 79 | conv_cfg=conv_cfg, 80 | norm_cfg=norm_cfg, 81 | **kwargs)) 82 | 83 | else: # downsample_first=False is for HourglassModule 84 | for _ in range(num_blocks - 1): 85 | layers.append( 86 | block( 87 | inplanes=inplanes, 88 | planes=inplanes, 89 | stride=1, 90 | conv_cfg=conv_cfg, 91 | norm_cfg=norm_cfg, 92 | **kwargs)) 93 | layers.append( 94 | block( 95 | inplanes=inplanes, 96 | planes=planes, 97 | stride=stride, 98 | downsample=downsample, 99 | conv_cfg=conv_cfg, 100 | norm_cfg=norm_cfg, 101 | **kwargs)) 102 | super(ResLayer, self).__init__(*layers) 103 | -------------------------------------------------------------------------------- /mmdet/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .collect_env import collect_env 2 | from .logger import get_root_logger 3 | 4 | __all__ = ['get_root_logger', 'collect_env'] 5 | -------------------------------------------------------------------------------- /mmdet/utils/collect_env.py: -------------------------------------------------------------------------------- 1 | from mmcv.utils import collect_env as collect_base_env 2 | from mmcv.utils import get_git_hash 3 | 4 | import mmdet 5 | 6 | 7 | def collect_env(): 8 | """Collect the information of the running environments.""" 9 | env_info = collect_base_env() 10 | env_info['MMDetection'] = mmdet.__version__ + '+' + get_git_hash()[:7] 11 | return env_info 12 | 13 | 14 | if __name__ == '__main__': 15 | for name, val in collect_env().items(): 16 | print(f'{name}: {val}') 17 | -------------------------------------------------------------------------------- /mmdet/utils/logger.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from mmcv.utils import get_logger 4 | 5 | 6 | def get_root_logger(log_file=None, log_level=logging.INFO): 7 | """Get root logger. 8 | 9 | Args: 10 | log_file (str, optional): File path of log. Defaults to None. 11 | log_level (int, optional): The level of logger. 12 | Defaults to logging.INFO. 
13 | 14 | Returns: 15 | :obj:`logging.Logger`: The obtained logger 16 | """ 17 | logger = get_logger(name='mmdet', log_file=log_file, log_level=log_level) 18 | 19 | return logger 20 | -------------------------------------------------------------------------------- /mmdet/utils/profiling.py: -------------------------------------------------------------------------------- 1 | import contextlib 2 | import sys 3 | import time 4 | 5 | import torch 6 | 7 | if sys.version_info >= (3, 7): 8 | 9 | @contextlib.contextmanager 10 | def profile_time(trace_name, 11 | name, 12 | enabled=True, 13 | stream=None, 14 | end_stream=None): 15 | """Print time spent by CPU and GPU. 16 | 17 | Useful as a temporary context manager to find sweet spots of code 18 | suitable for async implementation. 19 | """ 20 | if (not enabled) or not torch.cuda.is_available(): 21 | yield 22 | return 23 | stream = stream if stream else torch.cuda.current_stream() 24 | end_stream = end_stream if end_stream else stream 25 | start = torch.cuda.Event(enable_timing=True) 26 | end = torch.cuda.Event(enable_timing=True) 27 | stream.record_event(start) 28 | try: 29 | cpu_start = time.monotonic() 30 | yield 31 | finally: 32 | cpu_end = time.monotonic() 33 | end_stream.record_event(end) 34 | end.synchronize() 35 | cpu_time = (cpu_end - cpu_start) * 1000 36 | gpu_time = start.elapsed_time(end) 37 | msg = f'{trace_name} {name} cpu_time {cpu_time:.2f} ms ' 38 | msg += f'gpu_time {gpu_time:.2f} ms stream {stream}' 39 | print(msg, end_stream) 40 | -------------------------------------------------------------------------------- /mmdet/version.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Open-MMLab. All rights reserved. 2 | 3 | __version__ = '2.8.0' 4 | short_version = __version__ 5 | 6 | 7 | def parse_version_info(version_str): 8 | version_info = [] 9 | for x in version_str.split('.'): 10 | if x.isdigit(): 11 | version_info.append(int(x)) 12 | elif x.find('rc') != -1: 13 | patch_version = x.split('rc') 14 | version_info.append(int(patch_version[0])) 15 | version_info.append(f'rc{patch_version[1]}') 16 | return tuple(version_info) 17 | 18 | 19 | version_info = parse_version_info(__version__) 20 | -------------------------------------------------------------------------------- /pytest.ini: -------------------------------------------------------------------------------- 1 | [pytest] 2 | addopts = --xdoctest --xdoctest-style=auto 3 | norecursedirs = .git ignore build __pycache__ data docker docs .eggs 4 | 5 | filterwarnings= default 6 | ignore:.*No cfgstr given in Cacher constructor or call.*:Warning 7 | ignore:.*Define the __nice__ method for.*:Warning 8 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | -r requirements/build.txt 2 | -r requirements/optional.txt 3 | -r requirements/runtime.txt 4 | -r requirements/tests.txt 5 | -------------------------------------------------------------------------------- /requirements/build.txt: -------------------------------------------------------------------------------- 1 | # These must be installed before building mmdetection 2 | cython 3 | numpy 4 | -------------------------------------------------------------------------------- /requirements/docs.txt: -------------------------------------------------------------------------------- 1 | recommonmark 2 | sphinx 3 | sphinx_markdown_tables 4 | sphinx_rtd_theme 5 | 
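For reference, a quick check of how ``parse_version_info`` in
``mmdet/version.py`` above handles release-candidate strings (the version
numbers here are illustrative, not the project's release history):

    >>> parse_version_info('2.8.0')
    (2, 8, 0)
    >>> parse_version_info('2.8.0rc1')
    (2, 8, 0, 'rc1')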
-------------------------------------------------------------------------------- /requirements/optional.txt: -------------------------------------------------------------------------------- 1 | albumentations>=0.3.2 2 | cityscapesscripts 3 | imagecorruptions 4 | mmlvis 5 | scipy 6 | sklearn 7 | -------------------------------------------------------------------------------- /requirements/readthedocs.txt: -------------------------------------------------------------------------------- 1 | mmcv 2 | torch 3 | torchvision 4 | -------------------------------------------------------------------------------- /requirements/runtime.txt: -------------------------------------------------------------------------------- 1 | matplotlib 2 | mmpycocotools 3 | numpy 4 | six 5 | terminaltables 6 | -------------------------------------------------------------------------------- /requirements/tests.txt: -------------------------------------------------------------------------------- 1 | asynctest 2 | codecov 3 | flake8 4 | interrogate 5 | isort==4.3.21 6 | # Note: used for kwarray.group_items, this may be ported to mmcv in the future. 7 | kwarray 8 | pytest 9 | ubelt 10 | xdoctest>=0.10.0 11 | yapf 12 | -------------------------------------------------------------------------------- /resources/coco_test_12510.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcahny/object_localization_network/300d995da000484fdfd26114e4b783c992046e9c/resources/coco_test_12510.jpg -------------------------------------------------------------------------------- /resources/corruptions_sev_3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcahny/object_localization_network/300d995da000484fdfd26114e4b783c992046e9c/resources/corruptions_sev_3.png -------------------------------------------------------------------------------- /resources/data_pipeline.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcahny/object_localization_network/300d995da000484fdfd26114e4b783c992046e9c/resources/data_pipeline.png -------------------------------------------------------------------------------- /resources/loss_curve.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcahny/object_localization_network/300d995da000484fdfd26114e4b783c992046e9c/resources/loss_curve.png -------------------------------------------------------------------------------- /resources/mmdet-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcahny/object_localization_network/300d995da000484fdfd26114e4b783c992046e9c/resources/mmdet-logo.png -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [isort] 2 | line_length = 79 3 | multi_line_output = 0 4 | known_standard_library = setuptools 5 | known_first_party = mmdet 6 | known_third_party = PIL,asynctest,cityscapesscripts,cv2,matplotlib,mmcv,numpy,onnx,onnxruntime,pycocotools,pytest,robustness_eval,seaborn,six,terminaltables,torch 7 | no_lines_before = STDLIB,LOCALFOLDER 8 | default_section = THIRDPARTY 9 | 10 | [yapf] 11 | BASED_ON_STYLE = pep8 12 | BLANK_LINE_BEFORE_NESTED_CLASS_OR_DEF = true 13 | SPLIT_BEFORE_EXPRESSION_AFTER_OPENING_PAREN = true 14 | 
-------------------------------------------------------------------------------- /tests/async_benchmark.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import os 3 | import shutil 4 | import urllib 5 | 6 | import mmcv 7 | import torch 8 | 9 | from mmdet.apis import (async_inference_detector, inference_detector, 10 | init_detector) 11 | from mmdet.utils.contextmanagers import concurrent 12 | from mmdet.utils.profiling import profile_time 13 | 14 | 15 | async def main(): 16 | """Benchmark between async and synchronous inference interfaces. 17 | 18 | Sample runs for 20 demo images on K80 GPU, model - mask_rcnn_r50_fpn_1x: 19 | 20 | async sync 21 | 22 | 7981.79 ms 9660.82 ms 23 | 8074.52 ms 9660.94 ms 24 | 7976.44 ms 9406.83 ms 25 | 26 | Async variant takes about 0.83-0.85 of the time of the synchronous 27 | interface. 28 | """ 29 | project_dir = os.path.abspath(os.path.dirname(os.path.dirname(__file__))) 30 | 31 | config_file = os.path.join( 32 | project_dir, 'configs/mask_rcnn/mask_rcnn_r50_fpn_1x_coco.py') 33 | checkpoint_file = os.path.join( 34 | project_dir, 35 | 'checkpoints/mask_rcnn_r50_fpn_1x_coco_20200205-d4b0c5d6.pth') 36 | 37 | if not os.path.exists(checkpoint_file): 38 | url = ('http://download.openmmlab.com/mmdetection/v2.0' 39 | '/mask_rcnn/mask_rcnn_r50_fpn_1x_coco' 40 | '/mask_rcnn_r50_fpn_1x_coco_20200205-d4b0c5d6.pth') 41 | print(f'Downloading {url} ...') 42 | local_filename, _ = urllib.request.urlretrieve(url) 43 | os.makedirs(os.path.dirname(checkpoint_file), exist_ok=True) 44 | shutil.move(local_filename, checkpoint_file) 45 | print(f'Saved as {checkpoint_file}') 46 | else: 47 | print(f'Using existing checkpoint {checkpoint_file}') 48 | 49 | device = 'cuda:0' 50 | model = init_detector( 51 | config_file, checkpoint=checkpoint_file, device=device) 52 | 53 | # queue is used for concurrent inference of multiple images 54 | streamqueue = asyncio.Queue() 55 | # queue size defines concurrency level 56 | streamqueue_size = 4 57 | 58 | for _ in range(streamqueue_size): 59 | streamqueue.put_nowait(torch.cuda.Stream(device=device)) 60 | 61 | # test a single image and show the results 62 | img = mmcv.imread(os.path.join(project_dir, 'demo/demo.jpg')) 63 | 64 | # warmup 65 | await async_inference_detector(model, img) 66 | 67 | async def detect(img): 68 | async with concurrent(streamqueue): 69 | return await async_inference_detector(model, img) 70 | 71 | num_of_images = 20 72 | with profile_time('benchmark', 'async'): 73 | tasks = [ 74 | asyncio.create_task(detect(img)) for _ in range(num_of_images) 75 | ] 76 | async_results = await asyncio.gather(*tasks) 77 | 78 | with torch.cuda.stream(torch.cuda.default_stream()): 79 | with profile_time('benchmark', 'sync'): 80 | sync_results = [ 81 | inference_detector(model, img) for _ in range(num_of_images) 82 | ] 83 | 84 | result_dir = os.path.join(project_dir, 'demo') 85 | model.show_result( 86 | img, 87 | async_results[0], 88 | score_thr=0.5, 89 | show=False, 90 | out_file=os.path.join(result_dir, 'result_async.jpg')) 91 | model.show_result( 92 | img, 93 | sync_results[0], 94 | score_thr=0.5, 95 | show=False, 96 | out_file=os.path.join(result_dir, 'result_sync.jpg')) 97 | 98 | 99 | if __name__ == '__main__': 100 | asyncio.run(main()) 101 | -------------------------------------------------------------------------------- /tests/test_async.py: -------------------------------------------------------------------------------- 1 | """Tests for async interface.""" 2 | 3 | import asyncio 4 | 
import os 5 | import sys 6 | 7 | import asynctest 8 | import mmcv 9 | import torch 10 | 11 | from mmdet.apis import async_inference_detector, init_detector 12 | 13 | if sys.version_info >= (3, 7): 14 | from mmdet.utils.contextmanagers import concurrent 15 | 16 | 17 | class AsyncTestCase(asynctest.TestCase): 18 | use_default_loop = False 19 | forbid_get_event_loop = True 20 | 21 | TEST_TIMEOUT = int(os.getenv('ASYNCIO_TEST_TIMEOUT', '30')) 22 | 23 | def _run_test_method(self, method): 24 | result = method() 25 | if asyncio.iscoroutine(result): 26 | self.loop.run_until_complete( 27 | asyncio.wait_for(result, timeout=self.TEST_TIMEOUT)) 28 | 29 | 30 | class MaskRCNNDetector: 31 | 32 | def __init__(self, 33 | model_config, 34 | checkpoint=None, 35 | streamqueue_size=3, 36 | device='cuda:0'): 37 | 38 | self.streamqueue_size = streamqueue_size 39 | self.device = device 40 | # build the model and load the (optional) checkpoint 41 | self.model = init_detector( 42 | model_config, checkpoint=checkpoint, device=self.device) 43 | self.streamqueue = None 44 | 45 | async def init(self): 46 | self.streamqueue = asyncio.Queue() 47 | for _ in range(self.streamqueue_size): 48 | stream = torch.cuda.Stream(device=self.device) 49 | self.streamqueue.put_nowait(stream) 50 | 51 | if sys.version_info >= (3, 7): 52 | 53 | async def apredict(self, img): 54 | if isinstance(img, str): 55 | img = mmcv.imread(img) 56 | async with concurrent(self.streamqueue): 57 | result = await async_inference_detector(self.model, img) 58 | return result 59 | 60 | 61 | class AsyncInferenceTestCase(AsyncTestCase): 62 | 63 | if sys.version_info >= (3, 7): 64 | 65 | async def test_simple_inference(self): 66 | if not torch.cuda.is_available(): 67 | import pytest 68 | 69 | pytest.skip('test requires GPU and torch+cuda') 70 | 71 | ori_grad_enabled = torch.is_grad_enabled() 72 | root_dir = os.path.dirname(os.path.dirname(__file__)) 73 | model_config = os.path.join( 74 | root_dir, 'configs/mask_rcnn/mask_rcnn_r50_fpn_1x_coco.py') 75 | detector = MaskRCNNDetector(model_config) 76 | await detector.init() 77 | img_path = os.path.join(root_dir, 'demo/demo.jpg') 78 | bboxes, _ = await detector.apredict(img_path) 79 | self.assertTrue(bboxes) 80 | # the async inference detector hacks grad_enabled, 81 | # so restore it here to keep other tests unaffected 82 | torch.set_grad_enabled(ori_grad_enabled) 83 | -------------------------------------------------------------------------------- /tests/test_coder.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from mmdet.core.bbox.coder import YOLOBBoxCoder 4 | 5 | 6 | def test_yolo_bbox_coder(): 7 | coder = YOLOBBoxCoder() 8 | bboxes = torch.Tensor([[-42., -29., 74., 61.], [-10., -29., 106., 61.], 9 | [22., -29., 138., 61.], [54., -29., 170., 61.]]) 10 | pred_bboxes = torch.Tensor([[0.4709, 0.6152, 0.1690, -0.4056], 11 | [0.5399, 0.6653, 0.1162, -0.4162], 12 | [0.4654, 0.6618, 0.1548, -0.4301], 13 | [0.4786, 0.6197, 0.1896, -0.4479]]) 14 | grid_size = 32 15 | expected_decode_bboxes = torch.Tensor( 16 | [[-53.6102, -10.3096, 83.7478, 49.6824], 17 | [-15.8700, -8.3901, 114.4236, 50.9693], 18 | [11.1822, -8.0924, 146.6034, 50.4476], 19 | [41.2068, -8.9232, 181.4236, 48.5840]]) 20 | assert expected_decode_bboxes.allclose( 21 | coder.decode(bboxes, pred_bboxes, grid_size)) 22 | -------------------------------------------------------------------------------- /tests/test_data/test_formatting.py: -------------------------------------------------------------------------------- 1
| import os.path as osp 2 | 3 | from mmcv.utils import build_from_cfg 4 | 5 | from mmdet.datasets.builder import PIPELINES 6 | 7 | 8 | def test_default_format_bundle(): 9 | results = dict( 10 | img_prefix=osp.join(osp.dirname(__file__), '../data'), 11 | img_info=dict(filename='color.jpg')) 12 | load = dict(type='LoadImageFromFile') 13 | load = build_from_cfg(load, PIPELINES) 14 | bundle = dict(type='DefaultFormatBundle') 15 | bundle = build_from_cfg(bundle, PIPELINES) 16 | results = load(results) 17 | assert 'pad_shape' not in results 18 | assert 'scale_factor' not in results 19 | assert 'img_norm_cfg' not in results 20 | results = bundle(results) 21 | assert 'pad_shape' in results 22 | assert 'scale_factor' in results 23 | assert 'img_norm_cfg' in results 24 | -------------------------------------------------------------------------------- /tests/test_data/test_loading.py: -------------------------------------------------------------------------------- 1 | import copy 2 | import os.path as osp 3 | 4 | import mmcv 5 | import numpy as np 6 | 7 | from mmdet.datasets.pipelines import (LoadImageFromFile, LoadImageFromWebcam, 8 | LoadMultiChannelImageFromFiles) 9 | 10 | 11 | class TestLoading(object): 12 | 13 | @classmethod 14 | def setup_class(cls): 15 | cls.data_prefix = osp.join(osp.dirname(__file__), '../data') 16 | 17 | def test_load_img(self): 18 | results = dict( 19 | img_prefix=self.data_prefix, img_info=dict(filename='color.jpg')) 20 | transform = LoadImageFromFile() 21 | results = transform(copy.deepcopy(results)) 22 | assert results['filename'] == osp.join(self.data_prefix, 'color.jpg') 23 | assert results['ori_filename'] == 'color.jpg' 24 | assert results['img'].shape == (288, 512, 3) 25 | assert results['img'].dtype == np.uint8 26 | assert results['img_shape'] == (288, 512, 3) 27 | assert results['ori_shape'] == (288, 512, 3) 28 | assert repr(transform) == transform.__class__.__name__ + \ 29 | "(to_float32=False, color_type='color', " + \ 30 | "file_client_args={'backend': 'disk'})" 31 | 32 | # no img_prefix 33 | results = dict( 34 | img_prefix=None, img_info=dict(filename='tests/data/color.jpg')) 35 | transform = LoadImageFromFile() 36 | results = transform(copy.deepcopy(results)) 37 | assert results['filename'] == 'tests/data/color.jpg' 38 | assert results['ori_filename'] == 'tests/data/color.jpg' 39 | assert results['img'].shape == (288, 512, 3) 40 | 41 | # to_float32 42 | transform = LoadImageFromFile(to_float32=True) 43 | results = transform(copy.deepcopy(results)) 44 | assert results['img'].dtype == np.float32 45 | 46 | # gray image 47 | results = dict( 48 | img_prefix=self.data_prefix, img_info=dict(filename='gray.jpg')) 49 | transform = LoadImageFromFile() 50 | results = transform(copy.deepcopy(results)) 51 | assert results['img'].shape == (288, 512, 3) 52 | assert results['img'].dtype == np.uint8 53 | 54 | transform = LoadImageFromFile(color_type='unchanged') 55 | results = transform(copy.deepcopy(results)) 56 | assert results['img'].shape == (288, 512) 57 | assert results['img'].dtype == np.uint8 58 | 59 | def test_load_multi_channel_img(self): 60 | results = dict( 61 | img_prefix=self.data_prefix, 62 | img_info=dict(filename=['color.jpg', 'color.jpg'])) 63 | transform = LoadMultiChannelImageFromFiles() 64 | results = transform(copy.deepcopy(results)) 65 | assert results['filename'] == [ 66 | osp.join(self.data_prefix, 'color.jpg'), 67 | osp.join(self.data_prefix, 'color.jpg') 68 | ] 69 | assert results['ori_filename'] == ['color.jpg', 'color.jpg'] 70 | assert 
results['img'].shape == (288, 512, 3, 2) 71 | assert results['img'].dtype == np.uint8 72 | assert results['img_shape'] == (288, 512, 3, 2) 73 | assert results['ori_shape'] == (288, 512, 3, 2) 74 | assert results['pad_shape'] == (288, 512, 3, 2) 75 | assert results['scale_factor'] == 1.0 76 | assert repr(transform) == transform.__class__.__name__ + \ 77 | "(to_float32=False, color_type='unchanged', " + \ 78 | "file_client_args={'backend': 'disk'})" 79 | 80 | def test_load_webcam_img(self): 81 | img = mmcv.imread(osp.join(self.data_prefix, 'color.jpg')) 82 | results = dict(img=img) 83 | transform = LoadImageFromWebcam() 84 | results = transform(copy.deepcopy(results)) 85 | assert results['filename'] is None 86 | assert results['ori_filename'] is None 87 | assert results['img'].shape == (288, 512, 3) 88 | assert results['img'].dtype == np.uint8 89 | assert results['img_shape'] == (288, 512, 3) 90 | assert results['ori_shape'] == (288, 512, 3) 91 | -------------------------------------------------------------------------------- /tests/test_data/test_utils.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from mmdet.datasets import replace_ImageToTensor 4 | 5 | 6 | def test_replace_ImageToTensor(): 7 | # with MultiScaleFlipAug 8 | pipelines = [ 9 | dict(type='LoadImageFromFile'), 10 | dict( 11 | type='MultiScaleFlipAug', 12 | img_scale=(1333, 800), 13 | flip=False, 14 | transforms=[ 15 | dict(type='Resize', keep_ratio=True), 16 | dict(type='RandomFlip'), 17 | dict(type='Normalize'), 18 | dict(type='Pad', size_divisor=32), 19 | dict(type='ImageToTensor', keys=['img']), 20 | dict(type='Collect', keys=['img']), 21 | ]) 22 | ] 23 | expected_pipelines = [ 24 | dict(type='LoadImageFromFile'), 25 | dict( 26 | type='MultiScaleFlipAug', 27 | img_scale=(1333, 800), 28 | flip=False, 29 | transforms=[ 30 | dict(type='Resize', keep_ratio=True), 31 | dict(type='RandomFlip'), 32 | dict(type='Normalize'), 33 | dict(type='Pad', size_divisor=32), 34 | dict(type='DefaultFormatBundle'), 35 | dict(type='Collect', keys=['img']), 36 | ]) 37 | ] 38 | with pytest.warns(UserWarning): 39 | assert expected_pipelines == replace_ImageToTensor(pipelines) 40 | 41 | # without MultiScaleFlipAug 42 | pipelines = [ 43 | dict(type='LoadImageFromFile'), 44 | dict(type='Resize', keep_ratio=True), 45 | dict(type='RandomFlip'), 46 | dict(type='Normalize'), 47 | dict(type='Pad', size_divisor=32), 48 | dict(type='ImageToTensor', keys=['img']), 49 | dict(type='Collect', keys=['img']), 50 | ] 51 | expected_pipelines = [ 52 | dict(type='LoadImageFromFile'), 53 | dict(type='Resize', keep_ratio=True), 54 | dict(type='RandomFlip'), 55 | dict(type='Normalize'), 56 | dict(type='Pad', size_divisor=32), 57 | dict(type='DefaultFormatBundle'), 58 | dict(type='Collect', keys=['img']), 59 | ] 60 | with pytest.warns(UserWarning): 61 | assert expected_pipelines == replace_ImageToTensor(pipelines) 62 | -------------------------------------------------------------------------------- /tests/test_misc.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pytest 3 | import torch 4 | 5 | from mmdet.core.mask.structures import BitmapMasks, PolygonMasks 6 | from mmdet.core.utils import mask2ndarray 7 | 8 | 9 | def dummy_raw_polygon_masks(size): 10 | """ 11 | Args: 12 | size (tuple): expected shape of dummy masks, (N, H, W) 13 | 14 | Return: 15 | list[list[ndarray]]: dummy mask 16 | """ 17 | num_obj, heigt, width = size 18 | polygons = 
[] 19 | for _ in range(num_obj): 20 | num_points = np.random.randint(5) * 2 + 6 21 | polygons.append([np.random.uniform(0, min(heigt, width), num_points)]) 22 | return polygons 23 | 24 | 25 | def test_mask2ndarray(): 26 | raw_masks = np.ones((3, 28, 28)) 27 | bitmap_mask = BitmapMasks(raw_masks, 28, 28) 28 | output_mask = mask2ndarray(bitmap_mask) 29 | assert np.allclose(raw_masks, output_mask) 30 | 31 | raw_masks = dummy_raw_polygon_masks((3, 28, 28)) 32 | polygon_masks = PolygonMasks(raw_masks, 28, 28) 33 | output_mask = mask2ndarray(polygon_masks) 34 | assert output_mask.shape == (3, 28, 28) 35 | 36 | raw_masks = np.ones((3, 28, 28)) 37 | output_mask = mask2ndarray(raw_masks) 38 | assert np.allclose(raw_masks, output_mask) 39 | 40 | raw_masks = torch.ones((3, 28, 28)) 41 | output_mask = mask2ndarray(raw_masks) 42 | assert np.allclose(raw_masks, output_mask) 43 | 44 | # test unsupported type 45 | raw_masks = [] 46 | with pytest.raises(TypeError): 47 | output_mask = mask2ndarray(raw_masks) 48 | -------------------------------------------------------------------------------- /tests/test_models/test_position_encoding.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import torch 3 | 4 | from mmdet.models.utils import (LearnedPositionalEncoding, 5 | SinePositionalEncoding) 6 | 7 | 8 | def test_sine_positional_encoding(num_feats=16, batch_size=2): 9 | # test invalid type of scale 10 | with pytest.raises(AssertionError): 11 | module = SinePositionalEncoding( 12 | num_feats, scale=(3., ), normalize=True) 13 | 14 | module = SinePositionalEncoding(num_feats) 15 | h, w = 10, 6 16 | mask = torch.rand(batch_size, h, w) > 0.5 17 | assert not module.normalize 18 | out = module(mask) 19 | assert out.shape == (batch_size, num_feats * 2, h, w) 20 | 21 | # set normalize 22 | module = SinePositionalEncoding(num_feats, normalize=True) 23 | assert module.normalize 24 | out = module(mask) 25 | assert out.shape == (batch_size, num_feats * 2, h, w) 26 | 27 | 28 | def test_learned_positional_encoding(num_feats=16, 29 | row_num_embed=10, 30 | col_num_embed=10, 31 | batch_size=2): 32 | module = LearnedPositionalEncoding(num_feats, row_num_embed, col_num_embed) 33 | assert module.row_embed.weight.shape == (row_num_embed, num_feats) 34 | assert module.col_embed.weight.shape == (col_num_embed, num_feats) 35 | h, w = 10, 6 36 | mask = torch.rand(batch_size, h, w) > 0.5 37 | out = module(mask) 38 | assert out.shape == (batch_size, num_feats * 2, h, w) 39 | -------------------------------------------------------------------------------- /tests/test_models/test_roi_extractor.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import torch 3 | 4 | from mmdet.models.roi_heads.roi_extractors import GenericRoIExtractor 5 | 6 | 7 | def test_groie(): 8 | # test with pre/post 9 | cfg = dict( 10 | roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=2), 11 | out_channels=256, 12 | featmap_strides=[4, 8, 16, 32], 13 | pre_cfg=dict( 14 | type='ConvModule', 15 | in_channels=256, 16 | out_channels=256, 17 | kernel_size=5, 18 | padding=2, 19 | inplace=False, 20 | ), 21 | post_cfg=dict( 22 | type='ConvModule', 23 | in_channels=256, 24 | out_channels=256, 25 | kernel_size=5, 26 | padding=2, 27 | inplace=False)) 28 | 29 | groie = GenericRoIExtractor(**cfg) 30 | 31 | feats = ( 32 | torch.rand((1, 256, 200, 336)), 33 | torch.rand((1, 256, 100, 168)), 34 | torch.rand((1, 256, 50, 84)), 35 | torch.rand((1, 256, 25, 42)), 
36 | ) 37 | 38 | rois = torch.tensor([[0.0000, 587.8285, 52.1405, 886.2484, 341.5644]]) 39 | 40 | res = groie(feats, rois) 41 | assert res.shape == torch.Size([1, 256, 7, 7]) 42 | 43 | # test w.o. pre/post 44 | cfg = dict( 45 | roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=2), 46 | out_channels=256, 47 | featmap_strides=[4, 8, 16, 32]) 48 | 49 | groie = GenericRoIExtractor(**cfg) 50 | 51 | feats = ( 52 | torch.rand((1, 256, 200, 336)), 53 | torch.rand((1, 256, 100, 168)), 54 | torch.rand((1, 256, 50, 84)), 55 | torch.rand((1, 256, 25, 42)), 56 | ) 57 | 58 | rois = torch.tensor([[0.0000, 587.8285, 52.1405, 886.2484, 341.5644]]) 59 | 60 | res = groie(feats, rois) 61 | assert res.shape == torch.Size([1, 256, 7, 7]) 62 | 63 | # test w.o. pre/post concat 64 | cfg = dict( 65 | aggregation='concat', 66 | roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=2), 67 | out_channels=256 * 4, 68 | featmap_strides=[4, 8, 16, 32]) 69 | 70 | groie = GenericRoIExtractor(**cfg) 71 | 72 | feats = ( 73 | torch.rand((1, 256, 200, 336)), 74 | torch.rand((1, 256, 100, 168)), 75 | torch.rand((1, 256, 50, 84)), 76 | torch.rand((1, 256, 25, 42)), 77 | ) 78 | 79 | rois = torch.tensor([[0.0000, 587.8285, 52.1405, 886.2484, 341.5644]]) 80 | 81 | res = groie(feats, rois) 82 | assert res.shape == torch.Size([1, 1024, 7, 7]) 83 | 84 | # test not supported aggregate method 85 | with pytest.raises(AssertionError): 86 | cfg = dict( 87 | aggregation='not support', 88 | roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=2), 89 | out_channels=1024, 90 | featmap_strides=[4, 8, 16, 32]) 91 | _ = GenericRoIExtractor(**cfg) 92 | 93 | # test concat channels number 94 | cfg = dict( 95 | aggregation='concat', 96 | roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=2), 97 | out_channels=256 * 5, # 256*5 != 256*4 98 | featmap_strides=[4, 8, 16, 32]) 99 | 100 | groie = GenericRoIExtractor(**cfg) 101 | 102 | feats = ( 103 | torch.rand((1, 256, 200, 336)), 104 | torch.rand((1, 256, 100, 168)), 105 | torch.rand((1, 256, 50, 84)), 106 | torch.rand((1, 256, 25, 42)), 107 | ) 108 | 109 | rois = torch.tensor([[0.0000, 587.8285, 52.1405, 886.2484, 341.5644]]) 110 | 111 | # out_channels does not sum of feat channels 112 | with pytest.raises(AssertionError): 113 | _ = groie(feats, rois) 114 | -------------------------------------------------------------------------------- /tests/test_version.py: -------------------------------------------------------------------------------- 1 | from mmdet import digit_version 2 | 3 | 4 | def test_version_check(): 5 | assert digit_version('1.0.5') > digit_version('1.0.5rc0') 6 | assert digit_version('1.0.5') > digit_version('1.0.4rc0') 7 | assert digit_version('1.0.5') > digit_version('1.0rc0') 8 | assert digit_version('1.0.0') > digit_version('0.6.2') 9 | assert digit_version('1.0.0') > digit_version('0.2.16') 10 | assert digit_version('1.0.5rc0') > digit_version('1.0.0rc0') 11 | assert digit_version('1.0.0rc1') > digit_version('1.0.0rc0') 12 | assert digit_version('1.0.0rc2') > digit_version('1.0.0rc0') 13 | assert digit_version('1.0.0rc2') > digit_version('1.0.0rc1') 14 | assert digit_version('1.0.1rc1') > digit_version('1.0.0rc1') 15 | assert digit_version('1.0.0') > digit_version('1.0.0rc1') 16 | -------------------------------------------------------------------------------- /tests/test_visualization.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Open-MMLab. All rights reserved. 
2 | import os 3 | import os.path as osp 4 | import tempfile 5 | 6 | import mmcv 7 | import numpy as np 8 | import pytest 9 | import torch 10 | 11 | from mmdet.core import visualization as vis 12 | 13 | 14 | def test_color(): 15 | assert vis.color_val_matplotlib(mmcv.Color.blue) == (0., 0., 1.) 16 | assert vis.color_val_matplotlib('green') == (0., 1., 0.) 17 | assert vis.color_val_matplotlib((1, 2, 3)) == (3 / 255, 2 / 255, 1 / 255) 18 | assert vis.color_val_matplotlib(100) == (100 / 255, 100 / 255, 100 / 255) 19 | assert vis.color_val_matplotlib(np.zeros(3, dtype=np.int)) == (0., 0., 0.) 20 | # forbid white color 21 | with pytest.raises(TypeError): 22 | vis.color_val_matplotlib([255, 255, 255]) 23 | # forbid float 24 | with pytest.raises(TypeError): 25 | vis.color_val_matplotlib(1.0) 26 | # overflowed 27 | with pytest.raises(AssertionError): 28 | vis.color_val_matplotlib((0, 0, 500)) 29 | 30 | 31 | def test_imshow_det_bboxes(): 32 | tmp_filename = osp.join(tempfile.gettempdir(), 'det_bboxes_image', 33 | 'image.jpg') 34 | image = np.ones((10, 10, 3), np.uint8) 35 | bbox = np.array([[2, 1, 3, 3], [3, 4, 6, 6]]) 36 | label = np.array([0, 1]) 37 | vis.imshow_det_bboxes( 38 | image, bbox, label, out_file=tmp_filename, show=False) 39 | assert osp.isfile(tmp_filename) 40 | 41 | # test shaped (0,) 42 | image = np.ones((10, 10, 3), np.uint8) 43 | bbox = np.ones((0, 4)) 44 | label = np.ones((0, )) 45 | vis.imshow_det_bboxes( 46 | image, bbox, label, out_file=tmp_filename, show=False) 47 | 48 | # test mask 49 | image = np.ones((10, 10, 3), np.uint8) 50 | bbox = np.array([[2, 1, 3, 3], [3, 4, 6, 6]]) 51 | label = np.array([0, 1]) 52 | segms = np.random.random((2, 10, 10)) > 0.5 53 | segms = np.array(segms, np.int32) 54 | vis.imshow_det_bboxes( 55 | image, bbox, label, segms, out_file=tmp_filename, show=False) 56 | assert osp.isfile(tmp_filename) 57 | 58 | os.remove(tmp_filename) 59 | 60 | # test tensor mask type error 61 | with pytest.raises(AttributeError): 62 | segms = torch.tensor(segms) 63 | vis.imshow_det_bboxes(image, bbox, label, segms, show=False) 64 | 65 | 66 | def test_imshow_gt_det_bboxes(): 67 | tmp_filename = osp.join(tempfile.gettempdir(), 'det_bboxes_image', 68 | 'image.jpg') 69 | image = np.ones((10, 10, 3), np.uint8) 70 | bbox = np.array([[2, 1, 3, 3], [3, 4, 6, 6]]) 71 | label = np.array([0, 1]) 72 | annotation = dict(gt_bboxes=bbox, gt_labels=label) 73 | det_result = np.array([[2, 1, 3, 3, 0], [3, 4, 6, 6, 1]]) 74 | result = [det_result] 75 | vis.imshow_gt_det_bboxes( 76 | image, annotation, result, out_file=tmp_filename, show=False) 77 | assert osp.isfile(tmp_filename) 78 | 79 | # test numpy mask 80 | gt_mask = np.ones((2, 10, 10)) 81 | annotation['gt_masks'] = gt_mask 82 | vis.imshow_gt_det_bboxes( 83 | image, annotation, result, out_file=tmp_filename, show=False) 84 | assert osp.isfile(tmp_filename) 85 | 86 | # test tensor mask 87 | gt_mask = torch.ones((2, 10, 10)) 88 | annotation['gt_masks'] = gt_mask 89 | vis.imshow_gt_det_bboxes( 90 | image, annotation, result, out_file=tmp_filename, show=False) 91 | assert osp.isfile(tmp_filename) 92 | 93 | os.remove(tmp_filename) 94 | 95 | # test unsupported type 96 | annotation['gt_masks'] = [] 97 | with pytest.raises(TypeError): 98 | vis.imshow_gt_det_bboxes(image, annotation, result, show=False) 99 | -------------------------------------------------------------------------------- /tools/benchmark.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import time 3 | 4 | import torch 5 
| from mmcv import Config 6 | from mmcv.cnn import fuse_conv_bn 7 | from mmcv.parallel import MMDataParallel 8 | from mmcv.runner import load_checkpoint, wrap_fp16_model 9 | 10 | from mmdet.datasets import (build_dataloader, build_dataset, 11 | replace_ImageToTensor) 12 | from mmdet.models import build_detector 13 | 14 | 15 | def parse_args(): 16 | parser = argparse.ArgumentParser(description='MMDet benchmark a model') 17 | parser.add_argument('config', help='test config file path') 18 | parser.add_argument('checkpoint', help='checkpoint file') 19 | parser.add_argument( 20 | '--log-interval', type=int, default=50, help='interval of logging') 21 | parser.add_argument( 22 | '--fuse-conv-bn', 23 | action='store_true', 24 | help='Whether to fuse conv and bn; this will slightly increase ' 25 | 'the inference speed') 26 | args = parser.parse_args() 27 | return args 28 | 29 | 30 | def main(): 31 | args = parse_args() 32 | 33 | cfg = Config.fromfile(args.config) 34 | # import modules from string list. 35 | if cfg.get('custom_imports', None): 36 | from mmcv.utils import import_modules_from_strings 37 | import_modules_from_strings(**cfg['custom_imports']) 38 | # set cudnn_benchmark 39 | if cfg.get('cudnn_benchmark', False): 40 | torch.backends.cudnn.benchmark = True 41 | cfg.model.pretrained = None 42 | cfg.data.test.test_mode = True 43 | 44 | # build the dataloader 45 | samples_per_gpu = cfg.data.test.pop('samples_per_gpu', 1) 46 | if samples_per_gpu > 1: 47 | # Replace 'ImageToTensor' with 'DefaultFormatBundle' 48 | cfg.data.test.pipeline = replace_ImageToTensor(cfg.data.test.pipeline) 49 | dataset = build_dataset(cfg.data.test) 50 | data_loader = build_dataloader( 51 | dataset, 52 | samples_per_gpu=1, 53 | workers_per_gpu=cfg.data.workers_per_gpu, 54 | dist=False, 55 | shuffle=False) 56 | 57 | # build the model and load checkpoint 58 | cfg.model.train_cfg = None 59 | model = build_detector(cfg.model, test_cfg=cfg.get('test_cfg')) 60 | fp16_cfg = cfg.get('fp16', None) 61 | if fp16_cfg is not None: 62 | wrap_fp16_model(model) 63 | load_checkpoint(model, args.checkpoint, map_location='cpu') 64 | if args.fuse_conv_bn: 65 | model = fuse_conv_bn(model) 66 | 67 | model = MMDataParallel(model, device_ids=[0]) 68 | 69 | model.eval() 70 | 71 | # the first several iterations may be very slow so skip them 72 | num_warmup = 5 73 | pure_inf_time = 0 74 | 75 | # benchmark with 2000 images and take the average 76 | for i, data in enumerate(data_loader): 77 | 78 | torch.cuda.synchronize() 79 | start_time = time.perf_counter() 80 | 81 | with torch.no_grad(): 82 | model(return_loss=False, rescale=True, **data) 83 | 84 | torch.cuda.synchronize() 85 | elapsed = time.perf_counter() - start_time 86 | 87 | if i >= num_warmup: 88 | pure_inf_time += elapsed 89 | if (i + 1) % args.log_interval == 0: 90 | fps = (i + 1 - num_warmup) / pure_inf_time 91 | print(f'Done image [{i + 1:<3}/ 2000], fps: {fps:.1f} img / s') 92 | 93 | if (i + 1) == 2000: 94 | # elapsed was already added to pure_inf_time above 95 | fps = (i + 1 - num_warmup) / pure_inf_time 96 | print(f'Overall fps: {fps:.1f} img / s') 97 | break 98 | 99 | 100 | if __name__ == '__main__': 101 | main() 102 | -------------------------------------------------------------------------------- /tools/browse_dataset.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | from pathlib import Path 4 | 5 | import mmcv 6 | from mmcv import Config 7 | 8 | from mmdet.core.utils import mask2ndarray 9 | from mmdet.core.visualization import imshow_det_bboxes
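# (Illustrative aside on tools/benchmark.py above, not part of browse_dataset.py:
#  the reported throughput is post-warmup iterations divided by accumulated pure
#  inference time. With num_warmup=5 and the default --log-interval of 50, the
#  line printed at i + 1 == 200 is fps = (200 - 5) / pure_inf_time, so 9.75 s of
#  accumulated time yields 20.0 img / s.)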
10 | from mmdet.datasets.builder import build_dataset 11 | 12 | 13 | def parse_args(): 14 | parser = argparse.ArgumentParser(description='Browse a dataset') 15 | parser.add_argument('config', help='train config file path') 16 | parser.add_argument( 17 | '--skip-type', 18 | type=str, 19 | nargs='+', 20 | default=['DefaultFormatBundle', 'Normalize', 'Collect'], 21 | help='skip some useless pipeline') 22 | parser.add_argument( 23 | '--output-dir', 24 | default=None, 25 | type=str, 26 | help='If there is no display interface, you can save it') 27 | parser.add_argument('--not-show', default=False, action='store_true') 28 | parser.add_argument( 29 | '--show-interval', 30 | type=float, 31 | default=2, 32 | help='the interval of show (s)') 33 | args = parser.parse_args() 34 | return args 35 | 36 | 37 | def retrieve_data_cfg(config_path, skip_type): 38 | cfg = Config.fromfile(config_path) 39 | train_data_cfg = cfg.data.train 40 | train_data_cfg['pipeline'] = [ 41 | x for x in train_data_cfg.pipeline if x['type'] not in skip_type 42 | ] 43 | 44 | return cfg 45 | 46 | 47 | def main(): 48 | args = parse_args() 49 | cfg = retrieve_data_cfg(args.config, args.skip_type) 50 | 51 | dataset = build_dataset(cfg.data.train) 52 | 53 | progress_bar = mmcv.ProgressBar(len(dataset)) 54 | 55 | for item in dataset: 56 | filename = os.path.join(args.output_dir, 57 | Path(item['filename']).name 58 | ) if args.output_dir is not None else None 59 | 60 | gt_masks = item.get('gt_masks', None) 61 | if gt_masks is not None: 62 | gt_masks = mask2ndarray(gt_masks) 63 | 64 | imshow_det_bboxes( 65 | item['img'], 66 | item['gt_bboxes'], 67 | item['gt_labels'], 68 | gt_masks, 69 | class_names=dataset.CLASSES, 70 | show=not args.not_show, 71 | wait_time=args.show_interval, 72 | out_file=filename, 73 | bbox_color=(255, 102, 61), 74 | text_color=(255, 102, 61)) 75 | 76 | progress_bar.update() 77 | 78 | 79 | if __name__ == '__main__': 80 | main() 81 | -------------------------------------------------------------------------------- /tools/compute_auc.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from sklearn import metrics 3 | 4 | # K (number of shots) 5 | x = np.array([1., 10., 30., 50., 100., 300., 500., 1000.]) 6 | x_log = np.log(x) / np.log(1000) 7 | # Average Recall scores 8 | y = np.array([0.0, 18.0, 26.5, 29.6, 33.4, 39.0, 41.5, 45.0]) 9 | y *= 0.01 10 | auc = metrics.auc(x_log, y) 11 | print('AUC score:', auc) -------------------------------------------------------------------------------- /tools/detectron2pytorch.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from collections import OrderedDict 3 | 4 | import mmcv 5 | import torch 6 | 7 | arch_settings = {50: (3, 4, 6, 3), 101: (3, 4, 23, 3)} 8 | 9 | 10 | def convert_bn(blobs, state_dict, caffe_name, torch_name, converted_names): 11 | # detectron replace bn with affine channel layer 12 | state_dict[torch_name + '.bias'] = torch.from_numpy(blobs[caffe_name + 13 | '_b']) 14 | state_dict[torch_name + '.weight'] = torch.from_numpy(blobs[caffe_name + 15 | '_s']) 16 | bn_size = state_dict[torch_name + '.weight'].size() 17 | state_dict[torch_name + '.running_mean'] = torch.zeros(bn_size) 18 | state_dict[torch_name + '.running_var'] = torch.ones(bn_size) 19 | converted_names.add(caffe_name + '_b') 20 | converted_names.add(caffe_name + '_s') 21 | 22 | 23 | def convert_conv_fc(blobs, state_dict, caffe_name, torch_name, 24 | converted_names): 25 | 
state_dict[torch_name + '.weight'] = torch.from_numpy(blobs[caffe_name + 26 | '_w']) 27 | converted_names.add(caffe_name + '_w') 28 | if caffe_name + '_b' in blobs: 29 | state_dict[torch_name + '.bias'] = torch.from_numpy(blobs[caffe_name + 30 | '_b']) 31 | converted_names.add(caffe_name + '_b') 32 | 33 | 34 | def convert(src, dst, depth): 35 | """Convert keys in detectron pretrained ResNet models to pytorch style.""" 36 | # load arch_settings 37 | if depth not in arch_settings: 38 | raise ValueError('Only support ResNet-50 and ResNet-101 currently') 39 | block_nums = arch_settings[depth] 40 | # load caffe model 41 | caffe_model = mmcv.load(src, encoding='latin1') 42 | blobs = caffe_model['blobs'] if 'blobs' in caffe_model else caffe_model 43 | # convert to pytorch style 44 | state_dict = OrderedDict() 45 | converted_names = set() 46 | convert_conv_fc(blobs, state_dict, 'conv1', 'conv1', converted_names) 47 | convert_bn(blobs, state_dict, 'res_conv1_bn', 'bn1', converted_names) 48 | for i in range(1, len(block_nums) + 1): 49 | for j in range(block_nums[i - 1]): 50 | if j == 0: 51 | convert_conv_fc(blobs, state_dict, f'res{i + 1}_{j}_branch1', 52 | f'layer{i}.{j}.downsample.0', converted_names) 53 | convert_bn(blobs, state_dict, f'res{i + 1}_{j}_branch1_bn', 54 | f'layer{i}.{j}.downsample.1', converted_names) 55 | for k, letter in enumerate(['a', 'b', 'c']): 56 | convert_conv_fc(blobs, state_dict, 57 | f'res{i + 1}_{j}_branch2{letter}', 58 | f'layer{i}.{j}.conv{k+1}', converted_names) 59 | convert_bn(blobs, state_dict, 60 | f'res{i + 1}_{j}_branch2{letter}_bn', 61 | f'layer{i}.{j}.bn{k + 1}', converted_names) 62 | # check if all layers are converted 63 | for key in blobs: 64 | if key not in converted_names: 65 | print(f'Not Convert: {key}') 66 | # save checkpoint 67 | checkpoint = dict() 68 | checkpoint['state_dict'] = state_dict 69 | torch.save(checkpoint, dst) 70 | 71 | 72 | def main(): 73 | parser = argparse.ArgumentParser(description='Convert model keys') 74 | parser.add_argument('src', help='src detectron model path') 75 | parser.add_argument('dst', help='save path') 76 | parser.add_argument('depth', type=int, help='ResNet model depth') 77 | args = parser.parse_args() 78 | convert(args.src, args.dst, args.depth) 79 | 80 | 81 | if __name__ == '__main__': 82 | main() 83 | -------------------------------------------------------------------------------- /tools/dist_test_bbox.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | CONFIG=$1 4 | CHECKPOINT=$2 5 | GPUS=$3 6 | PORT=${PORT:-29500} 7 | 8 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 9 | python -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \ 10 | $(dirname "$0")/test.py $CONFIG $CHECKPOINT --launcher pytorch ${@:4} \ 11 | --eval bbox -------------------------------------------------------------------------------- /tools/dist_train.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | CONFIG=$1 4 | GPUS=$2 5 | PORT=${PORT:-29500} 6 | 7 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 8 | python -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \ 9 | $(dirname "$0")/train.py $CONFIG --launcher pytorch ${@:3} --no-validate 10 | -------------------------------------------------------------------------------- /tools/dist_train_and_test_bbox.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | CONFIG=$1 4 | 
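# (Illustrative note, not in the original script: as in the launchers above,
# "${@:4}" below forwards every CLI argument from the 4th position onward, so
# extra flags such as --cfg-options reach both the train and the test run.)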
CHECKPOINT=$2 5 | GPUS=$3 6 | PORT=${PORT:-29500} 7 | 8 | 9 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 10 | python -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \ 11 | $(dirname "$0")/train.py $CONFIG --launcher pytorch ${@:4} --no-validate 12 | 13 | 14 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 15 | python -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \ 16 | $(dirname "$0")/test.py $CONFIG $CHECKPOINT --launcher pytorch ${@:4} \ 17 | --eval bbox 18 | 19 | 20 | -------------------------------------------------------------------------------- /tools/eval_metric.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | import mmcv 4 | from mmcv import Config, DictAction 5 | 6 | from mmdet.datasets import build_dataset 7 | 8 | 9 | def parse_args(): 10 | parser = argparse.ArgumentParser(description='Evaluate metric of the ' 11 | 'results saved in pkl format') 12 | parser.add_argument('config', help='Config of the model') 13 | parser.add_argument('pkl_results', help='Results in pickle format') 14 | parser.add_argument( 15 | '--format-only', 16 | action='store_true', 17 | help='Format the output results without performing evaluation. It is ' 18 | 'useful when you want to format the result to a specific format and ' 19 | 'submit it to the test server') 20 | parser.add_argument( 21 | '--eval', 22 | type=str, 23 | nargs='+', 24 | help='Evaluation metrics, which depend on the dataset, e.g., "bbox",' 25 | ' "segm", "proposal" for COCO, and "mAP", "recall" for PASCAL VOC') 26 | parser.add_argument( 27 | '--cfg-options', 28 | nargs='+', 29 | action=DictAction, 30 | help='override some settings in the used config, the key-value pair ' 31 | 'in xxx=yyy format will be merged into config file. If the value to ' 32 | 'be overwritten is a list, it should be like key="[a,b]" or key=a,b ' 33 | 'It also allows nested list/tuple values, e.g. 
key="[(a,b),(c,d)]" ' 34 | 'Note that the quotation marks are necessary and that no white space ' 35 | 'is allowed.') 36 | parser.add_argument( 37 | '--eval-options', 38 | nargs='+', 39 | action=DictAction, 40 | help='custom options for evaluation, the key-value pair in xxx=yyy ' 41 | 'format will be kwargs for dataset.evaluate() function') 42 | args = parser.parse_args() 43 | return args 44 | 45 | 46 | def main(): 47 | args = parse_args() 48 | 49 | cfg = Config.fromfile(args.config) 50 | assert args.eval or args.format_only, ( 51 | 'Please specify at least one operation (eval/format the results) with ' 52 | 'the argument "--eval", "--format-only"') 53 | if args.eval and args.format_only: 54 | raise ValueError('--eval and --format_only cannot be both specified') 55 | 56 | if args.cfg_options is not None: 57 | cfg.merge_from_dict(args.cfg_options) 58 | cfg.data.test.test_mode = True 59 | 60 | dataset = build_dataset(cfg.data.test) 61 | outputs = mmcv.load(args.pkl_results) 62 | 63 | kwargs = {} if args.eval_options is None else args.eval_options 64 | if args.format_only: 65 | dataset.format_results(outputs, **kwargs) 66 | if args.eval: 67 | eval_kwargs = cfg.get('evaluation', {}).copy() 68 | # hard-code way to remove EvalHook args 69 | for key in [ 70 | 'interval', 'tmpdir', 'start', 'gpu_collect', 'save_best', 71 | 'rule' 72 | ]: 73 | eval_kwargs.pop(key, None) 74 | eval_kwargs.update(dict(metric=args.eval, **kwargs)) 75 | print(dataset.evaluate(outputs, **eval_kwargs)) 76 | 77 | 78 | if __name__ == '__main__': 79 | main() 80 | -------------------------------------------------------------------------------- /tools/get_flops.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | import torch 4 | from mmcv import Config 5 | 6 | from mmdet.models import build_detector 7 | 8 | try: 9 | from mmcv.cnn import get_model_complexity_info 10 | except ImportError: 11 | raise ImportError('Please upgrade mmcv to >0.6.2') 12 | 13 | 14 | def parse_args(): 15 | parser = argparse.ArgumentParser(description='Train a detector') 16 | parser.add_argument('config', help='train config file path') 17 | parser.add_argument( 18 | '--shape', 19 | type=int, 20 | nargs='+', 21 | default=[1280, 800], 22 | help='input image size') 23 | args = parser.parse_args() 24 | return args 25 | 26 | 27 | def main(): 28 | 29 | args = parse_args() 30 | 31 | if len(args.shape) == 1: 32 | input_shape = (3, args.shape[0], args.shape[0]) 33 | elif len(args.shape) == 2: 34 | input_shape = (3, ) + tuple(args.shape) 35 | else: 36 | raise ValueError('invalid input shape') 37 | 38 | cfg = Config.fromfile(args.config) 39 | # import modules from string list. 40 | if cfg.get('custom_imports', None): 41 | from mmcv.utils import import_modules_from_strings 42 | import_modules_from_strings(**cfg['custom_imports']) 43 | 44 | model = build_detector( 45 | cfg.model, 46 | train_cfg=cfg.get('train_cfg'), 47 | test_cfg=cfg.get('test_cfg')) 48 | if torch.cuda.is_available(): 49 | model.cuda() 50 | model.eval() 51 | 52 | if hasattr(model, 'forward_dummy'): 53 | model.forward = model.forward_dummy 54 | else: 55 | raise NotImplementedError( 56 | 'FLOPs counter is currently not currently supported with {}'. 
57 | format(model.__class__.__name__)) 58 | 59 | flops, params = get_model_complexity_info(model, input_shape) 60 | split_line = '=' * 30 61 | print(f'{split_line}\nInput shape: {input_shape}\n' 62 | f'Flops: {flops}\nParams: {params}\n{split_line}') 63 | print('!!!Please be cautious if you use the results in papers. ' 64 | 'You may need to check if all ops are supported and verify that the ' 65 | 'flops computation is correct.') 66 | 67 | 68 | if __name__ == '__main__': 69 | main() 70 | -------------------------------------------------------------------------------- /tools/print_config.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | from mmcv import Config, DictAction 4 | 5 | 6 | def parse_args(): 7 | parser = argparse.ArgumentParser(description='Print the whole config') 8 | parser.add_argument('config', help='config file path') 9 | parser.add_argument( 10 | '--options', nargs='+', action=DictAction, help='arguments in dict') 11 | args = parser.parse_args() 12 | 13 | return args 14 | 15 | 16 | def main(): 17 | args = parse_args() 18 | 19 | cfg = Config.fromfile(args.config) 20 | if args.options is not None: 21 | cfg.merge_from_dict(args.options) 22 | print(f'Config:\n{cfg.pretty_text}') 23 | 24 | 25 | if __name__ == '__main__': 26 | main() 27 | -------------------------------------------------------------------------------- /tools/publish_model.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import subprocess 3 | 4 | import torch 5 | 6 | 7 | def parse_args(): 8 | parser = argparse.ArgumentParser( 9 | description='Process a checkpoint to be published') 10 | parser.add_argument('in_file', help='input checkpoint filename') 11 | parser.add_argument('out_file', help='output checkpoint filename') 12 | args = parser.parse_args() 13 | return args 14 | 15 | 16 | def process_checkpoint(in_file, out_file): 17 | checkpoint = torch.load(in_file, map_location='cpu') 18 | # remove optimizer for smaller file size 19 | if 'optimizer' in checkpoint: 20 | del checkpoint['optimizer'] 21 | # if it is necessary to remove some sensitive data in checkpoint['meta'], 22 | # add the code here. 
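# An illustrative sketch of that step, not part of the original file and kept
# commented out ('hook_msgs' and 'config' are assumed meta keys; adjust them to
# whatever the real checkpoint carries):
#   for key in ('hook_msgs', 'config'):
#       checkpoint.get('meta', {}).pop(key, None)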
23 | torch.save(checkpoint, out_file) 24 | sha = subprocess.check_output(['sha256sum', out_file]).decode() 25 | if out_file.endswith('.pth'): 26 | out_file_name = out_file[:-4] 27 | else: 28 | out_file_name = out_file 29 | final_file = out_file_name + f'-{sha[:8]}.pth' 30 | subprocess.Popen(['mv', out_file, final_file]) 31 | 32 | 33 | def main(): 34 | args = parse_args() 35 | process_checkpoint(args.in_file, args.out_file) 36 | 37 | 38 | if __name__ == '__main__': 39 | main() 40 | -------------------------------------------------------------------------------- /tools/regnet2mmdet.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from collections import OrderedDict 3 | 4 | import torch 5 | 6 | 7 | def convert_stem(model_key, model_weight, state_dict, converted_names): 8 | new_key = model_key.replace('stem.conv', 'conv1') 9 | new_key = new_key.replace('stem.bn', 'bn1') 10 | state_dict[new_key] = model_weight 11 | converted_names.add(model_key) 12 | print(f'Convert {model_key} to {new_key}') 13 | 14 | 15 | def convert_head(model_key, model_weight, state_dict, converted_names): 16 | new_key = model_key.replace('head.fc', 'fc') 17 | state_dict[new_key] = model_weight 18 | converted_names.add(model_key) 19 | print(f'Convert {model_key} to {new_key}') 20 | 21 | 22 | def convert_reslayer(model_key, model_weight, state_dict, converted_names): 23 | split_keys = model_key.split('.') 24 | layer, block, module = split_keys[:3] 25 | block_id = int(block[1:]) 26 | layer_name = f'layer{int(layer[1:])}' 27 | block_name = f'{block_id - 1}' 28 | 29 | if block_id == 1 and module == 'bn': 30 | new_key = f'{layer_name}.{block_name}.downsample.1.{split_keys[-1]}' 31 | elif block_id == 1 and module == 'proj': 32 | new_key = f'{layer_name}.{block_name}.downsample.0.{split_keys[-1]}' 33 | elif module == 'f': 34 | if split_keys[3] == 'a_bn': 35 | module_name = 'bn1' 36 | elif split_keys[3] == 'b_bn': 37 | module_name = 'bn2' 38 | elif split_keys[3] == 'c_bn': 39 | module_name = 'bn3' 40 | elif split_keys[3] == 'a': 41 | module_name = 'conv1' 42 | elif split_keys[3] == 'b': 43 | module_name = 'conv2' 44 | elif split_keys[3] == 'c': 45 | module_name = 'conv3' 46 | new_key = f'{layer_name}.{block_name}.{module_name}.{split_keys[-1]}' 47 | else: 48 | raise ValueError(f'Unsupported conversion of key {model_key}') 49 | print(f'Convert {model_key} to {new_key}') 50 | state_dict[new_key] = model_weight 51 | converted_names.add(model_key) 52 | 53 | 54 | def convert(src, dst): 55 | """Convert keys in pycls pretrained RegNet models to mmdet style.""" 56 | # load caffe model 57 | regnet_model = torch.load(src) 58 | blobs = regnet_model['model_state'] 59 | # convert to pytorch style 60 | state_dict = OrderedDict() 61 | converted_names = set() 62 | for key, weight in blobs.items(): 63 | if 'stem' in key: 64 | convert_stem(key, weight, state_dict, converted_names) 65 | elif 'head' in key: 66 | convert_head(key, weight, state_dict, converted_names) 67 | elif key.startswith('s'): 68 | convert_reslayer(key, weight, state_dict, converted_names) 69 | 70 | # check if all layers are converted 71 | for key in blobs: 72 | if key not in converted_names: 73 | print(f'not converted: {key}') 74 | # save checkpoint 75 | checkpoint = dict() 76 | checkpoint['state_dict'] = state_dict 77 | torch.save(checkpoint, dst) 78 | 79 | 80 | def main(): 81 | parser = argparse.ArgumentParser(description='Convert model keys') 82 | parser.add_argument('src', help='src detectron model path') 83 | 
parser.add_argument('dst', help='save path') 84 | args = parser.parse_args() 85 | convert(args.src, args.dst) 86 | 87 | 88 | if __name__ == '__main__': 89 | main() 90 | -------------------------------------------------------------------------------- /tools/slurm_test.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -x 4 | 5 | PARTITION=$1 6 | JOB_NAME=$2 7 | CONFIG=$3 8 | CHECKPOINT=$4 9 | GPUS=${GPUS:-8} 10 | GPUS_PER_NODE=${GPUS_PER_NODE:-8} 11 | CPUS_PER_TASK=${CPUS_PER_TASK:-5} 12 | PY_ARGS=${@:5} 13 | SRUN_ARGS=${SRUN_ARGS:-""} 14 | 15 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 16 | srun -p ${PARTITION} \ 17 | --job-name=${JOB_NAME} \ 18 | --gres=gpu:${GPUS_PER_NODE} \ 19 | --ntasks=${GPUS} \ 20 | --ntasks-per-node=${GPUS_PER_NODE} \ 21 | --cpus-per-task=${CPUS_PER_TASK} \ 22 | --kill-on-bad-exit=1 \ 23 | ${SRUN_ARGS} \ 24 | python -u tools/test.py ${CONFIG} ${CHECKPOINT} --launcher="slurm" ${PY_ARGS} 25 | -------------------------------------------------------------------------------- /tools/slurm_train.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -x 4 | 5 | PARTITION=$1 6 | JOB_NAME=$2 7 | CONFIG=$3 8 | WORK_DIR=$4 9 | GPUS=${GPUS:-8} 10 | GPUS_PER_NODE=${GPUS_PER_NODE:-8} 11 | CPUS_PER_TASK=${CPUS_PER_TASK:-5} 12 | SRUN_ARGS=${SRUN_ARGS:-""} 13 | PY_ARGS=${@:5} 14 | 15 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 16 | srun -p ${PARTITION} \ 17 | --job-name=${JOB_NAME} \ 18 | --gres=gpu:${GPUS_PER_NODE} \ 19 | --ntasks=${GPUS} \ 20 | --ntasks-per-node=${GPUS_PER_NODE} \ 21 | --cpus-per-task=${CPUS_PER_TASK} \ 22 | --kill-on-bad-exit=1 \ 23 | ${SRUN_ARGS} \ 24 | python -u tools/train.py ${CONFIG} --work-dir=${WORK_DIR} --launcher="slurm" ${PY_ARGS} 25 | --------------------------------------------------------------------------------
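# Example invocations of the Slurm launchers above (hypothetical partition and
# job names; the OLN config path exists under configs/oln_box/ in this repo,
# while the work dir and checkpoint name are placeholders):
#   GPUS=8 ./tools/slurm_train.sh dev oln_train configs/oln_box/oln_box.py ./work_dirs/oln_box
#   GPUS=8 ./tools/slurm_test.sh dev oln_test configs/oln_box/oln_box.py ./work_dirs/oln_box/latest.pth --eval bbox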