├── .gitignore ├── .style.yapf ├── .travis.yml ├── DOTA_devkit ├── .gitignore ├── DOTA.py ├── DOTA2COCO.py ├── HRSC2COCO.py ├── ImgSplit.py ├── ImgSplit_multi_process.py ├── ResultMerge.py ├── ResultMerge_multi_process.py ├── SplitOnlyImage.py ├── SplitOnlyImage_multi_process.py ├── dota_utils.py ├── nms.py ├── poly_overlaps_test.py ├── polyiou.cpp ├── polyiou.h ├── polyiou.i ├── polyiou.py ├── prepare_dota1.py ├── prepare_dota1_5.py ├── prepare_dota1_5_aug.py ├── prepare_dota1_aug.py ├── readme.md ├── results_obb2hbb.py ├── rotate.py ├── rotate_test.py ├── setup.py └── utils.py ├── GETTING_STARTED.md ├── INSTALL.md ├── LICENSE ├── MODEL_ZOO.md ├── README.md ├── TECHNICAL_DETAILS.md ├── benchmarks-dota.png ├── compile.sh ├── configs ├── DOTA │ ├── cascade_mask_rcnn_r50_fpn_1x_dota.py │ ├── faster_rcnn_RoITrans_r50_fpn_1x_dota.py │ ├── faster_rcnn_RoITrans_r50_fpn_1x_dota_gap512_msTrainTest_rotationTrainTest.py │ ├── faster_rcnn_h-obb_r50_fpn_1x_dota.py │ ├── faster_rcnn_obb_dpool_r50_fpn_1x_dota.py │ ├── faster_rcnn_obb_r50_fpn_1x_dota.py │ ├── faster_rcnn_r50_fpn_1x_dota.py │ ├── htc_without_semantic_r50_fpn_1x_dota.py │ ├── mask_rcnn_r50_fpn_1x_dota.py │ ├── retinanet_obb_r50_fpn_2x_dota.py │ └── retinanet_r50_fpn_2x_dota.py ├── DOTA1_5 │ ├── cascade_mask_rcnn_r50_fpn_1x_dota1_5.py │ ├── faster_rcnn_RoITrans_r50_fpn_1x_dota1_5.py │ ├── faster_rcnn_RoITrans_r50_fpn_1x_dota1_5_gap512_msTrainTest_rotationTrainTest.py │ ├── faster_rcnn_h-obb_r50_fpn_1x_dota1_5.py │ ├── faster_rcnn_obb_dpool_r50_fpn_1x_dota1_5.py │ ├── faster_rcnn_obb_r50_fpn_1x_dota1_5.py │ ├── faster_rcnn_r50_fpn_1x_dota1_5.py │ ├── htc_without_semantic_r50_fpn_1x_dota1_5.py │ ├── mask_rcnn_r50_fpn_1x_dota1_5.py │ ├── retinanet_obb_r50_fpn_2x_dota1_5.py │ └── retinanet_r50_fpn_2x_dota1_5.py ├── cascade_mask_rcnn_r101_fpn_1x.py ├── cascade_mask_rcnn_r50_caffe_c4_1x.py ├── cascade_mask_rcnn_r50_fpn_1x.py ├── cascade_mask_rcnn_x101_32x4d_fpn_1x.py ├── cascade_mask_rcnn_x101_64x4d_fpn_1x.py ├── 
cascade_rcnn_r101_fpn_1x.py ├── cascade_rcnn_r50_caffe_c4_1x.py ├── cascade_rcnn_r50_fpn_1x.py ├── cascade_rcnn_x101_32x4d_fpn_1x.py ├── cascade_rcnn_x101_64x4d_fpn_1x.py ├── dcn │ ├── README.md │ ├── cascade_mask_rcnn_dconv_c3-c5_r50_fpn_1x.py │ ├── cascade_rcnn_dconv_c3-c5_r50_fpn_1x.py │ ├── faster_rcnn_dconv_c3-c5_r50_fpn_1x.py │ ├── faster_rcnn_dconv_c3-c5_x101_32x4d_fpn_1x.py │ ├── faster_rcnn_dpool_r50_fpn_1x.py │ ├── faster_rcnn_mdconv_c3-c5_r50_fpn_1x.py │ ├── faster_rcnn_mdpool_r50_fpn_1x.py │ └── mask_rcnn_dconv_c3-c5_r50_fpn_1x.py ├── fast_mask_rcnn_r101_fpn_1x.py ├── fast_mask_rcnn_r50_caffe_c4_1x.py ├── fast_mask_rcnn_r50_fpn_1x.py ├── fast_rcnn_r101_fpn_1x.py ├── fast_rcnn_r50_caffe_c4_1x.py ├── fast_rcnn_r50_fpn_1x.py ├── faster_rcnn_ohem_r50_fpn_1x.py ├── faster_rcnn_r101_fpn_1x.py ├── faster_rcnn_r50_caffe_c4_1x.py ├── faster_rcnn_r50_fpn_1x.py ├── faster_rcnn_x101_32x4d_fpn_1x.py ├── faster_rcnn_x101_64x4d_fpn_1x.py ├── fcos │ ├── README.md │ ├── fcos_mstrain_640_800_r101_caffe_fpn_gn_2x_4gpu.py │ ├── fcos_mstrain_640_800_x101_64x4d_fpn_gn_2x.py │ └── fcos_r50_caffe_fpn_gn_1x_4gpu.py ├── gcnet │ ├── README.md │ ├── mask_rcnn_r16_gcb_c3-c5_r50_fpn_1x.py │ ├── mask_rcnn_r16_gcb_c3-c5_r50_fpn_syncbn_1x.py │ ├── mask_rcnn_r4_gcb_c3-c5_r50_fpn_1x.py │ ├── mask_rcnn_r4_gcb_c3-c5_r50_fpn_syncbn_1x.py │ └── mask_rcnn_r50_fpn_sbn_1x.py ├── ghm │ ├── README.md │ └── retinanet_ghm_r50_fpn_1x.py ├── gn+ws │ ├── README.md │ ├── faster_rcnn_r50_fpn_gn_ws_1x.py │ ├── mask_rcnn_r50_fpn_gn_ws_20_23_24e.py │ ├── mask_rcnn_r50_fpn_gn_ws_2x.py │ └── mask_rcnn_x101_32x4d_fpn_gn_ws_2x.py ├── gn │ ├── README.md │ ├── mask_rcnn_r101_fpn_gn_2x.py │ ├── mask_rcnn_r50_fpn_gn_2x.py │ └── mask_rcnn_r50_fpn_gn_contrib_2x.py ├── guided_anchoring │ ├── README.md │ ├── ga_fast_r50_caffe_fpn_1x.py │ ├── ga_faster_r50_caffe_fpn_1x.py │ ├── ga_faster_x101_32x4d_fpn_1x.py │ ├── ga_retinanet_r50_caffe_fpn_1x.py │ ├── ga_retinanet_x101_32x4d_fpn_1x.py │ ├── ga_rpn_r101_caffe_rpn_1x.py 
│ ├── ga_rpn_r50_caffe_fpn_1x.py │ └── ga_rpn_x101_32x4d_fpn_1x.py ├── hrnet │ ├── README.md │ ├── cascade_rcnn_hrnetv2p_w32_20e.py │ ├── faster_rcnn_hrnetv2p_w18_1x.py │ ├── faster_rcnn_hrnetv2p_w32_1x.py │ ├── faster_rcnn_hrnetv2p_w40_1x.py │ ├── mask_rcnn_hrnetv2p_w18_1x.py │ └── mask_rcnn_hrnetv2p_w32_1x.py ├── htc │ ├── README.md │ ├── htc_dconv_c3-c5_mstrain_400_1400_x101_64x4d_fpn_20e.py │ ├── htc_r101_fpn_20e.py │ ├── htc_r50_fpn_1x.py │ ├── htc_r50_fpn_20e.py │ ├── htc_without_semantic_r50_fpn_1x.py │ ├── htc_x101_32x4d_fpn_20e_16gpu.py │ └── htc_x101_64x4d_fpn_20e_16gpu.py ├── libra_rcnn │ ├── README.md │ ├── libra_fast_rcnn_r50_fpn_1x.py │ ├── libra_faster_rcnn_r101_fpn_1x.py │ ├── libra_faster_rcnn_r50_fpn_1x.py │ ├── libra_faster_rcnn_x101_64x4d_fpn_1x.py │ └── libra_retinanet_r50_fpn_1x.py ├── mask_rcnn_r101_fpn_1x.py ├── mask_rcnn_r50_caffe_c4_1x.py ├── mask_rcnn_r50_fpn_1x.py ├── mask_rcnn_x101_32x4d_fpn_1x.py ├── mask_rcnn_x101_64x4d_fpn_1x.py ├── pascal_voc │ ├── faster_rcnn_r50_fpn_1x_voc0712.py │ ├── ssd300_voc.py │ └── ssd512_voc.py ├── retinanet_r101_fpn_1x.py ├── retinanet_r50_fpn_1x.py ├── retinanet_x101_32x4d_fpn_1x.py ├── retinanet_x101_64x4d_fpn_1x.py ├── rpn_r101_fpn_1x.py ├── rpn_r50_caffe_c4_1x.py ├── rpn_r50_fpn_1x.py ├── rpn_x101_32x4d_fpn_1x.py ├── rpn_x101_64x4d_fpn_1x.py ├── scratch │ ├── README.md │ ├── scratch_faster_rcnn_r50_fpn_gn_6x.py │ └── scratch_mask_rcnn_r50_fpn_gn_6x.py ├── ssd300_coco.py └── ssd512_coco.py ├── data-aug.png ├── demo └── P0009.jpg ├── demo_large_image.py ├── mmdet ├── __init__.py ├── apis │ ├── __init__.py │ ├── env.py │ ├── inference.py │ └── train.py ├── core │ ├── __init__.py │ ├── anchor │ │ ├── __init__.py │ │ ├── anchor_generator.py │ │ ├── anchor_target.py │ │ ├── anchor_target_rbbox.py │ │ └── guided_anchor_target.py │ ├── bbox │ │ ├── __init__.py │ │ ├── assign_sampling.py │ │ ├── assigners │ │ │ ├── __init__.py │ │ │ ├── approx_max_iou_assigner.py │ │ │ ├── assign_result.py │ │ │ ├── 
base_assigner.py │ │ │ ├── max_iou_assigner.py │ │ │ ├── max_iou_assigner_hbb_cy.py │ │ │ └── max_iou_assigner_rbbox.py │ │ ├── bbox.pyx │ │ ├── bbox_target.py │ │ ├── bbox_target_rbbox.py │ │ ├── geometry.py │ │ ├── geometry_test.py │ │ ├── samplers │ │ │ ├── __init__.py │ │ │ ├── base_sampler.py │ │ │ ├── combined_sampler.py │ │ │ ├── instance_balanced_pos_sampler.py │ │ │ ├── iou_balanced_neg_sampler.py │ │ │ ├── ohem_sampler.py │ │ │ ├── pseudo_sampler.py │ │ │ ├── random_sampler.py │ │ │ ├── rbbox_base_sampler.py │ │ │ ├── rbbox_random_sampler.py │ │ │ └── sampling_result.py │ │ ├── setup_linux.py │ │ ├── transforms.py │ │ ├── transforms_rbbox.py │ │ └── transforms_rbbox_test.py │ ├── evaluation │ │ ├── __init__.py │ │ ├── bbox_overlaps.py │ │ ├── class_names.py │ │ ├── coco_utils.py │ │ ├── dota_utils.py │ │ ├── eval_hooks.py │ │ ├── mean_ap.py │ │ └── recall.py │ ├── loss │ │ ├── __init__.py │ │ └── losses.py │ ├── mask │ │ ├── __init__.py │ │ ├── mask_target.py │ │ └── utils.py │ ├── post_processing │ │ ├── __init__.py │ │ ├── bbox_nms.py │ │ ├── merge_augs.py │ │ ├── merge_augs_rotate.py │ │ └── rbbox_nms.py │ └── utils │ │ ├── __init__.py │ │ ├── dist_utils.py │ │ └── misc.py ├── datasets │ ├── DOTA.py │ ├── DOTA1_5.py │ ├── DOTA2.py │ ├── HRSC.py │ ├── __init__.py │ ├── coco.py │ ├── concat_dataset.py │ ├── custom.py │ ├── extra_aug.py │ ├── loader │ │ ├── __init__.py │ │ ├── build_loader.py │ │ └── sampler.py │ ├── repeat_dataset.py │ ├── rotate_aug.py │ ├── transforms.py │ ├── utils.py │ ├── voc.py │ └── xml_style.py ├── models │ ├── __init__.py │ ├── anchor_heads │ │ ├── __init__.py │ │ ├── anchor_head.py │ │ ├── anchor_head_rbbox.py │ │ ├── fcos_head.py │ │ ├── ga_retina_head.py │ │ ├── ga_rpn_head.py │ │ ├── guided_anchor_head.py │ │ ├── retina_head.py │ │ ├── retina_head_rbbox.py │ │ ├── rpn_head.py │ │ └── ssd_head.py │ ├── backbones │ │ ├── __init__.py │ │ ├── hrnet.py │ │ ├── resnet.py │ │ ├── resnext.py │ │ └── ssd_vgg.py │ ├── bbox_heads │ │ 
├── __init__.py │ │ ├── bbox_head.py │ │ └── convfc_bbox_head.py │ ├── builder.py │ ├── detectors │ │ ├── RoITransformer.py │ │ ├── __init__.py │ │ ├── base.py │ │ ├── base_new.py │ │ ├── cascade_rcnn.py │ │ ├── fast_rcnn.py │ │ ├── faster_rcnn.py │ │ ├── faster_rcnn_hbb_obb.py │ │ ├── faster_rcnn_obb.py │ │ ├── fcos.py │ │ ├── htc.py │ │ ├── mask_rcnn.py │ │ ├── retinanet.py │ │ ├── retinanet_obb.py │ │ ├── rpn.py │ │ ├── single_stage.py │ │ ├── single_stage_rbbox.py │ │ ├── test_mixins.py │ │ ├── two_stage.py │ │ └── two_stage_rbbox.py │ ├── losses │ │ ├── __init__.py │ │ ├── balanced_l1_loss.py │ │ ├── cross_entropy_loss.py │ │ ├── focal_loss.py │ │ ├── ghm_loss.py │ │ ├── iou_loss.py │ │ └── smooth_l1_loss.py │ ├── mask_heads │ │ ├── __init__.py │ │ ├── fcn_mask_head.py │ │ ├── fused_semantic_head.py │ │ └── htc_mask_head.py │ ├── necks │ │ ├── __init__.py │ │ ├── bfp.py │ │ ├── fpn.py │ │ └── hrfpn.py │ ├── plugins │ │ ├── __init__.py │ │ └── non_local.py │ ├── rbbox_heads │ │ ├── __init__.py │ │ ├── convfc_rbbox_head.py │ │ └── rbbox_head.py │ ├── registry.py │ ├── roi_extractors │ │ ├── __init__.py │ │ └── single_level.py │ ├── rroi_extractors │ │ ├── __init__.py │ │ └── rbox_single_level.py │ ├── shared_heads │ │ ├── __init__.py │ │ └── res_layer.py │ └── utils │ │ ├── __init__.py │ │ ├── conv_module.py │ │ ├── conv_ws.py │ │ ├── norm.py │ │ ├── scale.py │ │ └── weight_init.py ├── ops │ ├── __init__.py │ ├── dcn │ │ ├── __init__.py │ │ ├── functions │ │ │ ├── __init__.py │ │ │ ├── deform_conv.py │ │ │ └── deform_pool.py │ │ ├── modules │ │ │ ├── __init__.py │ │ │ ├── deform_conv.py │ │ │ └── deform_pool.py │ │ ├── setup.py │ │ └── src │ │ │ ├── deform_conv_cuda.cpp │ │ │ ├── deform_conv_cuda_kernel.cu │ │ │ ├── deform_pool_cuda.cpp │ │ │ └── deform_pool_cuda_kernel.cu │ ├── gcb │ │ ├── __init__.py │ │ └── context_block.py │ ├── masked_conv │ │ ├── __init__.py │ │ ├── functions │ │ │ ├── __init__.py │ │ │ └── masked_conv.py │ │ ├── modules │ │ │ ├── 
__init__.py │ │ │ └── masked_conv.py │ │ ├── setup.py │ │ └── src │ │ │ ├── masked_conv2d_cuda.cpp │ │ │ └── masked_conv2d_kernel.cu │ ├── nms │ │ ├── __init__.py │ │ ├── nms_wrapper.py │ │ ├── rnms_wrapper.py │ │ ├── setup.py │ │ └── src │ │ │ ├── nms_cpu.cpp │ │ │ ├── nms_cuda.cpp │ │ │ ├── nms_kernel.cu │ │ │ └── soft_nms_cpu.pyx │ ├── poly_nms │ │ ├── __init__.py │ │ ├── poly_nms_wrapper.py │ │ ├── setup.py │ │ └── src │ │ │ ├── poly_nms_cuda.cpp │ │ │ └── poly_nms_kernel.cu │ ├── psroi_align_rotated │ │ ├── __init__.py │ │ ├── functions │ │ │ ├── __init__.py │ │ │ └── psroi_align_rotated.py │ │ ├── gradcheck.py │ │ ├── modules │ │ │ ├── __init__.py │ │ │ └── psroi_align_rotated.py │ │ ├── setup.py │ │ └── src │ │ │ ├── psroi_align_rotated_cuda.cpp │ │ │ └── psroi_align_rotated_kernel.cu │ ├── roi_align │ │ ├── __init__.py │ │ ├── functions │ │ │ ├── __init__.py │ │ │ └── roi_align.py │ │ ├── gradcheck.py │ │ ├── modules │ │ │ ├── __init__.py │ │ │ └── roi_align.py │ │ ├── setup.py │ │ └── src │ │ │ ├── roi_align_cuda.cpp │ │ │ └── roi_align_kernel.cu │ ├── roi_align_rotated │ │ ├── __init__.py │ │ ├── functions │ │ │ ├── __init__.py │ │ │ └── roi_align_rotated.py │ │ ├── gradcheck.py │ │ ├── modules │ │ │ ├── __init__.py │ │ │ └── roi_align_rotated.py │ │ ├── setup.py │ │ └── src │ │ │ ├── roi_align_rotated_cuda.cpp │ │ │ └── roi_align_rotated_kernel.cu │ ├── roi_pool │ │ ├── __init__.py │ │ ├── functions │ │ │ ├── __init__.py │ │ │ └── roi_pool.py │ │ ├── gradcheck.py │ │ ├── modules │ │ │ ├── __init__.py │ │ │ └── roi_pool.py │ │ ├── setup.py │ │ └── src │ │ │ ├── roi_pool_cuda.cpp │ │ │ └── roi_pool_kernel.cu │ └── sigmoid_focal_loss │ │ ├── __init__.py │ │ ├── functions │ │ ├── __init__.py │ │ └── sigmoid_focal_loss.py │ │ ├── modules │ │ ├── __init__.py │ │ └── sigmoid_focal_loss.py │ │ ├── setup.py │ │ └── src │ │ ├── sigmoid_focal_loss.cpp │ │ └── sigmoid_focal_loss_cuda.cu └── utils │ ├── __init__.py │ ├── flops_counter.py │ └── registry.py ├── 
requirements.txt ├── results.jpg ├── setup.py ├── sota-dota1-clsap.png ├── sota-dota1.png ├── sota-dota15-clsap.png ├── sota-dota15.png ├── sota-dota2-clsap.png ├── speed_accuracy_v8.png └── tools ├── analyze_logs.py ├── coco_eval.py ├── config_trans_map.py ├── convert_datasets └── pascal_voc.py ├── copy_models.py ├── detectron2pytorch.py ├── dist_test.sh ├── dist_train.sh ├── get_flops.py ├── parse_results.py ├── publish_model.py ├── robustness_eval.py ├── slurm_test.sh ├── slurm_train.sh ├── test.py ├── test_robustness.py ├── train.py ├── upgrade_model_version.py └── voc_eval.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | db.sqlite3 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # SageMath parsed files 82 | *.sage.py 83 | 84 | # Environments 85 | .env 86 | .venv 87 | env/ 88 | venv/ 89 | ENV/ 90 | env.bak/ 91 | venv.bak/ 92 | 93 | # Spyder project settings 94 | .spyderproject 95 | .spyproject 96 | 97 | # Rope project settings 98 | .ropeproject 99 | 100 | # mkdocs documentation 101 | /site 102 | 103 | # mypy 104 | .mypy_cache/ 105 | 106 | # cython generated cpp 107 | mmdet/ops/nms/src/soft_nms_cpu.cpp 108 | mmdet/version.py 109 | data 110 | .vscode 111 | .idea 112 | 113 | trash/ 114 | -------------------------------------------------------------------------------- /.style.yapf: -------------------------------------------------------------------------------- 1 | [style] 2 | BASED_ON_STYLE = pep8 3 | BLANK_LINE_BEFORE_NESTED_CLASS_OR_DEF = true 4 | SPLIT_BEFORE_EXPRESSION_AFTER_OPENING_PAREN = true 5 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | dist: xenial 2 | language: python 3 | 4 | install: 5 | - pip install flake8 6 | 7 | python: 8 | - "3.5" 9 | - "3.6" 10 | - "3.7" 11 | 12 | script: 13 | - flake8 14 | 
"""Split large aerial images into fixed-size overlapping patches (images only).

Unlike ImgSplit.py this tool does not touch annotations; it only tiles the
source images, which is useful for test-time splitting.
"""
import os

import cv2
import numpy as np

import dota_utils as util


class splitbase():
    """Tile every image under ``srcpath`` into ``subsize`` x ``subsize`` patches.

    Consecutive patches overlap by ``gap`` pixels, i.e. the sliding stride
    is ``subsize - gap``.
    """

    def __init__(self,
                 srcpath,
                 dstpath,
                 gap=100,
                 subsize=1024,
                 ext='.png'):
        """
        Args:
            srcpath: directory containing the source images.
            dstpath: directory that receives the patches (created if missing).
            gap: overlap in pixels between neighbouring patches.
            subsize: side length of the square patches.
            ext: image extension used both for reading and for writing.
        """
        self.srcpath = srcpath
        self.dstpath = dstpath
        # Kept for backward compatibility: external code may read ``outpath``.
        self.outpath = dstpath
        self.gap = gap
        self.subsize = subsize
        self.slide = self.subsize - self.gap
        self.ext = ext
        # cv2.imwrite fails silently when the target directory is missing,
        # so make sure it exists up front.
        if not os.path.isdir(self.dstpath):
            os.makedirs(self.dstpath)

    def saveimagepatches(self, img, subimgname, left, up):
        """Crop the patch whose top-left corner is (left, up) and save it."""
        # ndarray slicing returns a view; .copy() is enough to give the
        # patch its own memory (the original used an overkill deepcopy).
        subimg = img[up:(up + self.subsize), left:(left + self.subsize)].copy()
        outdir = os.path.join(self.dstpath, subimgname + self.ext)
        cv2.imwrite(outdir, subimg)

    def SplitSingle(self, name, rate, extent):
        """Split one image, optionally rescaled by ``rate`` first.

        Patches are named ``<name>__<rate>__<left>___<up><ext>`` so that the
        original position can be recovered when merging detections.
        """
        img = cv2.imread(os.path.join(self.srcpath, name + extent),
                         cv2.IMREAD_UNCHANGED)
        # cv2.imread returns None for unreadable files; np.shape(None) == ().
        assert np.shape(img) != ()

        if rate != 1:
            resizeimg = cv2.resize(img, None, fx=rate, fy=rate,
                                   interpolation=cv2.INTER_CUBIC)
        else:
            resizeimg = img
        outbasename = name + '__' + str(rate) + '__'

        height = np.shape(resizeimg)[0]
        width = np.shape(resizeimg)[1]

        left = 0
        while left < width:
            if left + self.subsize >= width:
                # Clamp the final column so the patch stays inside the image.
                left = max(width - self.subsize, 0)
            up = 0
            while up < height:
                if up + self.subsize >= height:
                    up = max(height - self.subsize, 0)
                subimgname = outbasename + str(left) + '___' + str(up)
                self.saveimagepatches(resizeimg, subimgname, left, up)
                if up + self.subsize >= height:
                    break
                up = up + self.slide
            if left + self.subsize >= width:
                break
            left = left + self.slide

    def splitdata(self, rate):
        """Split every image found under ``srcpath`` at scale ``rate``."""
        imagelist = util.GetFileFromThisRootDir(self.srcpath)
        # 'Thumbs' filters out Windows Thumbs.db artifacts.
        imagenames = [util.custombasename(x) for x in imagelist
                      if util.custombasename(x) != 'Thumbs']
        for name in imagenames:
            self.SplitSingle(name, rate, self.ext)


if __name__ == '__main__':
    split = splitbase(r'/home/dingjian/data/GF3Process/tiff',
                      r'/home/dingjian/data/GF3Process/subimg',
                      ext='.tiff')
    split.splitdata(1)
7 | anchors = np.array([ 8 | [1, 1, 2, 10, 0], 9 | # [1, 30, 3, 1, np.pi/16], 10 | # [1000, 1000, 60, 60, 0], 11 | 12 | ], 13 | dtype=np.float32) 14 | anchors = np.repeat(anchors, 10000, axis=0) 15 | gt_boxes = np.array([ 16 | [2, 1, 2, 10, 0], 17 | # [1, 30, 3, 1, np.pi/16 + np.pi/2], 18 | # [1010, 1010, 3, 3, 0], 19 | 20 | ], dtype=np.float32) 21 | gt_boxes = np.repeat(gt_boxes, 10000, axis=0) 22 | # anchors = np.array([[1, 1, 200, 100, 0]], 23 | # dtype=np.float32) 24 | # gt_boxes = np.array([[2, 1, 200, 100, 0], 25 | # ], dtype=np.float32) 26 | # anchors = np.array([[1, 30, 3, 1, np.pi/16]], 27 | # dtype=np.float32) 28 | # gt_boxes = np.array([[1, 30, 3, 1, np.pi/16 + np.pi/2], 29 | # ], dtype=np.float32) 30 | overlaps = poly_overlaps(anchors, gt_boxes, 0) 31 | print(overlaps) 32 | 33 | -------------------------------------------------------------------------------- /DOTA_devkit/polyiou.h: -------------------------------------------------------------------------------- 1 | // 2 | // Created by dingjian on 18-2-3. 3 | // 4 | 5 | #ifndef POLYIOU_POLYIOU_H 6 | #define POLYIOU_POLYIOU_H 7 | 8 | #include 9 | double iou_poly(std::vector p, std::vector q); 10 | #endif //POLYIOU_POLYIOU_H 11 | -------------------------------------------------------------------------------- /DOTA_devkit/polyiou.i: -------------------------------------------------------------------------------- 1 | %module polyiou 2 | %include "std_vector.i" 3 | 4 | namespace std { 5 | %template(VectorDouble) vector; 6 | }; 7 | 8 | %{ 9 | #define SWIG_FILE_WITH_INIT 10 | #include 11 | #include 12 | #include 13 | #include 14 | 15 | #include "polyiou.h" 16 | %} 17 | 18 | %include "polyiou.h" 19 | 20 | -------------------------------------------------------------------------------- /DOTA_devkit/readme.md: -------------------------------------------------------------------------------- 1 | The code is useful for DOTA or 2 | ODAI. The code provide the following function 3 |
    4 |
  • 5 | Load an image, and show the bounding box on it. 6 |
  • 7 |
  • 8 | Evaluate the result. 9 |
  • 10 |
  • 11 | Split and merge the picture and label. 12 |
  • 13 |
14 | 15 | ### What is DOTA? 16 |

17 | DOTA is a large-scale dataset for object detection in aerial images. 18 | It can be used to develop and evaluate object detectors in aerial images. 19 | We will continue to update DOTA, to grow in size and scope and to reflect evolving real-world conditions. 20 | Different from general object detection datasets, each instance of DOTA is labeled by an arbitrary (8 d.o.f.) quadrilateral. 21 | For the details of DOTA-v1.0, you can refer to our 22 | paper. 23 |

24 | 25 | ### What is ODAI? 26 |

 27 | ODAI is a contest of object detection in aerial images on ICPR'2018. It is based on DOTA-v1. The contest is ongoing now. 28 |

"""Convert OBB (oriented box) detection results into HBB (axis-aligned) ones.

Each input file contains lines of ``imgname score x1 y1 x2 y2 x3 y3 x4 y4``;
the output keeps ``imgname score`` and replaces the 8-coordinate polygon by
its axis-aligned bounding rectangle ``xmin ymin xmax ymax``.
"""
import DOTA_devkit.utils as util
import os

import argparse
dota2_annopath = r'data/dota2_test-dev/labelTxt/{:s}.txt'
dota2_imagesetfile = r'data/dota2_test-dev/test.txt'


def parse_args():
    """Parse command line arguments (only the results directory)."""
    parser = argparse.ArgumentParser(description='Train a detector')
    parser.add_argument(
        r'--path',
        default=r'/home/dj/code/mmdetection_DOTA/work_dirs/'
                r'faster_rcnn_r50_fpn_1x_dota_RoITrans_v2/save1_nms2000')
    # parser.add_argument('--version', default='dota_v1',
    #                     help='dota version')
    args = parser.parse_args()

    return args


def OBB2HBB(srcpath, dstpath):
    """Rewrite every result file in ``srcpath`` with HBBs into ``dstpath``.

    Args:
        srcpath: directory holding per-class OBB result files.
        dstpath: output directory, created when missing.
    """
    filenames = util.GetFileFromThisRootDir(srcpath)
    if not os.path.exists(dstpath):
        os.makedirs(dstpath)
    for file in filenames:
        outname = os.path.join(dstpath, util.mybasename(file) + '.txt')
        with open(file, 'r') as f_in, open(outname, 'w') as f_out:
            outlines = []
            for line in f_in:
                splitline = line.strip().split()
                if not splitline:
                    # Tolerate stray blank lines instead of crashing
                    # with an IndexError on splitline[0].
                    continue
                imgname = splitline[0]
                score = splitline[1]
                poly = list(map(float, splitline[2:]))
                # Even indices are x coordinates, odd indices are y.
                xmin, xmax = min(poly[0::2]), max(poly[0::2])
                ymin, ymax = min(poly[1::2]), max(poly[1::2])
                rec_poly = [xmin, ymin, xmax, ymax]
                outlines.append(
                    imgname + ' ' + score + ' ' + ' '.join(map(str, rec_poly)))
            # join() reproduces the original format: newline between
            # records, no trailing newline at the end of file.
            f_out.write('\n'.join(outlines))


if __name__ == '__main__':
    args = parse_args()
    obb_results_path = os.path.join(args.path, r'Task1_results_nms')
    hbb_results_path = os.path.join(args.path, r'Transed_Task2_results_nms')
    # The original script computed these paths but never ran the
    # conversion, so it was a no-op.
    OBB2HBB(obb_results_path, hbb_results_path)
Create a conda virtual environment and activate it. Then install Cython. 23 | 24 | ```shell 25 | conda create -n AerialDetection python=3.7 -y 26 | source activate AerialDetection 27 | 28 | conda install cython 29 | ``` 30 | 31 | b. Install PyTorch stable or nightly and torchvision following the [official instructions](https://pytorch.org/). 32 | 33 | c. Clone the AerialDetection repository. 34 | 35 | ```shell 36 | git clone https://github.com/dingjiansw101/AerialDetection.git 37 | cd AerialDetection 38 | ``` 39 | 40 | d. Compile cuda extensions. 41 | 42 | ```shell 43 | ./compile.sh 44 | ``` 45 | 46 | e. Install AerialDetection (other dependencies will be installed automatically). 47 | 48 | ```shell 49 | pip install -r requirements.txt 50 | python setup.py develop 51 | # or "pip install -e ." 52 | ``` 53 | 54 | Note: 55 | 56 | 1. It is recommended that you run the step e each time you pull some updates from github. If there are some updates of the C/CUDA codes, you also need to run step d. 57 | The git commit id will be written to the version number with step e, e.g. 0.6.0+2e7045c. The version will also be saved in trained models. 58 | 59 | 2. Following the above instructions, AerialDetection is installed on `dev` mode, any modifications to the code will take effect without installing it again. 60 | 61 | ### Install DOTA_devkit 62 | ``` 63 | sudo apt-get install swig 64 | cd DOTA_devkit 65 | swig -c++ -python polyiou.i 66 | python setup.py build_ext --inplace 67 | ``` 68 | ### Notice 69 | You can run `python(3) setup.py develop` or `pip install -e .` to install AerialDetection if you want to make modifications to it frequently. 70 | 71 | If there are more than one AerialDetection on your machine, and you want to use them alternatively. 
72 | Please insert the following code to the main file 73 | ```python 74 | import os.path as osp 75 | import sys 76 | sys.path.insert(0, osp.join(osp.dirname(osp.abspath(__file__)), '../')) 77 | ``` 78 | or run the following command in the terminal of corresponding folder. 79 | ```shell 80 | export PYTHONPATH=`pwd`:$PYTHONPATH 81 | ``` 82 | -------------------------------------------------------------------------------- /MODEL_ZOO.md: -------------------------------------------------------------------------------- 1 | # Benchmark and Model Zoo 2 | 3 | ## Environment 4 | 5 | ### Hardware 6 | 7 | - 4 NVIDIA Tesla V100 GPUs 8 | - Intel(R) Xeon(R) CPU E5-2640 v4 @ 2.40GHz 9 | ### Software environment 10 | 11 | - Python 3.6 / 3.7 12 | - PyTorch 1.1.0 13 | - CUDA 10.0.176 14 | - CUDNN 7.4.1 15 | 16 | ## Common settings 17 | 18 | - All baselines were trained using 4 GPU with a batch size of 8 (2 images per GPU). 19 | - We adopt the same training schedules as Detectron. 1x indicates 12 epochs and 2x indicates 24 epochs, which corresponds to slightly less iterations than Detectron and the difference can be ignored. 20 | - We report the inference time as the overall time including data loading, network forwarding and post processing in chips with size of 1024. 21 | 22 | 23 | ## Baselines 24 | The folowing shows the baseline results. For more results, see our [paper](https://arxiv.org/abs/2102.12219). 25 | - Baseline results on DOTA (R-FPN-50, without data augmentations) 26 | ![benchmarks](benchmarks-dota.png) 27 | - Baseline results of different backbones on DOTA-v2.0 (without data augmentations). 28 | ![speed](speed_accuracy_v8.png) 29 | 30 | [comment]: <> (- Ablation study of data augmentation on DOTA-v1.5.) 31 | 32 | [comment]: <> (![dataaug](data-aug.png)) 33 | 34 | - SOTA on DOTA-v1.0. 35 | ![sota-dota1](sota-dota1.png) 36 | 37 | - SOTA on DOTA-v1.5. 38 | ![sota-dota15](sota-dota15.png) 39 | 40 | - Class-wise AP on DOTA-v1.0. 
#!/usr/bin/env bash
# Build every C/CUDA extension op in place.
# Abort on the first failure so a broken build is not silently skipped
# (the original script kept going and could cd into the wrong directory).
set -e

PYTHON=${PYTHON:-"python"}

# All ops under mmdet/ops follow the same recipe:
# remove any stale build/ directory, then build the extension in place.
# Build order matches the original script.
for op in roi_align roi_pool roi_align_rotated psroi_align_rotated \
          nms dcn sigmoid_focal_loss masked_conv poly_nms; do
    echo "Building ${op} op..."
    (
        cd "mmdet/ops/${op}"
        rm -rf build
        $PYTHON setup.py build_ext --inplace
    )
done

echo "Building cpu_nms..."
(
    cd mmdet/core/bbox
    $PYTHON setup_linux.py build_ext --inplace
)
[model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/fcos/fcos_r101_caffe_fpn_gn_1x_4gpu_20190516-e4889733.pth) | 22 | | R-101 | caffe | Y | N | 2x | - | - | - | 39.1 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/fcos/fcos_r101_caffe_fpn_gn_2x_4gpu_20190516-c03af97b.pth) | 23 | 24 | 25 | | Backbone | Style | GN | MS train | Lr schd | Mem (GB) | Train time (s/iter) | Inf time (fps) | box AP | Download | 26 | |:---------:|:-------:|:-------:|:-------:|:-------:|:--------:|:-------------------:|:--------------:|:------:|:--------:| 27 | | R-50 | caffe | Y | Y | 2x | - | - | - | 38.7 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/fcos/fcos_mstrain_640_800_r50_caffe_fpn_gn_2x_4gpu_20190516-f7329d80.pth) | 28 | | R-101 | caffe | Y | Y | 2x | - | - | - | 40.8 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/fcos/fcos_mstrain_640_800_r101_caffe_fpn_gn_2x_4gpu_20190516-42e6f62d.pth) | 29 | | X-101 | caffe | Y | Y | 2x | 9.7 | 0.892 | 7.0 | 42.8 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/fcos/fcos_mstrain_640_800_x101_64x4d_fpn_gn_2x_20190516-a36c0872.pth) | 30 | 31 | **Notes:** 32 | - To be consistent with the author's implementation, we use 4 GPUs with 4 images/GPU for R-50 and R-101 models, and 8 GPUs with 2 image/GPU for X-101 models. 33 | - The X-101 backbone is X-101-64x4d. 
34 | -------------------------------------------------------------------------------- /configs/ghm/README.md: -------------------------------------------------------------------------------- 1 | # Gradient Harmonized Single-stage Detector 2 | 3 | ## Introduction 4 | 5 | ``` 6 | @inproceedings{li2019gradient, 7 | title={Gradient Harmonized Single-stage Detector}, 8 | author={Li, Buyu and Liu, Yu and Wang, Xiaogang}, 9 | booktitle={AAAI Conference on Artificial Intelligence}, 10 | year={2019} 11 | } 12 | ``` 13 | 14 | ## Results and Models 15 | 16 | To be benchmarked. -------------------------------------------------------------------------------- /configs/gn/README.md: -------------------------------------------------------------------------------- 1 | # Group Normalization 2 | 3 | ## Introduction 4 | 5 | ``` 6 | @inproceedings{wu2018group, 7 | title={Group Normalization}, 8 | author={Wu, Yuxin and He, Kaiming}, 9 | booktitle={Proceedings of the European Conference on Computer Vision (ECCV)}, 10 | year={2018} 11 | } 12 | ``` 13 | 14 | ## Results and Models 15 | 16 | | Backbone | model | Lr schd | Mem (GB) | Train time (s/iter) | Inf time (fps) | box AP | mask AP | Download | 17 | |:-------------:|:----------:|:-------:|:--------:|:-------------------:|:--------------:|:------:|:-------:|:--------:| 18 | | R-50-FPN (d) | Mask R-CNN | 2x | 7.2 | 0.806 | 5.4 | 39.8 | 36.1 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/gn/mask_rcnn_r50_fpn_gn_2x_20180113-86832cf2.pth) | 19 | | R-50-FPN (d) | Mask R-CNN | 3x | 7.2 | 0.806 | 5.4 | 40.1 | 36.4 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/gn/mask_rcnn_r50_fpn_gn_3x_20180113-8e82f48d.pth) | 20 | | R-101-FPN (d) | Mask R-CNN | 2x | 9.9 | 0.970 | 4.8 | 41.5 | 37.0 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/gn/mask_rcnn_r101_fpn_gn_2x_20180113-9598649c.pth) | 21 | | R-101-FPN (d) | Mask R-CNN | 3x | 9.9 | 0.970 | 4.8 | 41.6 
| 37.3 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/gn/mask_rcnn_r101_fpn_gn_3x_20180113-a14ffb96.pth) | 22 | | R-50-FPN (c) | Mask R-CNN | 2x | 7.2 | 0.806 | 5.4 | 39.7 | 35.9 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/gn/mask_rcnn_r50_fpn_gn_contrib_2x_20180113-ec93305c.pth) | 23 | | R-50-FPN (c) | Mask R-CNN | 3x | 7.2 | 0.806 | 5.4 | 40.0 | 36.2 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/gn/mask_rcnn_r50_fpn_gn_contrib_3x_20180113-9d230cab.pth) | 24 | 25 | **Notes:** 26 | - (d) means pretrained model converted from Detectron, and (c) means the contributed model pretrained by [@thangvubk](https://github.com/thangvubk). 27 | - The `3x` schedule is epoch [28, 34, 36]. 28 | - **Memory, Train/Inf time is outdated.** -------------------------------------------------------------------------------- /configs/hrnet/README.md: -------------------------------------------------------------------------------- 1 | # High-resolution networks (HRNets) for object detection 2 | 3 | ## Introduction 4 | 5 | ``` 6 | @inproceedings{SunXLW19, 7 | title={Deep High-Resolution Representation Learning for Human Pose Estimation}, 8 | author={Ke Sun and Bin Xiao and Dong Liu and Jingdong Wang}, 9 | booktitle={CVPR}, 10 | year={2019} 11 | } 12 | 13 | @article{SunZJCXLMWLW19, 14 | title={High-Resolution Representations for Labeling Pixels and Regions}, 15 | author={Ke Sun and Yang Zhao and Borui Jiang and Tianheng Cheng and Bin Xiao 16 | and Dong Liu and Yadong Mu and Xinggang Wang and Wenyu Liu and Jingdong Wang}, 17 | journal = {CoRR}, 18 | volume = {abs/1904.04514}, 19 | year={2019} 20 | } 21 | ``` 22 | 23 | ## Results and Models 24 | 25 | Faster R-CNN 26 | 27 | | Backbone|#Params|GFLOPs|Lr sched|mAP|Download| 28 | | :--:|:--:|:--:|:--:|:--:|:--:| 29 | | HRNetV2-W18 |26.2M|159.1| 1x | 36.1 | 
[model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/hrnet/faster_rcnn_hrnetv2_w18_fpn_1x_20190522-e368c387.pth)| 30 | | HRNetV2-W18 |26.2M|159.1| 20-23-24e | 38.1 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/hrnet/faster_rcnn_hrnetv2_w18_fpn_20_23_24e_20190522-ed3c0293.pth)| 31 | | HRNetV2-W32 |45.0M|245.3| 1x | 39.5 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/hrnet/faster_rcnn_hrnetv2_w32_fpn_1x_20190522-d22f1fef.pth)| 32 | | HRNetV2-W32 |45.0M|245.3| 20-23-24e | 40.8 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/hrnet/faster_rcnn_hrnetv2_w32_fpn_20_23_24e_20190522-2d67a5eb.pth)| 33 | | HRNetV2-W40 |60.5M|314.9| 1x | 40.4 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/hrnet/faster_rcnn_hrnetv2_w40_fpn_1x_20190522-30502318.pth)| 34 | | HRNetV2-W40 |60.5M|314.9| 20-23-24e | 41.4 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/hrnet/faster_rcnn_hrnetv2_w40_fpn_20_23_24e_20190522-050a7c7f.pth)| 35 | 36 | 37 | Mask R-CNN 38 | 39 | |Backbone|Lr sched|mask mAP|box mAP|Download| 40 | |:--:|:--:|:--:|:--:|:--:| 41 | | HRNetV2-W18 | 1x | 34.2 | 37.3 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/hrnet/mask_rcnn_hrnetv2_w18_fpn_1x_20190522-c8ad459f.pth)| 42 | | HRNetV2-W18 | 20-23-24e | 35.7 | 39.2 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/hrnet/mask_rcnn_hrnetv2_w18_fpn_20_23_24e_20190522-5c11b7f2.pth)| 43 | | HRNetV2-W32 | 1x | 36.8 | 40.7 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/hrnet/mask_rcnn_hrnetv2_w32_fpn_1x_20190522-374aaa00.pth)| 44 | | HRNetV2-W32 | 20-23-24e | 37.6 | 42.1 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/hrnet/mask_rcnn_hrnetv2_w32_fpn_20_23_24e_20190522-4dd02a79.pth)| 45 | 46 | Cascade R-CNN 47 | 48 | |Backbone|Lr 
sched|mAP|Download| 49 | |:--:|:--:|:--:|:--:| 50 | | HRNetV2-W32 | 20e | 43.7 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/hrnet/cascade_rcnn_hrnetv2_w32_fpn_20e_20190522-55bec4ee.pth)| 51 | 52 | **Note:** 53 | 54 | - HRNetV2 ImageNet pretrained models are in [HRNets for Image Classification](https://github.com/HRNet/HRNet-Image-Classification). 55 | -------------------------------------------------------------------------------- /configs/libra_rcnn/README.md: -------------------------------------------------------------------------------- 1 | # Libra R-CNN: Towards Balanced Learning for Object Detection 2 | 3 | ## Introduction 4 | 5 | We provide config files to reproduce the results in the CVPR 2019 paper [Libra R-CNN](https://arxiv.org/pdf/1904.02701.pdf). 6 | 7 | ``` 8 | @inproceedings{pang2019libra, 9 | title={Libra R-CNN: Towards Balanced Learning for Object Detection}, 10 | author={Pang, Jiangmiao and Chen, Kai and Shi, Jianping and Feng, Huajun and Ouyang, Wanli and Dahua Lin}, 11 | booktitle={IEEE Conference on Computer Vision and Pattern Recognition}, 12 | year={2019} 13 | } 14 | ``` 15 | 16 | ## Results and models 17 | 18 | The results on COCO 2017val are shown in the below table. 
(results on test-dev are usually slightly higher than val) 19 | 20 | | Architecture | Backbone | Style | Lr schd | Mem (GB) | Train time (s/iter) | Inf time (fps) | box AP | Download | 21 | |:---------:|:-------:|:-------:|:--------:|:-------------------:|:--------------:|:------:|:-------:|:--------:| 22 | | Faster R-CNN | R-50-FPN | pytorch | 1x | 4.2 | 0.375 | 12.0 | 38.6 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/libra_rcnn/libra_faster_rcnn_r50_fpn_1x_20190525-c8c06833.pth) | 23 | | Fast R-CNN | R-50-FPN | pytorch | 1x | 3.7 | 0.272 | 16.3 | 38.5 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/libra_rcnn/libra_fast_rcnn_r50_fpn_1x_20190525-a43f88b5.pth) | 24 | | Faster R-CNN | R-101-FPN | pytorch | 1x | 6.0 | 0.495 | 10.4 | 40.3 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/libra_rcnn/libra_faster_rcnn_r101_fpn_1x_20190525-94e94051.pth) | 25 | | Faster R-CNN | X-101-64x4d-FPN | pytorch | 1x | 10.1 | 1.050 | 6.8 | 42.7 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/libra_rcnn/libra_faster_rcnn_x101_64x4d_fpn_1x_20190525-359c134a.pth) | 26 | | RetinaNet | R-50-FPN | pytorch | 1x | 3.7 | 0.328 | 11.8 | 37.7 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/libra_rcnn/libra_retinanet_r50_fpn_1x_20190525-ead2a6bb.pth) | 27 | -------------------------------------------------------------------------------- /configs/scratch/README.md: -------------------------------------------------------------------------------- 1 | # Rethinking ImageNet Pre-training 2 | 3 | ## Introduction 4 | 5 | ``` 6 | @article{he2018rethinking, 7 | title={Rethinking imagenet pre-training}, 8 | author={He, Kaiming and Girshick, Ross and Doll{\'a}r, Piotr}, 9 | journal={arXiv preprint arXiv:1811.08883}, 10 | year={2018} 11 | } 12 | ``` 13 | 14 | ## Results and Models 15 | 16 | | Model | Backbone | Style | Lr schd | box AP | mask AP | 
# Top-level mmdet package: re-export the version strings generated at
# install time by setup.py into mmdet/version.py.
from .version import __version__, short_version

__all__ = ['__version__', 'short_version']
def init_dist(launcher, backend='nccl', **kwargs):
    """Initialize torch.distributed for the given job launcher.

    Args:
        launcher (str): One of 'pytorch', 'mpi' or 'slurm'.
        backend (str): torch.distributed backend name, e.g. 'nccl'.
        **kwargs: Forwarded to the launcher-specific initializer.

    Raises:
        ValueError: If ``launcher`` is not a recognized launcher type.
    """
    # Use the 'spawn' start method unless one was already chosen explicitly.
    if mp.get_start_method(allow_none=True) is None:
        mp.set_start_method('spawn')
    if launcher == 'pytorch':
        return _init_dist_pytorch(backend, **kwargs)
    if launcher == 'mpi':
        return _init_dist_mpi(backend, **kwargs)
    if launcher == 'slurm':
        return _init_dist_slurm(backend, **kwargs)
    raise ValueError('Invalid launcher type: {}'.format(launcher))
def get_root_logger(log_level=logging.INFO):
    """Return the root logger, configured once for this process.

    basicConfig is applied only when the root logger has no handlers yet, so
    repeated calls do not duplicate output. On distributed runs, non-master
    ranks (rank != 0) are raised to ERROR level to keep logs quiet.
    """
    root = logging.getLogger()
    if not root.hasHandlers():
        logging.basicConfig(
            format='%(asctime)s - %(levelname)s - %(message)s',
            level=log_level)
    rank, _ = get_dist_info()
    if rank != 0:
        root.setLevel('ERROR')
    return root
rbbox_base_sampler, 7 | rbbox_random_sampler) 8 | from .assign_sampling import build_assigner, build_sampler, assign_and_sample 9 | from .transforms import (bbox2delta, delta2bbox, bbox_flip, bbox_mapping, 10 | bbox_mapping_back, bbox2roi, roi2bbox, bbox2result, 11 | distance2bbox) 12 | from .bbox_target import bbox_target 13 | from .transforms_rbbox import (dbbox2delta, delta2dbbox, mask2poly, 14 | get_best_begin_point, polygonToRotRectangle_batch, 15 | dbbox2roi, dbbox_flip, dbbox_mapping, 16 | dbbox2result, Tuplelist2Polylist, roi2droi, 17 | gt_mask_bp_obbs, gt_mask_bp_obbs_list, 18 | choose_best_match_batch, 19 | choose_best_Rroi_batch, delta2dbbox_v2, 20 | delta2dbbox_v3, dbbox2delta_v3, hbb2obb_v2, RotBox2Polys, RotBox2Polys_torch, 21 | poly2bbox, dbbox_rotate_mapping, bbox_rotate_mapping, 22 | bbox_rotate_mapping, dbbox_mapping_back) 23 | from .bbox_target_rbbox import bbox_target_rbbox, rbbox_target_rbbox 24 | 25 | __all__ = [ 26 | 'bbox_overlaps', 'BaseAssigner', 'MaxIoUAssigner', 'AssignResult', 27 | 'BaseSampler', 'PseudoSampler', 'RandomSampler', 28 | 'InstanceBalancedPosSampler', 'IoUBalancedNegSampler', 'CombinedSampler', 29 | 'SamplingResult', 'build_assigner', 'build_sampler', 'assign_and_sample', 30 | 'bbox2delta', 'delta2bbox', 'bbox_flip', 'bbox_mapping', 31 | 'bbox_mapping_back', 'bbox2roi', 'roi2bbox', 'bbox2result', 32 | 'distance2bbox', 'bbox_target', 'bbox_overlaps_cython', 33 | 'dbbox2delta', 'delta2dbbox', 'mask2poly', 'get_best_begin_point', 'polygonToRotRectangle_batch', 34 | 'bbox_target_rbbox', 'dbbox2roi', 'dbbox_flip', 'dbbox_mapping', 35 | 'dbbox2result', 'Tuplelist2Polylist', 'roi2droi', 'rbbox_base_sampler', 36 | 'rbbox_random_sampler', 'gt_mask_bp_obbs', 'gt_mask_bp_obbs_list', 37 | 'rbbox_target_rbbox', 'choose_best_match_batch', 'choose_best_Rroi_batch', 38 | 'delta2dbbox_v2', 'delta2dbbox_v3', 'dbbox2delta_v3', 39 | 'hbb2obb_v2', 'RotBox2Polys', 'RotBox2Polys_torch', 'poly2bbox', 'dbbox_rotate_mapping', 40 | 
import mmcv

from . import assigners, samplers


def build_assigner(cfg, **kwargs):
    """Build a bbox assigner from a config dict, or pass through an instance.

    Args:
        cfg (dict | assigners.BaseAssigner): Assigner config or instance.
        **kwargs: Default arguments forwarded to the constructed assigner.

    Raises:
        TypeError: If ``cfg`` is neither a ``BaseAssigner`` nor a dict.
    """
    if isinstance(cfg, assigners.BaseAssigner):
        return cfg
    elif isinstance(cfg, dict):
        return mmcv.runner.obj_from_dict(cfg, assigners, default_args=kwargs)
    else:
        # Bug fix: this message previously said "sampler" (copy-pasted from
        # build_sampler below), which made failures here misleading.
        raise TypeError('Invalid type {} for building an assigner'.format(
            type(cfg)))


def build_sampler(cfg, **kwargs):
    """Build a bbox sampler from a config dict, or pass through an instance.

    Args:
        cfg (dict | samplers.BaseSampler): Sampler config or instance.
        **kwargs: Default arguments forwarded to the constructed sampler.

    Raises:
        TypeError: If ``cfg`` is neither a ``BaseSampler`` nor a dict.
    """
    if isinstance(cfg, samplers.BaseSampler):
        return cfg
    elif isinstance(cfg, dict):
        return mmcv.runner.obj_from_dict(cfg, samplers, default_args=kwargs)
    else:
        raise TypeError('Invalid type {} for building a sampler'.format(
            type(cfg)))


def assign_and_sample(bboxes, gt_bboxes, gt_bboxes_ignore, gt_labels, cfg):
    """Assign proposals to ground truth and sample them in one call.

    Args:
        bboxes (Tensor): Candidate boxes to assign and sample from.
        gt_bboxes (Tensor): Ground-truth boxes.
        gt_bboxes_ignore (Tensor): Ground-truth boxes to be ignored.
        gt_labels (Tensor): Labels of the ground-truth boxes.
        cfg: Config object providing ``assigner`` and ``sampler`` sub-configs.

    Returns:
        tuple: ``(assign_result, sampling_result)``.
    """
    bbox_assigner = build_assigner(cfg.assigner)
    bbox_sampler = build_sampler(cfg.sampler)
    assign_result = bbox_assigner.assign(bboxes, gt_bboxes, gt_bboxes_ignore,
                                         gt_labels)
    sampling_result = bbox_sampler.sample(assign_result, bboxes, gt_bboxes,
                                          gt_labels)
    return assign_result, sampling_result
class AssignResult(object):
    """Holds the result of assigning proposals to ground-truth boxes.

    Attributes:
        num_gts: number of ground-truth boxes.
        gt_inds: per-proposal assigned gt index (0 = negative, >0 = 1-based gt).
        max_overlaps: per-proposal max IoU with any gt.
        labels: per-proposal class labels, or None.
    """

    def __init__(self, num_gts, gt_inds, max_overlaps, labels=None):
        self.num_gts = num_gts
        self.gt_inds = gt_inds
        self.max_overlaps = max_overlaps
        self.labels = labels

    def add_gt_(self, gt_labels):
        """Prepend the ground-truth boxes as their own assignments (in place)."""
        num_gt = len(gt_labels)
        # Each gt is assigned to itself: 1-based indices 1..num_gt.
        gt_self_inds = torch.arange(
            1, num_gt + 1, dtype=torch.long, device=gt_labels.device)
        self.gt_inds = torch.cat((gt_self_inds, self.gt_inds))
        # A gt trivially overlaps itself with IoU 1.
        gt_ones = self.max_overlaps.new_ones(self.num_gts)
        self.max_overlaps = torch.cat((gt_ones, self.max_overlaps))
        if self.labels is not None:
            self.labels = torch.cat((gt_labels, self.labels))
cimport cython
import numpy as np
cimport numpy as np

# Bug fix: `np.float` was deprecated in NumPy 1.20 and removed in 1.24, so
# this module failed to import/compile on modern NumPy. `np.float` was merely
# an alias for the builtin float (64-bit), so np.float64 is an exact drop-in.
DTYPE = np.float64
ctypedef np.float64_t DTYPE_t

def bbox_overlaps_cython(
        np.ndarray[DTYPE_t, ndim=2] boxes,
        np.ndarray[DTYPE_t, ndim=2] query_boxes):
    """
    Compute pairwise IoU between two sets of axis-aligned boxes.

    Parameters
    ----------
    boxes: (N, 4) ndarray of float
    query_boxes: (K, 4) ndarray of float
    Returns
    -------
    overlaps: (N, K) ndarray of overlap between boxes and query_boxes
    """
    cdef unsigned int N = boxes.shape[0]
    cdef unsigned int K = query_boxes.shape[0]
    cdef np.ndarray[DTYPE_t, ndim=2] overlaps = np.zeros((N, K), dtype=DTYPE)
    cdef DTYPE_t iw, ih, box_area
    cdef DTYPE_t ua
    cdef unsigned int k, n
    for k in range(K):
        # The +1 terms treat coordinates as inclusive integer pixel indices.
        box_area = (
            (query_boxes[k, 2] - query_boxes[k, 0] + 1) *
            (query_boxes[k, 3] - query_boxes[k, 1] + 1)
        )
        for n in range(N):
            iw = (
                min(boxes[n, 2], query_boxes[k, 2]) -
                max(boxes[n, 0], query_boxes[k, 0]) + 1
            )
            if iw > 0:
                ih = (
                    min(boxes[n, 3], query_boxes[k, 3]) -
                    max(boxes[n, 1], query_boxes[k, 1]) + 1
                )
                if ih > 0:
                    # union = areaA + areaB - intersection
                    ua = float(
                        (boxes[n, 2] - boxes[n, 0] + 1) *
                        (boxes[n, 3] - boxes[n, 1] + 1) +
                        box_area - iw * ih
                    )
                    overlaps[n, k] = iw * ih / ua
    return overlaps
def expand_target(bbox_targets, bbox_weights, labels, num_classes):
    """Expand per-sample 4-d targets into class-specific (4 * num_classes) rows.

    Each foreground sample's regression target and weight are written into the
    four columns belonging to its class; every other column stays zero.

    Args:
        bbox_targets (Tensor): Shape (n, 4), per-sample regression deltas.
        bbox_weights (Tensor): Shape (n, 4), per-sample regression weights.
        labels (Tensor): Shape (n,), class index per sample (0 = background).
        num_classes (int): Number of classes including background.

    Returns:
        tuple[Tensor, Tensor]: Expanded targets and weights, each of shape
        (n, 4 * num_classes).
    """
    num_samples = bbox_targets.size(0)
    expanded_targets = bbox_targets.new_zeros((num_samples, 4 * num_classes))
    expanded_weights = bbox_weights.new_zeros((num_samples, 4 * num_classes))
    # Only foreground samples (label > 0) receive non-zero class slots.
    for idx in torch.nonzero(labels > 0).squeeze(-1):
        cls = int(labels[idx])
        expanded_targets[idx, cls * 4:(cls + 1) * 4] = bbox_targets[idx, :]
        expanded_weights[idx, cls * 4:(cls + 1) * 4] = bbox_weights[idx, :]
    return expanded_targets, expanded_weights
| gt_labels=None, 36 | **kwargs): 37 | """Sample positive and negative bboxes. 38 | 39 | This is a simple implementation of bbox sampling given candidates, 40 | assigning results and ground truth bboxes. 41 | 42 | Args: 43 | assign_result (:obj:`AssignResult`): Bbox assigning results. 44 | bboxes (Tensor): Boxes to be sampled from. 45 | gt_bboxes (Tensor): Ground truth bboxes. 46 | gt_labels (Tensor, optional): Class labels of ground truth bboxes. 47 | 48 | Returns: 49 | :obj:`SamplingResult`: Sampling result. 50 | """ 51 | # import pdb 52 | # pdb.set_trace() 53 | # TODO: verify the bboxes = bboxes[:, :4] are useless 54 | # assert bboxes.size(1) == 4 55 | bboxes = bboxes[:, :4] 56 | # if bboxes.size(1) != 4: 57 | # print('bboxes.size: ', bboxes.size(1)) 58 | 59 | gt_flags = bboxes.new_zeros((bboxes.shape[0], ), dtype=torch.uint8) 60 | if self.add_gt_as_proposals: 61 | bboxes = torch.cat([gt_bboxes, bboxes], dim=0) 62 | assign_result.add_gt_(gt_labels) 63 | gt_ones = bboxes.new_ones(gt_bboxes.shape[0], dtype=torch.uint8) 64 | gt_flags = torch.cat([gt_ones, gt_flags]) 65 | 66 | num_expected_pos = int(self.num * self.pos_fraction) 67 | pos_inds = self.pos_sampler._sample_pos( 68 | assign_result, num_expected_pos, bboxes=bboxes, **kwargs) 69 | # We found that sampled indices have duplicated items occasionally. 
class CombinedSampler(BaseSampler):
    """Sampler that delegates to two independently configured sub-samplers.

    Positives and negatives are drawn by separate samplers built from the
    ``pos_sampler`` and ``neg_sampler`` configs, so different strategies can
    be combined (e.g. hard mining for positives, IoU-balanced negatives).
    """

    def __init__(self, pos_sampler, neg_sampler, **kwargs):
        # kwargs carry the shared BaseSampler settings and are also forwarded
        # as default args when constructing both sub-samplers.
        super(CombinedSampler, self).__init__(**kwargs)
        self.pos_sampler = build_sampler(pos_sampler, **kwargs)
        self.neg_sampler = build_sampler(neg_sampler, **kwargs)

    def _sample_pos(self, **kwargs):
        # Never reached: BaseSampler.sample dispatches through
        # self.pos_sampler, which is the constructed sub-sampler.
        raise NotImplementedError

    def _sample_neg(self, **kwargs):
        # Never reached: BaseSampler.sample dispatches through
        # self.neg_sampler, which is the constructed sub-sampler.
        raise NotImplementedError
    def __init__(self,
                 num,
                 pos_fraction,
                 context,
                 neg_pos_ub=-1,
                 add_gt_as_proposals=True,
                 **kwargs):
        """Online Hard Example Mining sampler.

        Args:
            num (int): Total number of proposals to sample.
            pos_fraction (float): Fraction of positives among sampled boxes.
            context: Object owning this sampler; must provide
                ``bbox_roi_extractor`` and ``bbox_head``, which are used to
                score candidates during hard mining.
            neg_pos_ub (int): Upper bound on the neg/pos ratio (-1 = no bound).
            add_gt_as_proposals (bool): Whether to add gt boxes as proposals.
        """
        super(OHEMSampler, self).__init__(num, pos_fraction, neg_pos_ub,
                                          add_gt_as_proposals)
        # Contexts with a `num_stages` attribute hold per-stage extractor/head
        # lists (multi-stage heads); pick the modules of the current stage.
        # Otherwise the context exposes a single extractor/head directly.
        if not hasattr(context, 'num_stages'):
            self.bbox_roi_extractor = context.bbox_roi_extractor
            self.bbox_head = context.bbox_head
        else:
            self.bbox_roi_extractor = context.bbox_roi_extractor[
                context.current_stage]
            self.bbox_head = context.bbox_head[context.current_stage]
num_expected, bboxes, labels, feats): 27 | with torch.no_grad(): 28 | rois = bbox2roi([bboxes]) 29 | bbox_feats = self.bbox_roi_extractor( 30 | feats[:self.bbox_roi_extractor.num_inputs], rois) 31 | cls_score, _ = self.bbox_head(bbox_feats) 32 | loss = self.bbox_head.loss( 33 | cls_score=cls_score, 34 | bbox_pred=None, 35 | labels=labels, 36 | label_weights=cls_score.new_ones(cls_score.size(0)), 37 | bbox_targets=None, 38 | bbox_weights=None, 39 | reduce=False)['loss_cls'] 40 | _, topk_loss_inds = loss.topk(num_expected) 41 | return inds[topk_loss_inds] 42 | 43 | def _sample_pos(self, 44 | assign_result, 45 | num_expected, 46 | bboxes=None, 47 | feats=None, 48 | **kwargs): 49 | # Sample some hard positive samples 50 | pos_inds = torch.nonzero(assign_result.gt_inds > 0) 51 | if pos_inds.numel() != 0: 52 | pos_inds = pos_inds.squeeze(1) 53 | if pos_inds.numel() <= num_expected: 54 | return pos_inds 55 | else: 56 | return self.hard_mining(pos_inds, num_expected, bboxes[pos_inds], 57 | assign_result.labels[pos_inds], feats) 58 | 59 | def _sample_neg(self, 60 | assign_result, 61 | num_expected, 62 | bboxes=None, 63 | feats=None, 64 | **kwargs): 65 | # Sample some hard negative samples 66 | neg_inds = torch.nonzero(assign_result.gt_inds == 0) 67 | if neg_inds.numel() != 0: 68 | neg_inds = neg_inds.squeeze(1) 69 | if len(neg_inds) <= num_expected: 70 | return neg_inds 71 | else: 72 | return self.hard_mining(neg_inds, num_expected, bboxes[neg_inds], 73 | assign_result.labels[neg_inds], feats) 74 | -------------------------------------------------------------------------------- /mmdet/core/bbox/samplers/pseudo_sampler.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from .base_sampler import BaseSampler 4 | from .sampling_result import SamplingResult 5 | 6 | 7 | class PseudoSampler(BaseSampler): 8 | 9 | def __init__(self, **kwargs): 10 | pass 11 | 12 | def _sample_pos(self, **kwargs): 13 | raise 
NotImplementedError 14 | 15 | def _sample_neg(self, **kwargs): 16 | raise NotImplementedError 17 | 18 | def sample(self, assign_result, bboxes, gt_bboxes, **kwargs): 19 | pos_inds = torch.nonzero( 20 | assign_result.gt_inds > 0).squeeze(-1).unique() 21 | neg_inds = torch.nonzero( 22 | assign_result.gt_inds == 0).squeeze(-1).unique() 23 | gt_flags = bboxes.new_zeros(bboxes.shape[0], dtype=torch.uint8) 24 | sampling_result = SamplingResult(pos_inds, neg_inds, bboxes, gt_bboxes, 25 | assign_result, gt_flags) 26 | return sampling_result 27 | -------------------------------------------------------------------------------- /mmdet/core/bbox/samplers/random_sampler.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | from .base_sampler import BaseSampler 5 | 6 | 7 | class RandomSampler(BaseSampler): 8 | 9 | def __init__(self, 10 | num, 11 | pos_fraction, 12 | neg_pos_ub=-1, 13 | add_gt_as_proposals=True, 14 | **kwargs): 15 | super(RandomSampler, self).__init__(num, pos_fraction, neg_pos_ub, 16 | add_gt_as_proposals) 17 | 18 | @staticmethod 19 | def random_choice(gallery, num): 20 | """Random select some elements from the gallery. 21 | 22 | It seems that Pytorch's implementation is slower than numpy so we use 23 | numpy to randperm the indices. 
24 | """ 25 | assert len(gallery) >= num 26 | if isinstance(gallery, list): 27 | gallery = np.array(gallery) 28 | cands = np.arange(len(gallery)) 29 | np.random.shuffle(cands) 30 | rand_inds = cands[:num] 31 | if not isinstance(gallery, np.ndarray): 32 | rand_inds = torch.from_numpy(rand_inds).long().to(gallery.device) 33 | return gallery[rand_inds] 34 | 35 | def _sample_pos(self, assign_result, num_expected, **kwargs): 36 | """Randomly sample some positive samples.""" 37 | pos_inds = torch.nonzero(assign_result.gt_inds > 0) 38 | if pos_inds.numel() != 0: 39 | pos_inds = pos_inds.squeeze(1) 40 | if pos_inds.numel() <= num_expected: 41 | return pos_inds 42 | else: 43 | return self.random_choice(pos_inds, num_expected) 44 | 45 | def _sample_neg(self, assign_result, num_expected, **kwargs): 46 | """Randomly sample some negative samples.""" 47 | neg_inds = torch.nonzero(assign_result.gt_inds == 0) 48 | if neg_inds.numel() != 0: 49 | neg_inds = neg_inds.squeeze(1) 50 | if len(neg_inds) <= num_expected: 51 | return neg_inds 52 | else: 53 | return self.random_choice(neg_inds, num_expected) 54 | -------------------------------------------------------------------------------- /mmdet/core/bbox/samplers/rbbox_base_sampler.py: -------------------------------------------------------------------------------- 1 | from abc import ABCMeta, abstractmethod 2 | 3 | import torch 4 | 5 | from .sampling_result import SamplingResult 6 | 7 | class RbboxBaseSampler(metaclass=ABCMeta): 8 | 9 | def __init__(self, 10 | num, 11 | pos_fraction, 12 | neg_pos_ub=-1, 13 | add_gt_as_proposals=True, 14 | **kwargs): 15 | self.num = num 16 | self.pos_fraction = pos_fraction 17 | self.neg_pos_ub = neg_pos_ub 18 | self.add_gt_as_proposals = add_gt_as_proposals 19 | self.pos_sampler = self 20 | self.neg_sampler = self 21 | 22 | @abstractmethod 23 | def _sample_pos(self, assign_results, num_expected, **kwargs): 24 | pass 25 | 26 | @abstractmethod 27 | def _sample_neg(self, assign_results, num_expected, 
class RbboxBaseSampler(metaclass=ABCMeta):
    # Abstract base sampler for rotated (5-parameter) bounding boxes.
    # Subclasses implement _sample_pos/_sample_neg; sample() applies the
    # shared positive/negative quota bookkeeping.

    def __init__(self,
                 num,
                 pos_fraction,
                 neg_pos_ub=-1,
                 add_gt_as_proposals=True,
                 **kwargs):
        # Total number of samples and the fraction reserved for positives.
        self.num = num
        self.pos_fraction = pos_fraction
        # Upper bound of #neg relative to #pos; a negative value disables it.
        self.neg_pos_ub = neg_pos_ub
        self.add_gt_as_proposals = add_gt_as_proposals
        # By default this object samples both polarities itself; combined
        # samplers may replace these attributes with dedicated instances.
        self.pos_sampler = self
        self.neg_sampler = self

    @abstractmethod
    def _sample_pos(self, assign_results, num_expected, **kwargs):
        pass

    @abstractmethod
    def _sample_neg(self, assign_results, num_expected, **kwargs):
        pass

    def sample(self,
               assign_result,
               rbboxes,
               gt_rbboxes,
               gt_labels=None,
               **kwargs):
        """Sample positive and negative rbboxes.

        This is a simple implementation of bbox sampling given candidates,
        assigning results and ground truth bboxes.

        Args:
            assign_result (:obj:`AssignResult`): Rbbox assigning results.
            rbboxes (Tensor): Rotated boxes to be sampled from.
            gt_rbboxes (Tensor): Ground truth rotated bboxes.
            gt_labels (Tensor, optional): Class labels of ground truth
                rbboxes.

        Returns:
            :obj:`SamplingResult`: Sampling result.
        """
        # Keep only the 5 geometry columns; drops any trailing score column.
        rbboxes = rbboxes[:, :5]

        gt_flags = rbboxes.new_zeros((rbboxes.shape[0], ), dtype=torch.uint8)
        if self.add_gt_as_proposals:
            # Prepend gt boxes as guaranteed proposals and mark them in
            # gt_flags so downstream code can tell them apart.
            rbboxes = torch.cat([gt_rbboxes, rbboxes], dim=0)
            assign_result.add_gt_(gt_labels)
            gt_ones = rbboxes.new_ones(gt_rbboxes.shape[0], dtype=torch.uint8)
            gt_flags = torch.cat([gt_ones, gt_flags])

        num_expected_pos = int(self.num * self.pos_fraction)
        pos_inds = self.pos_sampler._sample_pos(
            assign_result, num_expected_pos, bboxes=rbboxes, **kwargs)
        # We found that sampled indices have duplicated items occasionally.
        # (may be a bug of PyTorch)
        pos_inds = pos_inds.unique()
        num_sampled_pos = pos_inds.numel()
        num_expected_neg = self.num - num_sampled_pos
        if self.neg_pos_ub >= 0:
            # Cap negatives at neg_pos_ub x (#pos, at least 1).
            _pos = max(1, num_sampled_pos)
            neg_upper_bound = int(self.neg_pos_ub * _pos)
            if num_expected_neg > neg_upper_bound:
                num_expected_neg = neg_upper_bound
        neg_inds = self.neg_sampler._sample_neg(
            assign_result, num_expected_neg, bboxes=rbboxes, **kwargs)
        neg_inds = neg_inds.unique()

        return SamplingResult(pos_inds, neg_inds, rbboxes, gt_rbboxes,
                              assign_result, gt_flags)
24 | """ 25 | assert len(gallery) >= num 26 | if isinstance(gallery, list): 27 | gallery = np.array(gallery) 28 | cands = np.arange(len(gallery)) 29 | np.random.shuffle(cands) 30 | rand_inds = cands[:num] 31 | if not isinstance(gallery, np.ndarray): 32 | rand_inds = torch.from_numpy(rand_inds).long().to(gallery.device) 33 | return gallery[rand_inds] 34 | 35 | def _sample_pos(self, assign_result, num_expected, **kwargs): 36 | """Randomly sample some positive samples.""" 37 | pos_inds = torch.nonzero(assign_result.gt_inds > 0) 38 | if pos_inds.numel() != 0: 39 | pos_inds = pos_inds.squeeze(1) 40 | if pos_inds.numel() <= num_expected: 41 | return pos_inds 42 | else: 43 | return self.random_choice(pos_inds, num_expected) 44 | 45 | def _sample_neg(self, assign_result, num_expected, **kwargs): 46 | """Randomly sample some negative samples.""" 47 | neg_inds = torch.nonzero(assign_result.gt_inds == 0) 48 | if neg_inds.numel() != 0: 49 | neg_inds = neg_inds.squeeze(1) 50 | if len(neg_inds) <= num_expected: 51 | return neg_inds 52 | else: 53 | return self.random_choice(neg_inds, num_expected) 54 | -------------------------------------------------------------------------------- /mmdet/core/bbox/samplers/sampling_result.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | class SamplingResult(object): 5 | 6 | def __init__(self, pos_inds, neg_inds, bboxes, gt_bboxes, assign_result, 7 | gt_flags): 8 | self.pos_inds = pos_inds 9 | self.neg_inds = neg_inds 10 | self.pos_bboxes = bboxes[pos_inds] 11 | self.neg_bboxes = bboxes[neg_inds] 12 | self.pos_is_gt = gt_flags[pos_inds] 13 | 14 | self.num_gts = gt_bboxes.shape[0] 15 | self.pos_assigned_gt_inds = assign_result.gt_inds[pos_inds] - 1 16 | self.pos_gt_bboxes = gt_bboxes[self.pos_assigned_gt_inds, :] 17 | if assign_result.labels is not None: 18 | self.pos_gt_labels = assign_result.labels[pos_inds] 19 | else: 20 | self.pos_gt_labels = None 21 | 22 | @property 23 | def 
bboxes(self): 24 | return torch.cat([self.pos_bboxes, self.neg_bboxes]) 25 | -------------------------------------------------------------------------------- /mmdet/core/bbox/setup_linux.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Deformable Convolutional Networks 3 | # Copyright (c) 2017 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Modified by Yuwen Xiong 6 | # -------------------------------------------------------- 7 | # Based on: 8 | # py-faster-rcnn 9 | # Copyright (c) 2016 by Contributors 10 | # Licence under The MIT License 11 | # py-faster-rcnn (https://github.com/rbgirshick/py-faster-rcnn) 12 | # -------------------------------------------------------- 13 | 14 | import os 15 | from os.path import join as pjoin 16 | from setuptools import setup 17 | from distutils.extension import Extension 18 | from Cython.Distutils import build_ext 19 | import numpy as np 20 | 21 | # Obtain the numpy include directory. This logic works across numpy versions. 22 | try: 23 | numpy_include = np.get_include() 24 | except AttributeError: 25 | numpy_include = np.get_numpy_include() 26 | 27 | 28 | def customize_compiler_for_nvcc(self): 29 | """inject deep into distutils to customize how the dispatch 30 | to gcc/nvcc works. 31 | If you subclass UnixCCompiler, it's not trivial to get your subclass 32 | injected in, and still have the right customizations (i.e. 33 | distutils.sysconfig.customize_compiler) run on it. So instead of going 34 | the OO route, I have this. Note, it's kindof like a wierd functional 35 | subclassing going on.""" 36 | 37 | # tell the compiler it can processes .cu 38 | self.src_extensions.append('.cu') 39 | 40 | # save references to the default compiler_so and _comple methods 41 | default_compiler_so = self.compiler_so 42 | super = self._compile 43 | 44 | # now redefine the _compile method. 
This gets executed for each 45 | # object but distutils doesn't have the ability to change compilers 46 | # based on source extension: we add it. 47 | def _compile(obj, src, ext, cc_args, extra_postargs, pp_opts): 48 | if os.path.splitext(src)[1] == '.cu': 49 | # use the cuda for .cu files 50 | self.set_executable('compiler_so', CUDA['nvcc']) 51 | # use only a subset of the extra_postargs, which are 1-1 translated 52 | # from the extra_compile_args in the Extension class 53 | postargs = extra_postargs['nvcc'] 54 | else: 55 | postargs = extra_postargs['gcc'] 56 | 57 | super(obj, src, ext, cc_args, postargs, pp_opts) 58 | # reset the default compiler_so, which we might have changed for cuda 59 | self.compiler_so = default_compiler_so 60 | 61 | # inject our redefined _compile method into the class 62 | self._compile = _compile 63 | 64 | 65 | # run the customize_compiler 66 | class custom_build_ext(build_ext): 67 | def build_extensions(self): 68 | customize_compiler_for_nvcc(self.compiler) 69 | build_ext.build_extensions(self) 70 | 71 | 72 | ext_modules = [ 73 | Extension( 74 | "bbox", 75 | ["bbox.pyx"], 76 | extra_compile_args={'gcc': ["-Wno-cpp", "-Wno-unused-function"]}, 77 | include_dirs=[numpy_include] 78 | ), 79 | ] 80 | 81 | setup( 82 | name='bbox_cython', 83 | ext_modules=ext_modules, 84 | # inject our custom trigger 85 | cmdclass={'build_ext': custom_build_ext}, 86 | ) 87 | -------------------------------------------------------------------------------- /mmdet/core/evaluation/__init__.py: -------------------------------------------------------------------------------- 1 | from .class_names import (voc_classes, imagenet_det_classes, 2 | imagenet_vid_classes, coco_classes, dataset_aliases, 3 | get_classes) 4 | from .coco_utils import coco_eval, fast_eval_recall, results2json 5 | from .dota_utils import OBBDet2Comp4, HBBSeg2Comp4, HBBDet2Comp4, \ 6 | OBBDetComp4, HBBOBB2Comp4 7 | from .eval_hooks import (DistEvalHook, DistEvalmAPHook, CocoDistEvalRecallHook, 8 | 
CocoDistEvalmAPHook) 9 | from .mean_ap import average_precision, eval_map, print_map_summary 10 | from .recall import (eval_recalls, print_recall_summary, plot_num_recall, 11 | plot_iou_recall) 12 | 13 | __all__ = [ 14 | 'voc_classes', 'imagenet_det_classes', 'imagenet_vid_classes', 15 | 'coco_classes', 'dataset_aliases', 'get_classes', 'coco_eval', 16 | 'fast_eval_recall', 'results2json', 'DistEvalHook', 'DistEvalmAPHook', 17 | 'CocoDistEvalRecallHook', 'CocoDistEvalmAPHook', 'average_precision', 18 | 'eval_map', 'print_map_summary', 'eval_recalls', 'print_recall_summary', 19 | 'plot_num_recall', 'plot_iou_recall', 'OBBDet2Comp4', 'HBBSeg2Comp4', 20 | 'HBBDet2Comp4', 'OBBDetComp4', 'HBBOBB2Comp4' 21 | ] 22 | -------------------------------------------------------------------------------- /mmdet/core/evaluation/bbox_overlaps.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def bbox_overlaps(bboxes1, bboxes2, mode='iou'): 5 | """Calculate the ious between each bbox of bboxes1 and bboxes2. 
6 | 7 | Args: 8 | bboxes1(ndarray): shape (n, 4) 9 | bboxes2(ndarray): shape (k, 4) 10 | mode(str): iou (intersection over union) or iof (intersection 11 | over foreground) 12 | 13 | Returns: 14 | ious(ndarray): shape (n, k) 15 | """ 16 | 17 | assert mode in ['iou', 'iof'] 18 | 19 | bboxes1 = bboxes1.astype(np.float32) 20 | bboxes2 = bboxes2.astype(np.float32) 21 | rows = bboxes1.shape[0] 22 | cols = bboxes2.shape[0] 23 | ious = np.zeros((rows, cols), dtype=np.float32) 24 | if rows * cols == 0: 25 | return ious 26 | exchange = False 27 | if bboxes1.shape[0] > bboxes2.shape[0]: 28 | bboxes1, bboxes2 = bboxes2, bboxes1 29 | ious = np.zeros((cols, rows), dtype=np.float32) 30 | exchange = True 31 | area1 = (bboxes1[:, 2] - bboxes1[:, 0] + 1) * ( 32 | bboxes1[:, 3] - bboxes1[:, 1] + 1) 33 | area2 = (bboxes2[:, 2] - bboxes2[:, 0] + 1) * ( 34 | bboxes2[:, 3] - bboxes2[:, 1] + 1) 35 | for i in range(bboxes1.shape[0]): 36 | x_start = np.maximum(bboxes1[i, 0], bboxes2[:, 0]) 37 | y_start = np.maximum(bboxes1[i, 1], bboxes2[:, 1]) 38 | x_end = np.minimum(bboxes1[i, 2], bboxes2[:, 2]) 39 | y_end = np.minimum(bboxes1[i, 3], bboxes2[:, 3]) 40 | overlap = np.maximum(x_end - x_start + 1, 0) * np.maximum( 41 | y_end - y_start + 1, 0) 42 | if mode == 'iou': 43 | union = area1[i] + area2 - overlap 44 | else: 45 | union = area1[i] if not exchange else area2 46 | ious[i, :] = overlap / union 47 | if exchange: 48 | ious = ious.T 49 | return ious 50 | -------------------------------------------------------------------------------- /mmdet/core/loss/__init__.py: -------------------------------------------------------------------------------- 1 | from .losses import (weighted_nll_loss, weighted_cross_entropy, 2 | weighted_binary_cross_entropy, sigmoid_focal_loss, 3 | py_sigmoid_focal_loss, weighted_sigmoid_focal_loss, 4 | mask_cross_entropy, smooth_l1_loss, weighted_smoothl1, 5 | balanced_l1_loss, weighted_balanced_l1_loss, iou_loss, 6 | bounded_iou_loss, weighted_iou_loss, accuracy) 7 | 8 
| __all__ = [ 9 | 'weighted_nll_loss', 'weighted_cross_entropy', 10 | 'weighted_binary_cross_entropy', 'sigmoid_focal_loss', 11 | 'py_sigmoid_focal_loss', 'weighted_sigmoid_focal_loss', 12 | 'mask_cross_entropy', 'smooth_l1_loss', 'weighted_smoothl1', 13 | 'balanced_l1_loss', 'weighted_balanced_l1_loss', 'bounded_iou_loss', 14 | 'weighted_iou_loss', 'iou_loss', 'accuracy' 15 | ] 16 | -------------------------------------------------------------------------------- /mmdet/core/mask/__init__.py: -------------------------------------------------------------------------------- 1 | from .utils import split_combined_polys 2 | from .mask_target import mask_target 3 | 4 | __all__ = ['split_combined_polys', 'mask_target'] 5 | -------------------------------------------------------------------------------- /mmdet/core/mask/mask_target.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | import mmcv 4 | 5 | 6 | def mask_target(pos_proposals_list, pos_assigned_gt_inds_list, gt_masks_list, 7 | cfg): 8 | cfg_list = [cfg for _ in range(len(pos_proposals_list))] 9 | mask_targets = map(mask_target_single, pos_proposals_list, 10 | pos_assigned_gt_inds_list, gt_masks_list, cfg_list) 11 | mask_targets = torch.cat(list(mask_targets)) 12 | return mask_targets 13 | 14 | 15 | def mask_target_single(pos_proposals, pos_assigned_gt_inds, gt_masks, cfg): 16 | mask_size = cfg.mask_size 17 | num_pos = pos_proposals.size(0) 18 | mask_targets = [] 19 | if num_pos > 0: 20 | proposals_np = pos_proposals.cpu().numpy() 21 | pos_assigned_gt_inds = pos_assigned_gt_inds.cpu().numpy() 22 | for i in range(num_pos): 23 | # import pdb 24 | # pdb.set_trace() 25 | gt_mask = gt_masks[pos_assigned_gt_inds[i]] 26 | bbox = proposals_np[i, :].astype(np.int32) 27 | x1, y1, x2, y2 = bbox 28 | w = np.maximum(x2 - x1 + 1, 1) 29 | h = np.maximum(y2 - y1 + 1, 1) 30 | # mask is uint8 both before and after resizing 31 | target = 
mmcv.imresize(gt_mask[y1:y1 + h, x1:x1 + w], 32 | (mask_size, mask_size)) 33 | mask_targets.append(target) 34 | mask_targets = torch.from_numpy(np.stack(mask_targets)).float().to( 35 | pos_proposals.device) 36 | else: 37 | mask_targets = pos_proposals.new_zeros((0, mask_size, mask_size)) 38 | return mask_targets 39 | -------------------------------------------------------------------------------- /mmdet/core/mask/utils.py: -------------------------------------------------------------------------------- 1 | import mmcv 2 | 3 | 4 | def split_combined_polys(polys, poly_lens, polys_per_mask): 5 | """Split the combined 1-D polys into masks. 6 | 7 | A mask is represented as a list of polys, and a poly is represented as 8 | a 1-D array. In dataset, all masks are concatenated into a single 1-D 9 | tensor. Here we need to split the tensor into original representations. 10 | 11 | Args: 12 | polys (list): a list (length = image num) of 1-D tensors 13 | poly_lens (list): a list (length = image num) of poly length 14 | polys_per_mask (list): a list (length = image num) of poly number 15 | of each mask 16 | 17 | Returns: 18 | list: a list (length = image num) of list (length = mask num) of 19 | list (length = poly num) of numpy array 20 | """ 21 | mask_polys_list = [] 22 | for img_id in range(len(polys)): 23 | polys_single = polys[img_id] 24 | polys_lens_single = poly_lens[img_id].tolist() 25 | polys_per_mask_single = polys_per_mask[img_id].tolist() 26 | 27 | split_polys = mmcv.slice_list(polys_single, polys_lens_single) 28 | mask_polys = mmcv.slice_list(split_polys, polys_per_mask_single) 29 | mask_polys_list.append(mask_polys) 30 | return mask_polys_list 31 | -------------------------------------------------------------------------------- /mmdet/core/post_processing/__init__.py: -------------------------------------------------------------------------------- 1 | from .bbox_nms import multiclass_nms 2 | from .merge_augs import (merge_aug_proposals, merge_aug_bboxes, 3 | 
def multiclass_nms(multi_bboxes,
                   multi_scores,
                   score_thr,
                   nms_cfg,
                   max_num=-1,
                   score_factors=None):
    """NMS for multi-class bboxes.

    Args:
        multi_bboxes (Tensor): shape (n, #class*4) or (n, 4)
        multi_scores (Tensor): shape (n, #class); class 0 is background.
        score_thr (float): bbox threshold, bboxes with scores lower than it
            will not be considered.
        nms_cfg (dict): NMS config; 'type' selects the op in nms_wrapper and
            the remaining keys are passed through to it.
        max_num (int): if positive and there are more than max_num bboxes
            after NMS, only the top max_num are kept; a non-positive value
            keeps everything.
        score_factors (Tensor): The factors multiplied to scores before
            applying NMS.

    Returns:
        tuple: (bboxes, labels), tensors of shape (k, 5) and (k, ). Labels
            are 0-based.
    """
    num_classes = multi_scores.shape[1]
    bboxes, labels = [], []
    nms_cfg_ = nms_cfg.copy()
    nms_type = nms_cfg_.pop('type', 'nms')
    nms_op = getattr(nms_wrapper, nms_type)
    for i in range(1, num_classes):  # class 0 is background: skip it
        cls_inds = multi_scores[:, i] > score_thr
        if not cls_inds.any():
            continue
        # get bboxes and scores of this class
        if multi_bboxes.shape[1] == 4:
            # class-agnostic regression: one box shared by all classes
            _bboxes = multi_bboxes[cls_inds, :]
        else:
            _bboxes = multi_bboxes[cls_inds, i * 4:(i + 1) * 4]
        _scores = multi_scores[cls_inds, i]
        if score_factors is not None:
            _scores *= score_factors[cls_inds]
        cls_dets = torch.cat([_bboxes, _scores[:, None]], dim=1)
        cls_dets, _ = nms_op(cls_dets, **nms_cfg_)
        cls_labels = multi_bboxes.new_full(
            (cls_dets.shape[0], ), i - 1, dtype=torch.long)
        bboxes.append(cls_dets)
        labels.append(cls_labels)
    if bboxes:
        bboxes = torch.cat(bboxes)
        labels = torch.cat(labels)
        # BUGFIX: with the default max_num=-1 the old code always entered
        # this branch (n > -1 is always true) and `inds[:max_num]` ==
        # `inds[:-1]` silently dropped the lowest-scoring detection. Only
        # truncate when a positive cap is given.
        if max_num > 0 and bboxes.shape[0] > max_num:
            _, inds = bboxes[:, -1].sort(descending=True)
            inds = inds[:max_num]
            bboxes = bboxes[inds]
            labels = labels[inds]
    else:
        bboxes = multi_bboxes.new_zeros((0, 5))
        labels = multi_bboxes.new_zeros((0, ), dtype=torch.long)

    return bboxes, labels
| from torch._utils import (_flatten_dense_tensors, _unflatten_dense_tensors, 5 | _take_tensors) 6 | from mmcv.runner import OptimizerHook 7 | 8 | 9 | def _allreduce_coalesced(tensors, world_size, bucket_size_mb=-1): 10 | if bucket_size_mb > 0: 11 | bucket_size_bytes = bucket_size_mb * 1024 * 1024 12 | buckets = _take_tensors(tensors, bucket_size_bytes) 13 | else: 14 | buckets = OrderedDict() 15 | for tensor in tensors: 16 | tp = tensor.type() 17 | if tp not in buckets: 18 | buckets[tp] = [] 19 | buckets[tp].append(tensor) 20 | buckets = buckets.values() 21 | 22 | for bucket in buckets: 23 | flat_tensors = _flatten_dense_tensors(bucket) 24 | dist.all_reduce(flat_tensors) 25 | flat_tensors.div_(world_size) 26 | for tensor, synced in zip( 27 | bucket, _unflatten_dense_tensors(flat_tensors, bucket)): 28 | tensor.copy_(synced) 29 | 30 | 31 | def allreduce_grads(model, coalesce=True, bucket_size_mb=-1): 32 | grads = [ 33 | param.grad.data for param in model.parameters() 34 | if param.requires_grad and param.grad is not None 35 | ] 36 | world_size = dist.get_world_size() 37 | if coalesce: 38 | _allreduce_coalesced(grads, world_size, bucket_size_mb) 39 | else: 40 | for tensor in grads: 41 | dist.all_reduce(tensor.div_(world_size)) 42 | 43 | 44 | class DistOptimizerHook(OptimizerHook): 45 | 46 | def __init__(self, grad_clip=None, coalesce=True, bucket_size_mb=-1): 47 | self.grad_clip = grad_clip 48 | self.coalesce = coalesce 49 | self.bucket_size_mb = bucket_size_mb 50 | 51 | def after_train_iter(self, runner): 52 | runner.optimizer.zero_grad() 53 | runner.outputs['loss'].backward() 54 | allreduce_grads(runner.model, self.coalesce, self.bucket_size_mb) 55 | if self.grad_clip is not None: 56 | self.clip_grads(runner.model.parameters()) 57 | runner.optimizer.step() 58 | -------------------------------------------------------------------------------- /mmdet/core/utils/misc.py: -------------------------------------------------------------------------------- 1 | from functools 
import partial 2 | 3 | import mmcv 4 | import numpy as np 5 | from six.moves import map, zip 6 | 7 | 8 | def tensor2imgs(tensor, mean=(0, 0, 0), std=(1, 1, 1), to_rgb=True): 9 | num_imgs = tensor.size(0) 10 | mean = np.array(mean, dtype=np.float32) 11 | std = np.array(std, dtype=np.float32) 12 | imgs = [] 13 | for img_id in range(num_imgs): 14 | img = tensor[img_id, ...].cpu().numpy().transpose(1, 2, 0) 15 | img = mmcv.imdenormalize( 16 | img, mean, std, to_bgr=to_rgb).astype(np.uint8) 17 | imgs.append(np.ascontiguousarray(img)) 18 | return imgs 19 | 20 | 21 | def multi_apply(func, *args, **kwargs): 22 | pfunc = partial(func, **kwargs) if kwargs else func 23 | map_results = map(pfunc, *args) 24 | return tuple(map(list, zip(*map_results))) 25 | 26 | 27 | def unmap(data, count, inds, fill=0): 28 | """ Unmap a subset of item (data) back to the original set of items (of 29 | size count) """ 30 | if data.dim() == 1: 31 | ret = data.new_full((count, ), fill) 32 | ret[inds] = data 33 | else: 34 | new_size = (count, ) + data.size()[1:] 35 | ret = data.new_full(new_size, fill) 36 | ret[inds, :] = data 37 | return ret 38 | -------------------------------------------------------------------------------- /mmdet/datasets/HRSC.py: -------------------------------------------------------------------------------- 1 | from .coco import CocoDataset 2 | 3 | class HRSCL1Dataset(CocoDataset): 4 | 5 | CLASSES = ('ship', ) 6 | 7 | -------------------------------------------------------------------------------- /mmdet/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | from .custom import CustomDataset 2 | from .xml_style import XMLDataset 3 | from .coco import CocoDataset 4 | from .voc import VOCDataset 5 | from .loader import GroupSampler, DistributedGroupSampler, build_dataloader 6 | from .utils import to_tensor, random_scale, show_ann, get_dataset 7 | from .concat_dataset import ConcatDataset 8 | from .repeat_dataset import 
class ConcatDataset(_ConcatDataset):
    """A wrapper of concatenated dataset.

    Same as :obj:`torch.utils.data.dataset.ConcatDataset`, but additionally
    concatenates the per-image aspect-ratio group ``flag`` of the
    sub-datasets.

    Args:
        datasets (list[:obj:`Dataset`]): A list of datasets.
    """

    def __init__(self, datasets):
        super(ConcatDataset, self).__init__(datasets)
        # All sub-datasets are assumed to share one class list; expose the
        # first one's.
        self.CLASSES = datasets[0].CLASSES
        if hasattr(datasets[0], 'flag'):
            self.flag = np.concatenate([ds.flag for ds in datasets])
num_gpus * workers_per_gpu 36 | 37 | data_loader = DataLoader( 38 | dataset, 39 | batch_size=batch_size, 40 | sampler=sampler, 41 | num_workers=num_workers, 42 | collate_fn=partial(collate, samples_per_gpu=imgs_per_gpu), 43 | pin_memory=False, 44 | **kwargs) 45 | 46 | return data_loader 47 | -------------------------------------------------------------------------------- /mmdet/datasets/repeat_dataset.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | class RepeatDataset(object): 5 | 6 | def __init__(self, dataset, times): 7 | self.dataset = dataset 8 | self.times = times 9 | self.CLASSES = dataset.CLASSES 10 | if hasattr(self.dataset, 'flag'): 11 | self.flag = np.tile(self.dataset.flag, times) 12 | 13 | self._ori_len = len(self.dataset) 14 | 15 | def __getitem__(self, idx): 16 | return self.dataset[idx % self._ori_len] 17 | 18 | def __len__(self): 19 | return self.times * self._ori_len 20 | -------------------------------------------------------------------------------- /mmdet/datasets/voc.py: -------------------------------------------------------------------------------- 1 | from .xml_style import XMLDataset 2 | 3 | 4 | class VOCDataset(XMLDataset): 5 | 6 | CLASSES = ('aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 7 | 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse', 8 | 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train', 9 | 'tvmonitor') 10 | 11 | def __init__(self, **kwargs): 12 | super(VOCDataset, self).__init__(**kwargs) 13 | if 'VOC2007' in self.img_prefix: 14 | self.year = 2007 15 | elif 'VOC2012' in self.img_prefix: 16 | self.year = 2012 17 | else: 18 | raise ValueError('Cannot infer dataset year from img_prefix') 19 | -------------------------------------------------------------------------------- /mmdet/datasets/xml_style.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | import 
xml.etree.ElementTree as ET 3 | 4 | import mmcv 5 | import numpy as np 6 | 7 | from .custom import CustomDataset 8 | 9 | 10 | class XMLDataset(CustomDataset): 11 | 12 | def __init__(self, **kwargs): 13 | super(XMLDataset, self).__init__(**kwargs) 14 | self.cat2label = {cat: i + 1 for i, cat in enumerate(self.CLASSES)} 15 | 16 | def load_annotations(self, ann_file): 17 | img_infos = [] 18 | img_ids = mmcv.list_from_file(ann_file) 19 | for img_id in img_ids: 20 | filename = 'JPEGImages/{}.jpg'.format(img_id) 21 | xml_path = osp.join(self.img_prefix, 'Annotations', 22 | '{}.xml'.format(img_id)) 23 | tree = ET.parse(xml_path) 24 | root = tree.getroot() 25 | size = root.find('size') 26 | width = int(size.find('width').text) 27 | height = int(size.find('height').text) 28 | img_infos.append( 29 | dict(id=img_id, filename=filename, width=width, height=height)) 30 | return img_infos 31 | 32 | def get_ann_info(self, idx): 33 | img_id = self.img_infos[idx]['id'] 34 | xml_path = osp.join(self.img_prefix, 'Annotations', 35 | '{}.xml'.format(img_id)) 36 | tree = ET.parse(xml_path) 37 | root = tree.getroot() 38 | bboxes = [] 39 | labels = [] 40 | bboxes_ignore = [] 41 | labels_ignore = [] 42 | for obj in root.findall('object'): 43 | name = obj.find('name').text 44 | label = self.cat2label[name] 45 | difficult = int(obj.find('difficult').text) 46 | bnd_box = obj.find('bndbox') 47 | bbox = [ 48 | int(bnd_box.find('xmin').text), 49 | int(bnd_box.find('ymin').text), 50 | int(bnd_box.find('xmax').text), 51 | int(bnd_box.find('ymax').text) 52 | ] 53 | if difficult: 54 | bboxes_ignore.append(bbox) 55 | labels_ignore.append(label) 56 | else: 57 | bboxes.append(bbox) 58 | labels.append(label) 59 | if not bboxes: 60 | bboxes = np.zeros((0, 4)) 61 | labels = np.zeros((0, )) 62 | else: 63 | bboxes = np.array(bboxes, ndmin=2) - 1 64 | labels = np.array(labels) 65 | if not bboxes_ignore: 66 | bboxes_ignore = np.zeros((0, 4)) 67 | labels_ignore = np.zeros((0, )) 68 | else: 69 | bboxes_ignore = 
np.array(bboxes_ignore, ndmin=2) - 1 70 | labels_ignore = np.array(labels_ignore) 71 | ann = dict( 72 | bboxes=bboxes.astype(np.float32), 73 | labels=labels.astype(np.int64), 74 | bboxes_ignore=bboxes_ignore.astype(np.float32), 75 | labels_ignore=labels_ignore.astype(np.int64)) 76 | return ann 77 | -------------------------------------------------------------------------------- /mmdet/models/__init__.py: -------------------------------------------------------------------------------- 1 | from .backbones import * # noqa: F401,F403 2 | from .necks import * # noqa: F401,F403 3 | from .roi_extractors import * # noqa: F401,F403 4 | from .rroi_extractors import * # noqa: F401, F403 5 | from .anchor_heads import * # noqa: F401,F403 6 | from .shared_heads import * # noqa: F401,F403 7 | from .bbox_heads import * # noqa: F401,F403 8 | from .rbbox_heads import * # noqa: F401,F403 9 | from .mask_heads import * # noqa: F401,F403 10 | from .losses import * # noqa: F401,F403 11 | from .detectors import * # noqa: F401,F403 12 | from .registry import (BACKBONES, NECKS, ROI_EXTRACTORS, SHARED_HEADS, HEADS, 13 | LOSSES, DETECTORS) 14 | from .builder import (build_backbone, build_neck, build_roi_extractor, 15 | build_shared_head, build_head, build_loss, 16 | build_detector) 17 | 18 | __all__ = [ 19 | 'BACKBONES', 'NECKS', 'ROI_EXTRACTORS', 'SHARED_HEADS', 'HEADS', 'LOSSES', 20 | 'DETECTORS', 'build_backbone', 'build_neck', 'build_roi_extractor', 21 | 'build_shared_head', 'build_head', 'build_loss', 'build_detector' 22 | ] 23 | -------------------------------------------------------------------------------- /mmdet/models/anchor_heads/__init__.py: -------------------------------------------------------------------------------- 1 | from .anchor_head import AnchorHead 2 | from .guided_anchor_head import GuidedAnchorHead, FeatureAdaption 3 | from .fcos_head import FCOSHead 4 | from .rpn_head import RPNHead 5 | from .ga_rpn_head import GARPNHead 6 | from .retina_head import RetinaHead 7 | 
from .ga_retina_head import GARetinaHead 8 | from .ssd_head import SSDHead 9 | from .anchor_head_rbbox import AnchorHeadRbbox 10 | from .retina_head_rbbox import RetinaHeadRbbox 11 | 12 | __all__ = [ 13 | 'AnchorHead', 'GuidedAnchorHead', 'FeatureAdaption', 'RPNHead', 14 | 'GARPNHead', 'RetinaHead', 'GARetinaHead', 'SSDHead', 'FCOSHead', 15 | 'AnchorHeadRbbox', 16 | ] 17 | -------------------------------------------------------------------------------- /mmdet/models/anchor_heads/retina_head.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch.nn as nn 3 | from mmcv.cnn import normal_init 4 | 5 | from .anchor_head import AnchorHead 6 | from ..registry import HEADS 7 | from ..utils import bias_init_with_prob, ConvModule 8 | 9 | 10 | @HEADS.register_module 11 | class RetinaHead(AnchorHead): 12 | 13 | def __init__(self, 14 | num_classes, 15 | in_channels, 16 | stacked_convs=4, 17 | octave_base_scale=4, 18 | scales_per_octave=3, 19 | conv_cfg=None, 20 | norm_cfg=None, 21 | **kwargs): 22 | self.stacked_convs = stacked_convs 23 | self.octave_base_scale = octave_base_scale 24 | self.scales_per_octave = scales_per_octave 25 | self.conv_cfg = conv_cfg 26 | self.norm_cfg = norm_cfg 27 | octave_scales = np.array( 28 | [2**(i / scales_per_octave) for i in range(scales_per_octave)]) 29 | anchor_scales = octave_scales * octave_base_scale 30 | super(RetinaHead, self).__init__( 31 | num_classes, in_channels, anchor_scales=anchor_scales, **kwargs) 32 | 33 | def _init_layers(self): 34 | self.relu = nn.ReLU(inplace=True) 35 | self.cls_convs = nn.ModuleList() 36 | self.reg_convs = nn.ModuleList() 37 | for i in range(self.stacked_convs): 38 | chn = self.in_channels if i == 0 else self.feat_channels 39 | self.cls_convs.append( 40 | ConvModule( 41 | chn, 42 | self.feat_channels, 43 | 3, 44 | stride=1, 45 | padding=1, 46 | conv_cfg=self.conv_cfg, 47 | norm_cfg=self.norm_cfg)) 48 | self.reg_convs.append( 49 | 
ConvModule( 50 | chn, 51 | self.feat_channels, 52 | 3, 53 | stride=1, 54 | padding=1, 55 | conv_cfg=self.conv_cfg, 56 | norm_cfg=self.norm_cfg)) 57 | self.retina_cls = nn.Conv2d( 58 | self.feat_channels, 59 | self.num_anchors * self.cls_out_channels, 60 | 3, 61 | padding=1) 62 | self.retina_reg = nn.Conv2d( 63 | self.feat_channels, self.num_anchors * 4, 3, padding=1) 64 | 65 | def init_weights(self): 66 | for m in self.cls_convs: 67 | normal_init(m.conv, std=0.01) 68 | for m in self.reg_convs: 69 | normal_init(m.conv, std=0.01) 70 | bias_cls = bias_init_with_prob(0.01) 71 | normal_init(self.retina_cls, std=0.01, bias=bias_cls) 72 | normal_init(self.retina_reg, std=0.01) 73 | 74 | def forward_single(self, x): 75 | cls_feat = x 76 | reg_feat = x 77 | for cls_conv in self.cls_convs: 78 | cls_feat = cls_conv(cls_feat) 79 | for reg_conv in self.reg_convs: 80 | reg_feat = reg_conv(reg_feat) 81 | cls_score = self.retina_cls(cls_feat) 82 | bbox_pred = self.retina_reg(reg_feat) 83 | return cls_score, bbox_pred 84 | -------------------------------------------------------------------------------- /mmdet/models/anchor_heads/retina_head_rbbox.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch.nn as nn 3 | from mmcv.cnn import normal_init 4 | 5 | # from .anchor_head import AnchorHead 6 | from .anchor_head_rbbox import AnchorHeadRbbox 7 | from ..registry import HEADS 8 | from ..utils import bias_init_with_prob, ConvModule 9 | 10 | 11 | @HEADS.register_module 12 | class RetinaHeadRbbox(AnchorHeadRbbox): 13 | 14 | def __init__(self, 15 | num_classes, 16 | in_channels, 17 | stacked_convs=4, 18 | octave_base_scale=4, 19 | scales_per_octave=3, 20 | conv_cfg=None, 21 | norm_cfg=None, 22 | **kwargs): 23 | self.stacked_convs = stacked_convs 24 | self.octave_base_scale = octave_base_scale 25 | self.scales_per_octave = scales_per_octave 26 | self.conv_cfg = conv_cfg 27 | self.norm_cfg = norm_cfg 28 | octave_scales = 
np.array( 29 | [2**(i / scales_per_octave) for i in range(scales_per_octave)]) 30 | anchor_scales = octave_scales * octave_base_scale 31 | super(RetinaHeadRbbox, self).__init__( 32 | num_classes, in_channels, anchor_scales=anchor_scales, **kwargs) 33 | 34 | def _init_layers(self): 35 | self.relu = nn.ReLU(inplace=True) 36 | self.cls_convs = nn.ModuleList() 37 | self.reg_convs = nn.ModuleList() 38 | for i in range(self.stacked_convs): 39 | chn = self.in_channels if i == 0 else self.feat_channels 40 | self.cls_convs.append( 41 | ConvModule( 42 | chn, 43 | self.feat_channels, 44 | 3, 45 | stride=1, 46 | padding=1, 47 | conv_cfg=self.conv_cfg, 48 | norm_cfg=self.norm_cfg)) 49 | self.reg_convs.append( 50 | ConvModule( 51 | chn, 52 | self.feat_channels, 53 | 3, 54 | stride=1, 55 | padding=1, 56 | conv_cfg=self.conv_cfg, 57 | norm_cfg=self.norm_cfg)) 58 | self.retina_cls = nn.Conv2d( 59 | self.feat_channels, 60 | self.num_anchors * self.cls_out_channels, 61 | 3, 62 | padding=1) 63 | self.retina_reg = nn.Conv2d( 64 | self.feat_channels, self.num_anchors * 5, 3, padding=1) 65 | 66 | def init_weights(self): 67 | for m in self.cls_convs: 68 | normal_init(m.conv, std=0.01) 69 | for m in self.reg_convs: 70 | normal_init(m.conv, std=0.01) 71 | bias_cls = bias_init_with_prob(0.01) 72 | normal_init(self.retina_cls, std=0.01, bias=bias_cls) 73 | normal_init(self.retina_reg, std=0.01) 74 | 75 | def forward_single(self, x): 76 | cls_feat = x 77 | reg_feat = x 78 | for cls_conv in self.cls_convs: 79 | cls_feat = cls_conv(cls_feat) 80 | for reg_conv in self.reg_convs: 81 | reg_feat = reg_conv(reg_feat) 82 | cls_score = self.retina_cls(cls_feat) 83 | bbox_pred = self.retina_reg(reg_feat) 84 | return cls_score, bbox_pred 85 | -------------------------------------------------------------------------------- /mmdet/models/backbones/__init__.py: -------------------------------------------------------------------------------- 1 | from .resnet import ResNet, make_res_layer 2 | from .resnext 
import ResNeXt 3 | from .ssd_vgg import SSDVGG 4 | from .hrnet import HRNet 5 | 6 | __all__ = ['ResNet', 'make_res_layer', 'ResNeXt', 'SSDVGG', 'HRNet'] 7 | -------------------------------------------------------------------------------- /mmdet/models/bbox_heads/__init__.py: -------------------------------------------------------------------------------- 1 | from .bbox_head import BBoxHead 2 | from .convfc_bbox_head import ConvFCBBoxHead, SharedFCBBoxHead 3 | 4 | __all__ = ['BBoxHead', 'ConvFCBBoxHead', 'SharedFCBBoxHead'] 5 | -------------------------------------------------------------------------------- /mmdet/models/builder.py: -------------------------------------------------------------------------------- 1 | import mmcv 2 | from torch import nn 3 | 4 | from .registry import (BACKBONES, NECKS, ROI_EXTRACTORS, SHARED_HEADS, HEADS, 5 | LOSSES, DETECTORS) 6 | 7 | 8 | def _build_module(cfg, registry, default_args): 9 | assert isinstance(cfg, dict) and 'type' in cfg 10 | assert isinstance(default_args, dict) or default_args is None 11 | args = cfg.copy() 12 | obj_type = args.pop('type') 13 | if mmcv.is_str(obj_type): 14 | if obj_type not in registry.module_dict: 15 | raise KeyError('{} is not in the {} registry'.format( 16 | obj_type, registry.name)) 17 | obj_type = registry.module_dict[obj_type] 18 | elif not isinstance(obj_type, type): 19 | raise TypeError('type must be a str or valid type, but got {}'.format( 20 | type(obj_type))) 21 | if default_args is not None: 22 | for name, value in default_args.items(): 23 | args.setdefault(name, value) 24 | return obj_type(**args) 25 | 26 | 27 | def build(cfg, registry, default_args=None): 28 | if isinstance(cfg, list): 29 | modules = [_build_module(cfg_, registry, default_args) for cfg_ in cfg] 30 | return nn.Sequential(*modules) 31 | else: 32 | return _build_module(cfg, registry, default_args) 33 | 34 | 35 | def build_backbone(cfg): 36 | return build(cfg, BACKBONES) 37 | 38 | 39 | def build_neck(cfg): 40 | return 
build(cfg, NECKS) 41 | 42 | 43 | def build_roi_extractor(cfg): 44 | return build(cfg, ROI_EXTRACTORS) 45 | 46 | 47 | def build_shared_head(cfg): 48 | return build(cfg, SHARED_HEADS) 49 | 50 | 51 | def build_head(cfg): 52 | return build(cfg, HEADS) 53 | 54 | 55 | def build_loss(cfg): 56 | return build(cfg, LOSSES) 57 | 58 | 59 | def build_detector(cfg, train_cfg=None, test_cfg=None): 60 | return build(cfg, DETECTORS, dict(train_cfg=train_cfg, test_cfg=test_cfg)) 61 | -------------------------------------------------------------------------------- /mmdet/models/detectors/__init__.py: -------------------------------------------------------------------------------- 1 | from .base import BaseDetector 2 | from .single_stage import SingleStageDetector 3 | from .two_stage import TwoStageDetector 4 | from .rpn import RPN 5 | from .fast_rcnn import FastRCNN 6 | from .faster_rcnn import FasterRCNN 7 | from .mask_rcnn import MaskRCNN 8 | from .cascade_rcnn import CascadeRCNN 9 | from .htc import HybridTaskCascade 10 | from .retinanet import RetinaNet 11 | from .fcos import FCOS 12 | from .faster_rcnn_obb import FasterRCNNOBB 13 | from .two_stage_rbbox import TwoStageDetectorRbbox 14 | from .RoITransformer import RoITransformer 15 | from .faster_rcnn_hbb_obb import FasterRCNNHBBOBB 16 | from .single_stage_rbbox import SingleStageDetectorRbbox 17 | from .retinanet_obb import RetinaNetRbbox 18 | __all__ = [ 19 | 'BaseDetector', 'SingleStageDetector', 'TwoStageDetector', 'RPN', 20 | 'FastRCNN', 'FasterRCNN', 'MaskRCNN', 'CascadeRCNN', 'HybridTaskCascade', 21 | 'RetinaNet', 'FCOS', 'FasterRCNNOBB', 'TwoStageDetectorRbbox', 22 | 'RoITransformer', 'FasterRCNNHBBOBB', 23 | 'SingleStageDetectorRbbox', 'RetinaNetRbbox' 24 | ] 25 | -------------------------------------------------------------------------------- /mmdet/models/detectors/fast_rcnn.py: -------------------------------------------------------------------------------- 1 | from .two_stage import TwoStageDetector 2 | from 
..registry import DETECTORS 3 | 4 | 5 | @DETECTORS.register_module 6 | class FastRCNN(TwoStageDetector): 7 | 8 | def __init__(self, 9 | backbone, 10 | bbox_roi_extractor, 11 | bbox_head, 12 | train_cfg, 13 | test_cfg, 14 | neck=None, 15 | shared_head=None, 16 | mask_roi_extractor=None, 17 | mask_head=None, 18 | pretrained=None): 19 | super(FastRCNN, self).__init__( 20 | backbone=backbone, 21 | neck=neck, 22 | shared_head=shared_head, 23 | bbox_roi_extractor=bbox_roi_extractor, 24 | bbox_head=bbox_head, 25 | train_cfg=train_cfg, 26 | test_cfg=test_cfg, 27 | mask_roi_extractor=mask_roi_extractor, 28 | mask_head=mask_head, 29 | pretrained=pretrained) 30 | 31 | def forward_test(self, imgs, img_metas, proposals, **kwargs): 32 | for var, name in [(imgs, 'imgs'), (img_metas, 'img_metas')]: 33 | if not isinstance(var, list): 34 | raise TypeError('{} must be a list, but got {}'.format( 35 | name, type(var))) 36 | 37 | num_augs = len(imgs) 38 | if num_augs != len(img_metas): 39 | raise ValueError( 40 | 'num of augmentations ({}) != num of image meta ({})'.format( 41 | len(imgs), len(img_metas))) 42 | # TODO: remove the restriction of imgs_per_gpu == 1 when prepared 43 | imgs_per_gpu = imgs[0].size(0) 44 | assert imgs_per_gpu == 1 45 | 46 | if num_augs == 1: 47 | return self.simple_test(imgs[0], img_metas[0], proposals[0], 48 | **kwargs) 49 | else: 50 | return self.aug_test(imgs, img_metas, proposals, **kwargs) 51 | -------------------------------------------------------------------------------- /mmdet/models/detectors/faster_rcnn.py: -------------------------------------------------------------------------------- 1 | from .two_stage import TwoStageDetector 2 | from ..registry import DETECTORS 3 | 4 | 5 | @DETECTORS.register_module 6 | class FasterRCNN(TwoStageDetector): 7 | 8 | def __init__(self, 9 | backbone, 10 | rpn_head, 11 | bbox_roi_extractor, 12 | bbox_head, 13 | train_cfg, 14 | test_cfg, 15 | neck=None, 16 | shared_head=None, 17 | pretrained=None): 18 | 
super(FasterRCNN, self).__init__( 19 | backbone=backbone, 20 | neck=neck, 21 | shared_head=shared_head, 22 | rpn_head=rpn_head, 23 | bbox_roi_extractor=bbox_roi_extractor, 24 | bbox_head=bbox_head, 25 | train_cfg=train_cfg, 26 | test_cfg=test_cfg, 27 | pretrained=pretrained) 28 | -------------------------------------------------------------------------------- /mmdet/models/detectors/faster_rcnn_obb.py: -------------------------------------------------------------------------------- 1 | from .two_stage_rbbox import TwoStageDetectorRbbox 2 | from ..registry import DETECTORS 3 | 4 | 5 | @DETECTORS.register_module 6 | class FasterRCNNOBB(TwoStageDetectorRbbox): 7 | 8 | def __init__(self, 9 | backbone, 10 | rpn_head, 11 | bbox_roi_extractor, 12 | bbox_head, 13 | train_cfg, 14 | test_cfg, 15 | neck=None, 16 | shared_head=None, 17 | pretrained=None): 18 | super(FasterRCNNOBB, self).__init__( 19 | backbone=backbone, 20 | neck=neck, 21 | shared_head=shared_head, 22 | rpn_head=rpn_head, 23 | bbox_roi_extractor=bbox_roi_extractor, 24 | bbox_head=bbox_head, 25 | train_cfg=train_cfg, 26 | test_cfg=test_cfg, 27 | pretrained=pretrained) 28 | -------------------------------------------------------------------------------- /mmdet/models/detectors/fcos.py: -------------------------------------------------------------------------------- 1 | from .single_stage import SingleStageDetector 2 | from ..registry import DETECTORS 3 | 4 | 5 | @DETECTORS.register_module 6 | class FCOS(SingleStageDetector): 7 | 8 | def __init__(self, 9 | backbone, 10 | neck, 11 | bbox_head, 12 | train_cfg=None, 13 | test_cfg=None, 14 | pretrained=None): 15 | super(FCOS, self).__init__(backbone, neck, bbox_head, train_cfg, 16 | test_cfg, pretrained) 17 | -------------------------------------------------------------------------------- /mmdet/models/detectors/mask_rcnn.py: -------------------------------------------------------------------------------- 1 | from .two_stage import TwoStageDetector 2 | from 
..registry import DETECTORS 3 | 4 | 5 | @DETECTORS.register_module 6 | class MaskRCNN(TwoStageDetector): 7 | 8 | def __init__(self, 9 | backbone, 10 | rpn_head, 11 | bbox_roi_extractor, 12 | bbox_head, 13 | mask_roi_extractor, 14 | mask_head, 15 | train_cfg, 16 | test_cfg, 17 | neck=None, 18 | shared_head=None, 19 | pretrained=None): 20 | super(MaskRCNN, self).__init__( 21 | backbone=backbone, 22 | neck=neck, 23 | shared_head=shared_head, 24 | rpn_head=rpn_head, 25 | bbox_roi_extractor=bbox_roi_extractor, 26 | bbox_head=bbox_head, 27 | mask_roi_extractor=mask_roi_extractor, 28 | mask_head=mask_head, 29 | train_cfg=train_cfg, 30 | test_cfg=test_cfg, 31 | pretrained=pretrained) 32 | -------------------------------------------------------------------------------- /mmdet/models/detectors/retinanet.py: -------------------------------------------------------------------------------- 1 | from .single_stage import SingleStageDetector 2 | from ..registry import DETECTORS 3 | 4 | 5 | @DETECTORS.register_module 6 | class RetinaNet(SingleStageDetector): 7 | 8 | def __init__(self, 9 | backbone, 10 | neck, 11 | bbox_head, 12 | train_cfg=None, 13 | test_cfg=None, 14 | pretrained=None): 15 | super(RetinaNet, self).__init__(backbone, neck, bbox_head, train_cfg, 16 | test_cfg, pretrained) 17 | -------------------------------------------------------------------------------- /mmdet/models/detectors/retinanet_obb.py: -------------------------------------------------------------------------------- 1 | from .single_stage_rbbox import SingleStageDetectorRbbox 2 | from ..registry import DETECTORS 3 | 4 | 5 | @DETECTORS.register_module 6 | class RetinaNetRbbox(SingleStageDetectorRbbox): 7 | 8 | def __init__(self, 9 | backbone, 10 | neck, 11 | bbox_head=None, 12 | rbbox_head=None, 13 | train_cfg=None, 14 | test_cfg=None, 15 | pretrained=None): 16 | super(RetinaNetRbbox, self).__init__(backbone, neck, bbox_head, rbbox_head, 17 | train_cfg, test_cfg, pretrained) 18 | 
-------------------------------------------------------------------------------- /mmdet/models/detectors/single_stage.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | from .base import BaseDetector 4 | from .. import builder 5 | from ..registry import DETECTORS 6 | from mmdet.core import bbox2result 7 | 8 | 9 | @DETECTORS.register_module 10 | class SingleStageDetector(BaseDetector): 11 | 12 | def __init__(self, 13 | backbone, 14 | neck=None, 15 | bbox_head=None, 16 | train_cfg=None, 17 | test_cfg=None, 18 | pretrained=None): 19 | super(SingleStageDetector, self).__init__() 20 | self.backbone = builder.build_backbone(backbone) 21 | if neck is not None: 22 | self.neck = builder.build_neck(neck) 23 | self.bbox_head = builder.build_head(bbox_head) 24 | self.train_cfg = train_cfg 25 | self.test_cfg = test_cfg 26 | self.init_weights(pretrained=pretrained) 27 | 28 | def init_weights(self, pretrained=None): 29 | super(SingleStageDetector, self).init_weights(pretrained) 30 | self.backbone.init_weights(pretrained=pretrained) 31 | if self.with_neck: 32 | if isinstance(self.neck, nn.Sequential): 33 | for m in self.neck: 34 | m.init_weights() 35 | else: 36 | self.neck.init_weights() 37 | self.bbox_head.init_weights() 38 | 39 | def extract_feat(self, img): 40 | x = self.backbone(img) 41 | if self.with_neck: 42 | x = self.neck(x) 43 | return x 44 | 45 | def forward_dummy(self, img): 46 | x = self.extract_feat(img) 47 | outs = self.bbox_head(x) 48 | return outs 49 | 50 | def forward_train(self, 51 | img, 52 | img_metas, 53 | gt_bboxes, 54 | gt_labels, 55 | gt_bboxes_ignore=None): 56 | # print('in single_stage') 57 | # import pdb 58 | # pdb.set_trace() 59 | x = self.extract_feat(img) 60 | outs = self.bbox_head(x) 61 | loss_inputs = outs + (gt_bboxes, gt_labels, img_metas, self.train_cfg) 62 | losses = self.bbox_head.loss( 63 | *loss_inputs, gt_bboxes_ignore=gt_bboxes_ignore) 64 | return losses 65 | 66 | def 
simple_test(self, img, img_meta, rescale=False): 67 | x = self.extract_feat(img) 68 | outs = self.bbox_head(x) 69 | bbox_inputs = outs + (img_meta, self.test_cfg, rescale) 70 | bbox_list = self.bbox_head.get_bboxes(*bbox_inputs) 71 | bbox_results = [ 72 | bbox2result(det_bboxes, det_labels, self.bbox_head.num_classes) 73 | for det_bboxes, det_labels in bbox_list 74 | ] 75 | return bbox_results[0] 76 | 77 | def aug_test(self, imgs, img_metas, rescale=False): 78 | raise NotImplementedError 79 | -------------------------------------------------------------------------------- /mmdet/models/losses/__init__.py: -------------------------------------------------------------------------------- 1 | from .cross_entropy_loss import CrossEntropyLoss 2 | from .focal_loss import FocalLoss 3 | from .smooth_l1_loss import SmoothL1Loss 4 | from .ghm_loss import GHMC, GHMR 5 | from .balanced_l1_loss import BalancedL1Loss 6 | from .iou_loss import IoULoss 7 | 8 | __all__ = [ 9 | 'CrossEntropyLoss', 'FocalLoss', 'SmoothL1Loss', 'BalancedL1Loss', 10 | 'IoULoss', 'GHMC', 'GHMR' 11 | ] 12 | -------------------------------------------------------------------------------- /mmdet/models/losses/balanced_l1_loss.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | from mmdet.core import weighted_balanced_l1_loss 3 | 4 | from ..registry import LOSSES 5 | 6 | 7 | @LOSSES.register_module 8 | class BalancedL1Loss(nn.Module): 9 | """Balanced L1 Loss 10 | 11 | arXiv: https://arxiv.org/pdf/1904.02701.pdf (CVPR 2019) 12 | """ 13 | 14 | def __init__(self, alpha=0.5, gamma=1.5, beta=1.0, loss_weight=1.0): 15 | super(BalancedL1Loss, self).__init__() 16 | self.alpha = alpha 17 | self.gamma = gamma 18 | self.beta = beta 19 | self.loss_weight = loss_weight 20 | 21 | def forward(self, pred, target, weight, *args, **kwargs): 22 | loss_bbox = self.loss_weight * weighted_balanced_l1_loss( 23 | pred, 24 | target, 25 | weight, 26 | alpha=self.alpha, 27 
| gamma=self.gamma, 28 | beta=self.beta, 29 | *args, 30 | **kwargs) 31 | return loss_bbox 32 | -------------------------------------------------------------------------------- /mmdet/models/losses/cross_entropy_loss.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | from mmdet.core import (weighted_cross_entropy, weighted_binary_cross_entropy, 3 | mask_cross_entropy) 4 | 5 | from ..registry import LOSSES 6 | 7 | 8 | @LOSSES.register_module 9 | class CrossEntropyLoss(nn.Module): 10 | 11 | def __init__(self, use_sigmoid=False, use_mask=False, loss_weight=1.0): 12 | super(CrossEntropyLoss, self).__init__() 13 | assert (use_sigmoid is False) or (use_mask is False) 14 | self.use_sigmoid = use_sigmoid 15 | self.use_mask = use_mask 16 | self.loss_weight = loss_weight 17 | 18 | if self.use_sigmoid: 19 | self.cls_criterion = weighted_binary_cross_entropy 20 | elif self.use_mask: 21 | self.cls_criterion = mask_cross_entropy 22 | else: 23 | self.cls_criterion = weighted_cross_entropy 24 | 25 | def forward(self, cls_score, label, label_weight, *args, **kwargs): 26 | loss_cls = self.loss_weight * self.cls_criterion( 27 | cls_score, label, label_weight, *args, **kwargs) 28 | return loss_cls 29 | -------------------------------------------------------------------------------- /mmdet/models/losses/focal_loss.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | from mmdet.core import weighted_sigmoid_focal_loss 3 | 4 | from ..registry import LOSSES 5 | 6 | 7 | @LOSSES.register_module 8 | class FocalLoss(nn.Module): 9 | 10 | def __init__(self, 11 | use_sigmoid=False, 12 | loss_weight=1.0, 13 | gamma=2.0, 14 | alpha=0.25): 15 | super(FocalLoss, self).__init__() 16 | assert use_sigmoid is True, 'Only sigmoid focaloss supported now.' 
17 | self.use_sigmoid = use_sigmoid 18 | self.loss_weight = loss_weight 19 | self.gamma = gamma 20 | self.alpha = alpha 21 | self.cls_criterion = weighted_sigmoid_focal_loss 22 | 23 | def forward(self, cls_score, label, label_weight, *args, **kwargs): 24 | if self.use_sigmoid: 25 | loss_cls = self.loss_weight * self.cls_criterion( 26 | cls_score, 27 | label, 28 | label_weight, 29 | gamma=self.gamma, 30 | alpha=self.alpha, 31 | *args, 32 | **kwargs) 33 | else: 34 | raise NotImplementedError 35 | return loss_cls 36 | -------------------------------------------------------------------------------- /mmdet/models/losses/iou_loss.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | from mmdet.core import weighted_iou_loss 3 | 4 | from ..registry import LOSSES 5 | 6 | 7 | @LOSSES.register_module 8 | class IoULoss(nn.Module): 9 | 10 | def __init__(self, style='naive', beta=0.2, eps=1e-3, loss_weight=1.0): 11 | super(IoULoss, self).__init__() 12 | self.style = style 13 | self.beta = beta 14 | self.eps = eps 15 | self.loss_weight = loss_weight 16 | 17 | def forward(self, pred, target, weight, *args, **kwargs): 18 | loss = self.loss_weight * weighted_iou_loss( 19 | pred, 20 | target, 21 | weight, 22 | style=self.style, 23 | beta=self.beta, 24 | eps=self.eps, 25 | *args, 26 | **kwargs) 27 | return loss 28 | -------------------------------------------------------------------------------- /mmdet/models/losses/smooth_l1_loss.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | from mmdet.core import weighted_smoothl1 3 | 4 | from ..registry import LOSSES 5 | 6 | 7 | @LOSSES.register_module 8 | class SmoothL1Loss(nn.Module): 9 | 10 | def __init__(self, beta=1.0, loss_weight=1.0): 11 | super(SmoothL1Loss, self).__init__() 12 | self.beta = beta 13 | self.loss_weight = loss_weight 14 | 15 | def forward(self, pred, target, weight, *args, **kwargs): 16 | loss_bbox 
= self.loss_weight * weighted_smoothl1( 17 | pred, target, weight, beta=self.beta, *args, **kwargs) 18 | return loss_bbox 19 | -------------------------------------------------------------------------------- /mmdet/models/mask_heads/__init__.py: -------------------------------------------------------------------------------- 1 | from .fcn_mask_head import FCNMaskHead 2 | from .htc_mask_head import HTCMaskHead 3 | from .fused_semantic_head import FusedSemanticHead 4 | 5 | __all__ = ['FCNMaskHead', 'HTCMaskHead', 'FusedSemanticHead'] 6 | -------------------------------------------------------------------------------- /mmdet/models/mask_heads/htc_mask_head.py: -------------------------------------------------------------------------------- 1 | from .fcn_mask_head import FCNMaskHead 2 | from ..registry import HEADS 3 | from ..utils import ConvModule 4 | 5 | 6 | @HEADS.register_module 7 | class HTCMaskHead(FCNMaskHead): 8 | 9 | def __init__(self, *args, **kwargs): 10 | super(HTCMaskHead, self).__init__(*args, **kwargs) 11 | self.conv_res = ConvModule( 12 | self.conv_out_channels, 13 | self.conv_out_channels, 14 | 1, 15 | conv_cfg=self.conv_cfg, 16 | norm_cfg=self.norm_cfg) 17 | 18 | def init_weights(self): 19 | super(HTCMaskHead, self).init_weights() 20 | self.conv_res.init_weights() 21 | 22 | def forward(self, x, res_feat=None, return_logits=True, return_feat=True): 23 | if res_feat is not None: 24 | res_feat = self.conv_res(res_feat) 25 | x = x + res_feat 26 | for conv in self.convs: 27 | x = conv(x) 28 | res_feat = x 29 | outs = [] 30 | if return_logits: 31 | x = self.upsample(x) 32 | if self.upsample_method == 'deconv': 33 | x = self.relu(x) 34 | mask_pred = self.conv_logits(x) 35 | outs.append(mask_pred) 36 | if return_feat: 37 | outs.append(res_feat) 38 | return outs if len(outs) > 1 else outs[0] 39 | -------------------------------------------------------------------------------- /mmdet/models/necks/__init__.py: 
-------------------------------------------------------------------------------- 1 | from .fpn import FPN 2 | from .bfp import BFP 3 | from .hrfpn import HRFPN 4 | 5 | __all__ = ['FPN', 'BFP', 'HRFPN'] 6 | -------------------------------------------------------------------------------- /mmdet/models/plugins/__init__.py: -------------------------------------------------------------------------------- 1 | from .non_local import NonLocal2D 2 | 3 | __all__ = ['NonLocal2D'] 4 | -------------------------------------------------------------------------------- /mmdet/models/rbbox_heads/__init__.py: -------------------------------------------------------------------------------- 1 | from .rbbox_head import BBoxHeadRbbox 2 | from .convfc_rbbox_head import ConvFCBBoxHeadRbbox, SharedFCBBoxHeadRbbox 3 | 4 | __all__ = ['BBoxHeadRbbox', 'ConvFCBBoxHeadRbbox', 'SharedFCBBoxHeadRbbox'] 5 | -------------------------------------------------------------------------------- /mmdet/models/registry.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | 4 | class Registry(object): 5 | 6 | def __init__(self, name): 7 | self._name = name 8 | self._module_dict = dict() 9 | 10 | @property 11 | def name(self): 12 | return self._name 13 | 14 | @property 15 | def module_dict(self): 16 | return self._module_dict 17 | 18 | def _register_module(self, module_class): 19 | """Register a module. 20 | 21 | Args: 22 | module (:obj:`nn.Module`): Module to be registered. 
23 | """ 24 | if not issubclass(module_class, nn.Module): 25 | raise TypeError( 26 | 'module must be a child of nn.Module, but got {}'.format( 27 | module_class)) 28 | module_name = module_class.__name__ 29 | if module_name in self._module_dict: 30 | raise KeyError('{} is already registered in {}'.format( 31 | module_name, self.name)) 32 | self._module_dict[module_name] = module_class 33 | 34 | def register_module(self, cls): 35 | self._register_module(cls) 36 | return cls 37 | 38 | 39 | BACKBONES = Registry('backbone') 40 | NECKS = Registry('neck') 41 | ROI_EXTRACTORS = Registry('roi_extractor') 42 | SHARED_HEADS = Registry('shared_head') 43 | HEADS = Registry('head') 44 | LOSSES = Registry('loss') 45 | DETECTORS = Registry('detector') 46 | -------------------------------------------------------------------------------- /mmdet/models/roi_extractors/__init__.py: -------------------------------------------------------------------------------- 1 | from .single_level import SingleRoIExtractor 2 | 3 | __all__ = ['SingleRoIExtractor'] 4 | -------------------------------------------------------------------------------- /mmdet/models/roi_extractors/single_level.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | 3 | import torch 4 | import torch.nn as nn 5 | 6 | from mmdet import ops 7 | from ..registry import ROI_EXTRACTORS 8 | 9 | 10 | @ROI_EXTRACTORS.register_module 11 | class SingleRoIExtractor(nn.Module): 12 | """Extract RoI features from a single level feature map. 13 | 14 | If there are mulitple input feature levels, each RoI is mapped to a level 15 | according to its scale. 16 | 17 | Args: 18 | roi_layer (dict): Specify RoI layer type and arguments. 19 | out_channels (int): Output channels of RoI layers. 20 | featmap_strides (int): Strides of input feature maps. 21 | finest_scale (int): Scale threshold of mapping to level 0. 
class SingleRoIExtractor(nn.Module):
    """Extract RoI features from a single level feature map.

    If there are multiple input feature levels, each RoI is mapped to a level
    according to its scale.

    Args:
        roi_layer (dict): Specify RoI layer type and arguments.
        out_channels (int): Output channels of RoI layers.
        featmap_strides (int): Strides of input feature maps.
        finest_scale (int): Scale threshold of mapping to level 0.
    """

    def __init__(self,
                 roi_layer,
                 out_channels,
                 featmap_strides,
                 finest_scale=56):
        super(SingleRoIExtractor, self).__init__()
        self.roi_layers = self.build_roi_layers(roi_layer, featmap_strides)
        self.out_channels = out_channels
        self.featmap_strides = featmap_strides
        self.finest_scale = finest_scale

    @property
    def num_inputs(self):
        """int: Input feature map levels."""
        return len(self.featmap_strides)

    def init_weights(self):
        # RoI layers carry no learnable parameters; nothing to initialize.
        pass

    def build_roi_layers(self, layer_cfg, featmap_strides):
        """Instantiate one RoI layer (looked up in ``mmdet.ops`` by the
        config's ``type``) per feature level, scaled by 1/stride."""
        cfg = layer_cfg.copy()
        layer_type = cfg.pop('type')
        assert hasattr(ops, layer_type)
        layer_cls = getattr(ops, layer_type)
        roi_layers = nn.ModuleList(
            [layer_cls(spatial_scale=1 / s, **cfg) for s in featmap_strides])
        return roi_layers

    def map_roi_levels(self, rois, num_levels):
        """Map rois to corresponding feature levels by scales.

        - scale < finest_scale: level 0
        - finest_scale <= scale < finest_scale * 2: level 1
        - finest_scale * 2 <= scale < finest_scale * 4: level 2
        - scale >= finest_scale * 4: level 3

        Args:
            rois (Tensor): Input RoIs, shape (k, 5).
            num_levels (int): Total level number.

        Returns:
            Tensor: Level index (0-based) of each RoI, shape (k, )
        """
        scale = torch.sqrt(
            (rois[:, 3] - rois[:, 1] + 1) * (rois[:, 4] - rois[:, 2] + 1))
        target_lvls = torch.floor(torch.log2(scale / self.finest_scale + 1e-6))
        target_lvls = target_lvls.clamp(min=0, max=num_levels - 1).long()
        return target_lvls

    def forward(self, feats, rois):
        # Single level: no mapping needed, pool directly.
        if len(feats) == 1:
            return self.roi_layers[0](feats[0], rois)

        out_size = self.roi_layers[0].out_size
        num_levels = len(feats)
        target_lvls = self.map_roi_levels(rois, num_levels)
        # Fix: allocate the output on the same device/dtype as the input
        # features instead of hard-coding torch.cuda.FloatTensor, which
        # crashes on CPU-only runs and assumes float32.
        roi_feats = feats[0].new_zeros(rois.size(0), self.out_channels,
                                       out_size, out_size)
        for i in range(num_levels):
            inds = target_lvls == i
            if inds.any():
                rois_ = rois[inds, :]
                roi_feats_t = self.roi_layers[i](feats[i], rois_)
                roi_feats[inds] += roi_feats_t
        return roi_feats
@SHARED_HEADS.register_module
class ResLayer(nn.Module):
    """One ResNet stage wrapped as a shared RoI head.

    Builds stage ``stage`` (0-based) of a ResNet of depth ``depth`` via
    ``make_res_layer`` and registers it under the conventional ResNet
    attribute name (``layer{stage + 1}``), so pretrained checkpoints load
    by name.
    """

    def __init__(self,
                 depth,
                 stage=3,
                 stride=2,
                 dilation=1,
                 style='pytorch',
                 norm_cfg=dict(type='BN', requires_grad=True),
                 norm_eval=True,
                 with_cp=False,
                 dcn=None):
        super(ResLayer, self).__init__()
        self.norm_eval = norm_eval
        self.norm_cfg = norm_cfg
        self.stage = stage
        # Look up the block type and per-stage block counts for this depth
        # from ResNet's architecture table.
        block, stage_blocks = ResNet.arch_settings[depth]
        stage_block = stage_blocks[stage]
        # Standard ResNet channel schedule: 64 * 2**stage planes for this
        # stage; the input side carries the previous stage's expanded width.
        planes = 64 * 2**stage
        inplanes = 64 * 2**(stage - 1) * block.expansion

        res_layer = make_res_layer(
            block,
            inplanes,
            planes,
            stage_block,
            stride=stride,
            dilation=dilation,
            style=style,
            with_cp=with_cp,
            norm_cfg=self.norm_cfg,
            dcn=dcn)
        # e.g. 'layer4' when stage == 3, matching torchvision/ResNet naming.
        self.add_module('layer{}'.format(stage + 1), res_layer)

    def init_weights(self, pretrained=None):
        """Load ``pretrained`` checkpoint if given; otherwise Kaiming-init
        convs and constant-init BatchNorm weights to 1.

        Raises:
            TypeError: if ``pretrained`` is neither a str nor None.
        """
        if isinstance(pretrained, str):
            logger = logging.getLogger()
            load_checkpoint(self, pretrained, strict=False, logger=logger)
        elif pretrained is None:
            for m in self.modules():
                if isinstance(m, nn.Conv2d):
                    kaiming_init(m)
                elif isinstance(m, nn.BatchNorm2d):
                    constant_init(m, 1)
        else:
            raise TypeError('pretrained must be a str or None')

    def forward(self, x):
        # Fetch the stage registered in __init__ and apply it.
        res_layer = getattr(self, 'layer{}'.format(self.stage + 1))
        out = res_layer(x)
        return out

    def train(self, mode=True):
        # When norm_eval is set, keep BatchNorm layers in eval mode (frozen
        # running statistics) even while the rest of the head trains.
        super(ResLayer, self).train(mode)
        if self.norm_eval:
            for m in self.modules():
                if isinstance(m, nn.BatchNorm2d):
                    m.eval()
def conv_ws_2d(input,
               weight,
               bias=None,
               stride=1,
               padding=0,
               dilation=1,
               groups=1,
               eps=1e-5):
    """2d convolution with weight standardization.

    Every output filter is shifted to zero mean and scaled by its standard
    deviation (plus ``eps`` for stability) before the convolution runs.
    """
    out_channels = weight.size(0)
    flat = weight.view(out_channels, -1)
    mean = flat.mean(dim=1, keepdim=True).view(out_channels, 1, 1, 1)
    std = flat.std(dim=1, keepdim=True).view(out_channels, 1, 1, 1)
    standardized = (weight - mean) / (std + eps)
    return F.conv2d(input, standardized, bias, stride, padding, dilation,
                    groups)


class ConvWS2d(nn.Conv2d):
    """Drop-in ``nn.Conv2d`` replacement that standardizes its weight on
    every forward pass (see :func:`conv_ws_2d`)."""

    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride=1,
                 padding=0,
                 dilation=1,
                 groups=1,
                 bias=True,
                 eps=1e-5):
        super(ConvWS2d, self).__init__(
            in_channels,
            out_channels,
            kernel_size,
            stride=stride,
            padding=padding,
            dilation=dilation,
            groups=groups,
            bias=bias)
        self.eps = eps

    def forward(self, x):
        # Delegate to the functional form with this layer's own parameters.
        return conv_ws_2d(x, self.weight, self.bias, self.stride, self.padding,
                          self.dilation, self.groups, self.eps)
norm_cfg = {
    # format: layer_type: (abbreviation, module)
    'BN': ('bn', nn.BatchNorm2d),
    'SyncBN': ('bn', nn.SyncBatchNorm),
    'GN': ('gn', nn.GroupNorm),
    # and potentially 'SN'
}


def build_norm_layer(cfg, num_features, postfix=''):
    """Build a normalization layer from a config dict.

    Args:
        cfg (dict): must contain ``type`` (one of the keys of ``norm_cfg``);
            remaining entries are forwarded to the layer constructor, except
            ``requires_grad`` (bool, optional) which toggles gradient updates
            on the created layer's parameters.
        num_features (int): number of input channels.
        postfix (int, str): appended to the abbreviation to form the name.

    Returns:
        tuple: ``(name, layer)`` — abbreviation + postfix, and the module.
    """
    assert isinstance(cfg, dict) and 'type' in cfg
    cfg_ = cfg.copy()

    layer_type = cfg_.pop('type')
    if layer_type not in norm_cfg:
        raise KeyError('Unrecognized norm type {}'.format(layer_type))
    abbr, norm_layer = norm_cfg[layer_type]
    if norm_layer is None:
        raise NotImplementedError

    assert isinstance(postfix, (int, str))
    name = abbr + str(postfix)

    requires_grad = cfg_.pop('requires_grad', True)
    cfg_.setdefault('eps', 1e-5)
    if layer_type == 'GN':
        # GroupNorm takes channels as a keyword and needs num_groups.
        assert 'num_groups' in cfg_
        layer = norm_layer(num_channels=num_features, **cfg_)
    else:
        layer = norm_layer(num_features, **cfg_)
        if layer_type == 'SyncBN':
            layer._specify_ddp_gpu_num(1)

    for param in layer.parameters():
        param.requires_grad = requires_grad

    return name, layer
def xavier_init(module, gain=1, bias=0, distribution='normal'):
    """Xavier-init ``module.weight``; constant-fill ``module.bias`` if present."""
    assert distribution in ['uniform', 'normal']
    if distribution == 'uniform':
        nn.init.xavier_uniform_(module.weight, gain=gain)
    else:
        nn.init.xavier_normal_(module.weight, gain=gain)
    # Fix: layers created with bias=False expose ``bias = None``; calling
    # nn.init.constant_(None, ...) raises, so also guard against None.
    if hasattr(module, 'bias') and module.bias is not None:
        nn.init.constant_(module.bias, bias)


def normal_init(module, mean=0, std=1, bias=0):
    """Normal(mean, std)-init the weight; constant-fill the bias if present."""
    nn.init.normal_(module.weight, mean, std)
    if hasattr(module, 'bias') and module.bias is not None:
        nn.init.constant_(module.bias, bias)


def uniform_init(module, a=0, b=1, bias=0):
    """Uniform(a, b)-init the weight; constant-fill the bias if present."""
    nn.init.uniform_(module.weight, a, b)
    if hasattr(module, 'bias') and module.bias is not None:
        nn.init.constant_(module.bias, bias)


def kaiming_init(module,
                 mode='fan_out',
                 nonlinearity='relu',
                 bias=0,
                 distribution='normal'):
    """Kaiming-init the weight; constant-fill the bias if present."""
    assert distribution in ['uniform', 'normal']
    if distribution == 'uniform':
        nn.init.kaiming_uniform_(
            module.weight, mode=mode, nonlinearity=nonlinearity)
    else:
        nn.init.kaiming_normal_(
            module.weight, mode=mode, nonlinearity=nonlinearity)
    if hasattr(module, 'bias') and module.bias is not None:
        nn.init.constant_(module.bias, bias)


def bias_init_with_prob(prior_prob):
    """Return the conv/fc bias value whose sigmoid equals ``prior_prob``."""
    bias_init = float(-np.log((1 - prior_prob) / prior_prob))
    return bias_init
ModulatedDeformRoIPoolingPack, 4 | deform_conv, modulated_deform_conv, deform_roi_pooling) 5 | from .gcb import ContextBlock 6 | from .nms import nms, soft_nms 7 | from .roi_align import RoIAlign, roi_align 8 | from .roi_pool import RoIPool, roi_pool 9 | from .roi_align_rotated import RoIAlignRotated, roi_align_rotated 10 | from .psroi_align_rotated import PSRoIAlignRotated, psroi_align_rotated 11 | from .sigmoid_focal_loss import SigmoidFocalLoss, sigmoid_focal_loss 12 | from .masked_conv import MaskedConv2d 13 | 14 | __all__ = [ 15 | 'nms', 'soft_nms', 'RoIAlign', 'roi_align', 'RoIPool', 'roi_pool', 16 | 'RoIAlignRotated', 'roi_align_rotated', 'PSRoIAlignRotated', 'psroi_align_rotated', 17 | 'DeformConv', 'DeformConvPack', 'DeformRoIPooling', 'DeformRoIPoolingPack', 18 | 'ModulatedDeformRoIPoolingPack', 'ModulatedDeformConv', 19 | 'ModulatedDeformConvPack', 'deform_conv', 'modulated_deform_conv', 20 | 'deform_roi_pooling', 'SigmoidFocalLoss', 'sigmoid_focal_loss', 21 | 'MaskedConv2d', 'ContextBlock' 22 | ] 23 | -------------------------------------------------------------------------------- /mmdet/ops/dcn/__init__.py: -------------------------------------------------------------------------------- 1 | from .functions.deform_conv import deform_conv, modulated_deform_conv 2 | from .functions.deform_pool import deform_roi_pooling 3 | from .modules.deform_conv import (DeformConv, ModulatedDeformConv, 4 | DeformConvPack, ModulatedDeformConvPack) 5 | from .modules.deform_pool import (DeformRoIPooling, DeformRoIPoolingPack, 6 | ModulatedDeformRoIPoolingPack) 7 | 8 | __all__ = [ 9 | 'DeformConv', 'DeformConvPack', 'ModulatedDeformConv', 10 | 'ModulatedDeformConvPack', 'DeformRoIPooling', 'DeformRoIPoolingPack', 11 | 'ModulatedDeformRoIPoolingPack', 'deform_conv', 'modulated_deform_conv', 12 | 'deform_roi_pooling' 13 | ] 14 | -------------------------------------------------------------------------------- /mmdet/ops/dcn/functions/__init__.py: 
class DeformRoIPoolingFunction(Function):
    # autograd Function bridging to the deform_pool CUDA extension
    # (deformable position-sensitive RoI pooling, forward + backward).

    @staticmethod
    def forward(ctx,
                data,
                rois,
                offset,
                spatial_scale,
                out_size,
                out_channels,
                no_trans,
                group_size=1,
                part_size=None,
                sample_per_part=4,
                trans_std=.0):
        """Pool RoI features from ``data``, deformed by ``offset``.

        Args:
            data: input feature map (must be a CUDA tensor — CPU raises).
            rois: RoI boxes; one output row per RoI.
            offset: offsets for the deformable sampling grid.
            spatial_scale, out_size, out_channels, no_trans, group_size,
            part_size, sample_per_part, trans_std: kernel parameters
                forwarded verbatim to the CUDA op; ``trans_std`` must lie
                in [0, 1], and ``part_size`` defaults to ``out_size``.
        """
        # Stash kernel parameters on ctx so backward can replay them.
        ctx.spatial_scale = spatial_scale
        ctx.out_size = out_size
        ctx.out_channels = out_channels
        ctx.no_trans = no_trans
        ctx.group_size = group_size
        ctx.part_size = out_size if part_size is None else part_size
        ctx.sample_per_part = sample_per_part
        ctx.trans_std = trans_std

        assert 0.0 <= ctx.trans_std <= 1.0
        if not data.is_cuda:
            raise NotImplementedError

        n = rois.shape[0]
        output = data.new_empty(n, out_channels, out_size, out_size)
        # Auxiliary buffer filled by the kernel and consumed by backward.
        output_count = data.new_empty(n, out_channels, out_size, out_size)
        deform_pool_cuda.deform_psroi_pooling_cuda_forward(
            data, rois, offset, output, output_count, ctx.no_trans,
            ctx.spatial_scale, ctx.out_channels, ctx.group_size, ctx.out_size,
            ctx.part_size, ctx.sample_per_part, ctx.trans_std)

        if data.requires_grad or rois.requires_grad or offset.requires_grad:
            ctx.save_for_backward(data, rois, offset)
        # NOTE(review): output_count is stored as a plain ctx attribute
        # rather than via save_for_backward, bypassing autograd's
        # saved-tensor checks — confirm this is intentional.
        ctx.output_count = output_count

        return output

    @staticmethod
    def backward(ctx, grad_output):
        """Gradients w.r.t. ``data`` and ``offset`` only; ``rois`` gets None."""
        if not grad_output.is_cuda:
            raise NotImplementedError

        data, rois, offset = ctx.saved_tensors
        output_count = ctx.output_count
        grad_input = torch.zeros_like(data)
        grad_rois = None
        grad_offset = torch.zeros_like(offset)

        deform_pool_cuda.deform_psroi_pooling_cuda_backward(
            grad_output, data, rois, offset, output_count, grad_input,
            grad_offset, ctx.no_trans, ctx.spatial_scale, ctx.out_channels,
            ctx.group_size, ctx.out_size, ctx.part_size, ctx.sample_per_part,
            ctx.trans_std)
        # One gradient slot per forward argument; the non-tensor kernel
        # parameters all receive None.
        return (grad_input, grad_rois, grad_offset, None, None, None, None,
                None, None, None, None)


deform_roi_pooling = DeformRoIPoolingFunction.apply
class MaskedConv2dFunction(Function):
    """Forward-only conv2d evaluated solely at spatial positions where
    ``mask`` is set (gathers masked columns, one dense GEMM, scatters back).
    """

    @staticmethod
    def forward(ctx, features, mask, weight, bias, padding=0, stride=1):
        # Single-image, single-mask-channel only; mask must match HxW.
        assert mask.dim() == 3 and mask.size(0) == 1
        assert features.dim() == 4 and features.size(0) == 1
        assert features.size()[2:] == mask.size()[1:]
        pad_h, pad_w = _pair(padding)
        stride_h, stride_w = _pair(stride)
        if stride_h != 1 or stride_w != 1:
            raise ValueError(
                'Stride could not only be 1 in masked_conv2d currently.')
        if not features.is_cuda:
            raise NotImplementedError

        out_channel, in_channel, kernel_h, kernel_w = weight.size()

        batch_size = features.size(0)
        out_h = int(
            math.floor((features.size(2) + 2 * pad_h -
                        (kernel_h - 1) - 1) / stride_h + 1))
        # Fix: output width must be derived from kernel_w; the original used
        # kernel_h here, giving a wrong out_w for non-square kernels.
        out_w = int(
            math.floor((features.size(3) + 2 * pad_w -
                        (kernel_w - 1) - 1) / stride_w + 1))
        mask_inds = torch.nonzero(mask[0] > 0)
        mask_h_idx = mask_inds[:, 0].contiguous()
        mask_w_idx = mask_inds[:, 1].contiguous()
        # One im2col column per masked position.
        data_col = features.new_zeros(in_channel * kernel_h * kernel_w,
                                      mask_inds.size(0))
        masked_conv2d_cuda.masked_im2col_forward(features, mask_h_idx,
                                                 mask_w_idx, kernel_h,
                                                 kernel_w, pad_h, pad_w,
                                                 data_col)

        # Dense GEMM over the gathered columns; bias is broadcast per row.
        # (Legacy positional addmm(beta, mat, alpha, ...) form kept as-is.)
        masked_output = torch.addmm(1, bias[:, None], 1,
                                    weight.view(out_channel, -1), data_col)
        output = features.new_zeros(batch_size, out_channel, out_h, out_w)
        masked_conv2d_cuda.masked_col2im_forward(masked_output, mask_h_idx,
                                                 mask_w_idx, out_h, out_w,
                                                 out_channel, output)
        return output

    @staticmethod
    def backward(ctx, grad_output):
        # Forward-only op: no gradients implemented. Fix: return one None per
        # forward input (features, mask, weight, bias, padding, stride) —
        # the original returned only 5, which autograd rejects if backward
        # is ever invoked.
        return (None, ) * 6


masked_conv2d = MaskedConv2dFunction.apply
class MaskedConv2d(nn.Conv2d):
    """Conv2d variant whose forward can be restricted to masked locations.

    Without a mask it behaves exactly like the stock ``nn.Conv2d``. With a
    mask it delegates to :func:`masked_conv2d`, which implements no backward
    pass and currently supports only stride 1.
    """

    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride=1,
                 padding=0,
                 dilation=1,
                 groups=1,
                 bias=True):
        super(MaskedConv2d,
              self).__init__(in_channels, out_channels, kernel_size, stride,
                             padding, dilation, groups, bias)

    def forward(self, input, mask=None):
        # No mask given: fall back to the ordinary convolution.
        if mask is None:
            return super(MaskedConv2d, self).forward(input)
        return masked_conv2d(input, mask, self.weight, self.bias,
                             self.padding)
// Contiguity check; combined with CHECK_CUDA (defined above) in CHECK_INPUT
// so every tensor handed to the CUDA launchers is a contiguous CUDA tensor.
#define CHECK_CONTIGUOUS(x) \
  AT_CHECK(x.is_contiguous(), #x, " must be contiguous ")
#define CHECK_INPUT(x) \
  CHECK_CUDA(x);       \
  CHECK_CONTIGUOUS(x)

// Gather im2col columns only at the masked spatial positions given by
// (mask_h_idx, mask_w_idx). Always returns 1.
int masked_im2col_forward_cuda(const at::Tensor im, const at::Tensor mask_h_idx,
                               const at::Tensor mask_w_idx, const int kernel_h,
                               const int kernel_w, const int pad_h,
                               const int pad_w, at::Tensor col) {
  CHECK_INPUT(im);
  CHECK_INPUT(mask_h_idx);
  CHECK_INPUT(mask_w_idx);
  CHECK_INPUT(col);
  // im: (n, ic, h, w), kernel size (kh, kw)
  // kernel: (oc, ic * kh * kw), col: (kh * kw * ic, ow * oh)

  int channels = im.size(1);
  int height = im.size(2);
  int width = im.size(3);
  int mask_cnt = mask_h_idx.size(0);  // number of masked positions

  MaskedIm2colForwardLaucher(im, height, width, channels, kernel_h, kernel_w,
                             pad_h, pad_w, mask_h_idx, mask_w_idx, mask_cnt,
                             col);

  return 1;
}

// Scatter GEMM result columns back into the dense output image at the
// masked positions. Always returns 1.
int masked_col2im_forward_cuda(const at::Tensor col,
                               const at::Tensor mask_h_idx,
                               const at::Tensor mask_w_idx, int height,
                               int width, int channels, at::Tensor im) {
  CHECK_INPUT(col);
  CHECK_INPUT(mask_h_idx);
  CHECK_INPUT(mask_w_idx);
  CHECK_INPUT(im);
  // im: (n, ic, h, w), kernel size (kh, kw)
  // kernel: (oc, ic * kh * kw), col: (kh * kw * ic, ow * oh)

  int mask_cnt = mask_h_idx.size(0);

  MaskedCol2imForwardLaucher(col, height, width, channels, mask_h_idx,
                             mask_w_idx, mask_cnt, im);

  return 1;
}

// Python bindings used by mmdet/ops/masked_conv.
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
  m.def("masked_im2col_forward", &masked_im2col_forward_cuda,
        "masked_im2col forward (CUDA)");
  m.def("masked_col2im_forward", &masked_col2im_forward_cuda,
        "masked_col2im forward (CUDA)");
}
def pesudo_nms(dets, iou_thr):
    """Pass-through "NMS" that keeps every detection unchanged.

    Fix: the original used ``torch.range(0, len(dets))``, which is
    deprecated, returns a float tensor, and is end-inclusive — yielding
    ``len(dets) + 1`` indices. ``torch.arange`` yields exactly one long
    index per detection.
    """
    keep = torch.arange(len(dets))
    return dets, keep


def nms(dets, iou_thr, device_id=None):
    """Dispatch to either CPU or GPU NMS implementations.

    The input can be either a torch tensor or numpy array. GPU NMS will be used
    if the input is a gpu tensor or device_id is specified, otherwise CPU NMS
    will be used. The returned type will always be the same as inputs.

    Arguments:
        dets (torch.Tensor or np.ndarray): bboxes with scores.
        iou_thr (float): IoU threshold for NMS.
        device_id (int, optional): when `dets` is a numpy array, if `device_id`
            is None, then cpu nms is used, otherwise gpu_nms will be used.

    Returns:
        tuple: kept bboxes and indice, which is always the same data type as
            the input.
    """
    # convert dets (tensor or numpy array) to tensor
    if isinstance(dets, torch.Tensor):
        is_numpy = False
        dets_th = dets
    elif isinstance(dets, np.ndarray):
        is_numpy = True
        device = 'cpu' if device_id is None else 'cuda:{}'.format(device_id)
        dets_th = torch.from_numpy(dets).to(device)
    else:
        raise TypeError(
            'dets must be either a Tensor or numpy array, but got {}'.format(
                type(dets)))

    # execute cpu or cuda nms
    if dets_th.shape[0] == 0:
        # nothing to suppress — empty index tensor of the right dtype
        inds = dets_th.new_zeros(0, dtype=torch.long)
    else:
        if dets_th.is_cuda:
            inds = nms_cuda.nms(dets_th, iou_thr)
        else:
            inds = nms_cpu.nms(dets_th, iou_thr)

    if is_numpy:
        inds = inds.cpu().numpy()
    return dets[inds, :], inds


def soft_nms(dets, iou_thr, method='linear', sigma=0.5, min_score=1e-3):
    """Soft-NMS on CPU: decay overlapping scores instead of discarding boxes.

    Arguments:
        dets (torch.Tensor or np.ndarray): bboxes with scores.
        iou_thr (float): IoU threshold passed to the CPU implementation.
        method (str): 'linear' or 'gaussian' score decay.
        sigma (float): gaussian decay parameter.
        min_score (float): boxes whose score drops below this are removed.

    Returns:
        tuple: (new dets, kept indices), same container type as the input.
    """
    if isinstance(dets, torch.Tensor):
        is_tensor = True
        dets_np = dets.detach().cpu().numpy()
    elif isinstance(dets, np.ndarray):
        is_tensor = False
        dets_np = dets
    else:
        raise TypeError(
            'dets must be either a Tensor or numpy array, but got {}'.format(
                type(dets)))

    method_codes = {'linear': 1, 'gaussian': 2}
    if method not in method_codes:
        raise ValueError('Invalid method for SoftNMS: {}'.format(method))
    new_dets, inds = soft_nms_cpu(
        dets_np,
        iou_thr,
        method=method_codes[method],
        sigma=sigma,
        min_score=min_score)

    if is_tensor:
        return dets.new_tensor(new_dets), dets.new_tensor(
            inds, dtype=torch.long)
    else:
        return new_dets.astype(np.float32), inds.astype(np.int64)
def customize_compiler_for_nvcc(self):
    """Inject deep into distutils to customize how the dispatch
    to cc/nvcc works.
    If you subclass UnixCCompiler, it's not trivial to get your subclass
    injected in, and still have the right customizations (i.e.
    distutils.sysconfig.customize_compiler) run on it. So instead of going
    the OO route, I have this. Note, it's kind of like a weird functional
    subclassing going on."""

    # tell the compiler it can process .cu files
    self.src_extensions.append('.cu')

    # save references to the default compiler_so and _compile methods
    # (fix: the saved method was bound to the name `super`, shadowing the
    # builtin; renamed to default_compile)
    default_compiler_so = self.compiler_so
    default_compile = self._compile

    # now redefine the _compile method. This gets executed for each
    # object but distutils doesn't have the ability to change compilers
    # based on source extension: we add it.
    def _compile(obj, src, ext, cc_args, extra_postargs, pp_opts):
        if osp.splitext(src)[1] == '.cu':
            # use cuda for .cu files
            self.set_executable('compiler_so', 'nvcc')
            # use only a subset of the extra_postargs, which are 1-1
            # translated from the extra_compile_args in the Extension class
            postargs = extra_postargs['nvcc']
        else:
            postargs = extra_postargs['cc']

        default_compile(obj, src, ext, cc_args, postargs, pp_opts)
        # reset the default compiler_so, which we might have changed for cuda
        self.compiler_so = default_compiler_so

    # inject our redefined _compile method into the class
    self._compile = _compile
// Classic greedy NMS over axis-aligned boxes on CPU.
// dets: (n, 5) rows of [x1, y1, x2, y2, score]; any box whose IoU with an
// already-kept higher-scoring box reaches `threshold` is suppressed.
// Returns the long-tensor indices of the kept boxes.
// NOTE(review): all angle-bracketed template arguments were stripped by the
// text extraction; `scalar_t` / data<...>() parameters below are
// reconstructed from the surrounding types — verify against upstream.
template <typename scalar_t>
at::Tensor nms_cpu_kernel(const at::Tensor& dets, const float threshold) {
  AT_ASSERTM(!dets.type().is_cuda(), "dets must be a CPU tensor");

  if (dets.numel() == 0) {
    return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU));
  }

  auto x1_t = dets.select(1, 0).contiguous();
  auto y1_t = dets.select(1, 1).contiguous();
  auto x2_t = dets.select(1, 2).contiguous();
  auto y2_t = dets.select(1, 3).contiguous();
  auto scores = dets.select(1, 4).contiguous();

  // Pixel-inclusive (+1) area convention.
  at::Tensor areas_t = (x2_t - x1_t + 1) * (y2_t - y1_t + 1);

  // Visit boxes in descending score order.
  auto order_t = std::get<1>(scores.sort(0, /* descending=*/true));

  auto ndets = dets.size(0);
  at::Tensor suppressed_t =
      at::zeros({ndets}, dets.options().dtype(at::kByte).device(at::kCPU));

  auto suppressed = suppressed_t.data<uint8_t>();
  auto order = order_t.data<int64_t>();
  auto x1 = x1_t.data<scalar_t>();
  auto y1 = y1_t.data<scalar_t>();
  auto x2 = x2_t.data<scalar_t>();
  auto y2 = y2_t.data<scalar_t>();
  auto areas = areas_t.data<scalar_t>();

  for (int64_t _i = 0; _i < ndets; _i++) {
    auto i = order[_i];
    if (suppressed[i] == 1) continue;  // already beaten by a better box
    auto ix1 = x1[i];
    auto iy1 = y1[i];
    auto ix2 = x2[i];
    auto iy2 = y2[i];
    auto iarea = areas[i];

    // Suppress every remaining lower-scoring box overlapping i too much.
    for (int64_t _j = _i + 1; _j < ndets; _j++) {
      auto j = order[_j];
      if (suppressed[j] == 1) continue;
      auto xx1 = std::max(ix1, x1[j]);
      auto yy1 = std::max(iy1, y1[j]);
      auto xx2 = std::min(ix2, x2[j]);
      auto yy2 = std::min(iy2, y2[j]);

      auto w = std::max(static_cast<scalar_t>(0), xx2 - xx1 + 1);
      auto h = std::max(static_cast<scalar_t>(0), yy2 - yy1 + 1);
      auto inter = w * h;
      auto ovr = inter / (iarea + areas[j] - inter);
      if (ovr >= threshold) suppressed[j] = 1;
    }
  }
  // Kept boxes are exactly those never suppressed.
  return at::nonzero(suppressed_t == 0).squeeze(1);
}
AT_DISPATCH_FLOATING_TYPES(dets.type(), "nms", [&] { 64 | result = nms_cpu_kernel(dets, threshold); 65 | }); 66 | return result; 67 | } 68 | 69 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 70 | m.def("nms", &nms, "non-maximum suppression"); 71 | } -------------------------------------------------------------------------------- /mmdet/ops/nms/src/nms_cuda.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | #include 3 | 4 | #define CHECK_CUDA(x) AT_CHECK(x.type().is_cuda(), #x, " must be a CUDAtensor ") 5 | 6 | at::Tensor nms_cuda(const at::Tensor boxes, float nms_overlap_thresh); 7 | 8 | at::Tensor nms(const at::Tensor& dets, const float threshold) { 9 | CHECK_CUDA(dets); 10 | if (dets.numel() == 0) 11 | return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU)); 12 | return nms_cuda(dets, threshold); 13 | } 14 | 15 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 16 | m.def("nms", &nms, "non-maximum suppression"); 17 | } -------------------------------------------------------------------------------- /mmdet/ops/poly_nms/__init__.py: -------------------------------------------------------------------------------- 1 | from .poly_nms_wrapper import poly_nms 2 | 3 | __all__ = ['poly_nms'] -------------------------------------------------------------------------------- /mmdet/ops/poly_nms/poly_nms_wrapper.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | from .import poly_nms_cuda 4 | 5 | def poly_nms(dets, iou_thr, device_id=None): 6 | """Dispatch to either CPU or GPU NMS implementations. 7 | 8 | The input can be either a torch tensor or numpy array. GPU NMS will be used 9 | if the input is a gpu tensor or device_id is specified, otherwise CPU NMS 10 | will be used. The returned type will always be the same as inputs. 
11 | 12 | Arguments: 13 | dets (torch.Tensor or np.ndarray): bboxes with scores. 14 | iou_thr (float): IoU threshold for NMS. 15 | device_id (int, optional): when `dets` is a numpy array, if `device_id` 16 | is None, then cpu nms is used, otherwise gpu_nms will be used. 17 | 18 | Returns: 19 | tuple: kept bboxes and indice, which is always the same data type as 20 | the input. 21 | """ 22 | # convert dets (tensor or numpy array) to tensor 23 | # import pdb 24 | # print('in nms wrapper') 25 | # pdb.set_trace() 26 | if isinstance(dets, torch.Tensor): 27 | is_numpy = False 28 | dets_th = dets 29 | elif isinstance(dets, np.ndarray): 30 | is_numpy = True 31 | device = 'cpu' if device_id is None else 'cuda:{}'.format(device_id) 32 | dets_th = torch.from_numpy(dets).to(device) 33 | else: 34 | raise TypeError( 35 | 'dets must be either a Tensor or numpy array, but got {}'.format( 36 | type(dets))) 37 | 38 | # execute cpu or cuda nms 39 | if dets_th.shape[0] == 0: 40 | inds = dets_th.new_zeros(0, dtype=torch.long) 41 | else: 42 | if dets_th.is_cuda: 43 | inds = poly_nms_cuda.poly_nms(dets_th, iou_thr) 44 | else: 45 | raise NotImplementedError 46 | 47 | if is_numpy: 48 | raise NotImplementedError 49 | return dets[inds, :], inds -------------------------------------------------------------------------------- /mmdet/ops/poly_nms/setup.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | from setuptools import setup, Extension 3 | 4 | import numpy as np 5 | from Cython.Build import cythonize 6 | from Cython.Distutils import build_ext 7 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension 8 | 9 | ext_args = dict( 10 | include_dirs=[np.get_include()], 11 | language='c++', 12 | extra_compile_args={ 13 | 'cc': ['-Wno-unused-function', '-Wno-write-strings'], 14 | 'nvcc': ['-c', '--compiler-options', '-fPIC'], 15 | }, 16 | ) 17 | 18 | 19 | def customize_compiler_for_nvcc(self): 20 | """inject deep into 
distutils to customize how the dispatch 21 | to cc/nvcc works. 22 | If you subclass UnixCCompiler, it's not trivial to get your subclass 23 | injected in, and still have the right customizations (i.e. 24 | distutils.sysconfig.customize_compiler) run on it. So instead of going 25 | the OO route, I have this. Note, it's kindof like a wierd functional 26 | subclassing going on.""" 27 | 28 | # tell the compiler it can processes .cu 29 | self.src_extensions.append('.cu') 30 | 31 | # save references to the default compiler_so and _comple methods 32 | default_compiler_so = self.compiler_so 33 | super = self._compile 34 | 35 | # now redefine the _compile method. This gets executed for each 36 | # object but distutils doesn't have the ability to change compilers 37 | # based on source extension: we add it. 38 | def _compile(obj, src, ext, cc_args, extra_postargs, pp_opts): 39 | if osp.splitext(src)[1] == '.cu': 40 | # use the cuda for .cu files 41 | self.set_executable('compiler_so', 'nvcc') 42 | # use only a subset of the extra_postargs, which are 1-1 translated 43 | # from the extra_compile_args in the Extension class 44 | postargs = extra_postargs['nvcc'] 45 | else: 46 | postargs = extra_postargs['cc'] 47 | 48 | super(obj, src, ext, cc_args, postargs, pp_opts) 49 | # reset the default compiler_so, which we might have changed for cuda 50 | self.compiler_so = default_compiler_so 51 | 52 | # inject our redefined _compile method into the class 53 | self._compile = _compile 54 | 55 | 56 | class custom_build_ext(build_ext): 57 | 58 | def build_extensions(self): 59 | customize_compiler_for_nvcc(self.compiler) 60 | build_ext.build_extensions(self) 61 | 62 | 63 | setup( 64 | name='poly_nms_cuda', 65 | ext_modules=[ 66 | CUDAExtension('poly_nms_cuda', [ 67 | 'src/poly_nms_cuda.cpp', 68 | 'src/poly_nms_kernel.cu', 69 | ]), 70 | # CUDAExtension('nms_cpu', [ 71 | # 'src/nms_cpu.cpp', 72 | # ]), 73 | ], 74 | cmdclass={'build_ext': BuildExtension}) 75 | 
-------------------------------------------------------------------------------- /mmdet/ops/poly_nms/src/poly_nms_cuda.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #define CHECK_CUDA(x) AT_CHECK(x.type().is_cuda(), #x, " must be a CUDAtensor ") 4 | 5 | at::Tensor poly_nms_cuda(const at::Tensor boxes, float nms_overlap_thresh); 6 | 7 | at::Tensor poly_nms(const at::Tensor& dets, const float threshold) { 8 | CHECK_CUDA(dets); 9 | if (dets.numel() == 0) 10 | return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU)); 11 | return poly_nms_cuda(dets, threshold); 12 | } 13 | 14 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 15 | m.def("poly_nms", &poly_nms, "polygon non-maximum suppression"); 16 | } -------------------------------------------------------------------------------- /mmdet/ops/psroi_align_rotated/__init__.py: -------------------------------------------------------------------------------- 1 | from .functions.psroi_align_rotated import psroi_align_rotated 2 | from .modules.psroi_align_rotated import PSRoIAlignRotated 3 | 4 | __all__ = ['psroi_align_rotated', 'PSRoIAlignRotated'] 5 | -------------------------------------------------------------------------------- /mmdet/ops/psroi_align_rotated/functions/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dingjiansw101/AerialDetection/fbb7726bc0c6898fc00e50a418a3f5e0838b30d4/mmdet/ops/psroi_align_rotated/functions/__init__.py -------------------------------------------------------------------------------- /mmdet/ops/psroi_align_rotated/functions/psroi_align_rotated.py: -------------------------------------------------------------------------------- 1 | from torch.autograd import Function 2 | 3 | from .. 
import psroi_align_rotated_cuda 4 | 5 | class PSRoIAlignRotatedFunction(Function): 6 | 7 | @staticmethod 8 | def forward(ctx, features, rois, out_size, spatial_scale, sample_num=0, output_dim=10, group_size=7): 9 | if isinstance(out_size, int): 10 | out_h = out_size 11 | out_w = out_size 12 | elif isinstance(out_size, tuple): 13 | assert len(out_size) == 2 14 | assert isinstance(out_size[0], int) 15 | assert isinstance(out_size[1], int) 16 | out_h, out_w = out_size 17 | else: 18 | raise TypeError( 19 | '"out_size" must be an integer or tuple of integers') 20 | ctx.spatial_scale = spatial_scale 21 | ctx.output_dim = output_dim 22 | ctx.group_size = group_size 23 | ctx.sample_num = sample_num 24 | ctx.save_for_backward(rois) 25 | ctx.feature_size = features.size() 26 | 27 | batch_size, num_channels, data_height, data_width = features.size() 28 | num_rois = rois.size(0) 29 | 30 | output = features.new_zeros(num_rois, output_dim, out_h, out_w) 31 | if features.is_cuda: 32 | psroi_align_rotated_cuda.forward(features, rois, out_h, out_w, spatial_scale, 33 | sample_num, output_dim, group_size, output) 34 | else: 35 | raise NotImplementedError 36 | 37 | return output 38 | 39 | @staticmethod 40 | def backward(ctx, grad_output): 41 | feature_size = ctx.feature_size 42 | spatial_scale = ctx.spatial_scale 43 | output_dim = ctx.output_dim 44 | group_size = ctx.group_size 45 | sample_num = ctx.sample_num 46 | rois = ctx.saved_tensors[0] 47 | assert (feature_size is not None and grad_output.is_cuda) 48 | 49 | batch_size, num_channels, data_height, data_width = feature_size 50 | out_w = grad_output.size(3) 51 | out_h = grad_output.size(2) 52 | 53 | grad_input = grad_rois = None 54 | if ctx.needs_input_grad[0]: 55 | grad_input = rois.new_zeros(batch_size, num_channels, data_height, 56 | data_width) 57 | psroi_align_rotated_cuda.backward(grad_output.contiguous(), rois, out_h, 58 | out_w, spatial_scale, sample_num, 59 | output_dim, group_size, 60 | grad_input) 61 | 62 | return 
grad_input, grad_rois, None, None, None, None, None 63 | 64 | 65 | psroi_align_rotated = PSRoIAlignRotatedFunction.apply -------------------------------------------------------------------------------- /mmdet/ops/psroi_align_rotated/modules/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dingjiansw101/AerialDetection/fbb7726bc0c6898fc00e50a418a3f5e0838b30d4/mmdet/ops/psroi_align_rotated/modules/__init__.py -------------------------------------------------------------------------------- /mmdet/ops/psroi_align_rotated/modules/psroi_align_rotated.py: -------------------------------------------------------------------------------- 1 | from torch.nn.modules.module import Module 2 | from ..functions.psroi_align_rotated import PSRoIAlignRotatedFunction 3 | 4 | 5 | class PSRoIAlignRotated(Module): 6 | 7 | def __init__(self, out_size, spatial_scale, sample_num=0, output_dim=10, group_size=7): 8 | super(PSRoIAlignRotated, self).__init__() 9 | 10 | self.out_size = out_size 11 | self.spatial_scale = float(spatial_scale) 12 | self.sample_num = int(sample_num) 13 | self.output_dim = int(output_dim) 14 | self.group_size = int(group_size) 15 | 16 | def forward(self, features, rois): 17 | return PSRoIAlignRotatedFunction.apply(features, rois, self.out_size, 18 | self.spatial_scale, self.sample_num, 19 | self.output_dim, self.group_size) 20 | -------------------------------------------------------------------------------- /mmdet/ops/psroi_align_rotated/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension 3 | 4 | setup( 5 | name='psroi_align_rotated_cuda', 6 | ext_modules=[ 7 | CUDAExtension('psroi_align_rotated_cuda', [ 8 | 'src/psroi_align_rotated_cuda.cpp', 9 | 'src/psroi_align_rotated_kernel.cu', 10 | ]), 11 | ], 12 | cmdclass={'build_ext': BuildExtension}) 13 
| -------------------------------------------------------------------------------- /mmdet/ops/roi_align/__init__.py: -------------------------------------------------------------------------------- 1 | from .functions.roi_align import roi_align 2 | from .modules.roi_align import RoIAlign 3 | 4 | __all__ = ['roi_align', 'RoIAlign'] 5 | -------------------------------------------------------------------------------- /mmdet/ops/roi_align/functions/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dingjiansw101/AerialDetection/fbb7726bc0c6898fc00e50a418a3f5e0838b30d4/mmdet/ops/roi_align/functions/__init__.py -------------------------------------------------------------------------------- /mmdet/ops/roi_align/functions/roi_align.py: -------------------------------------------------------------------------------- 1 | from torch.autograd import Function 2 | 3 | from .. import roi_align_cuda 4 | 5 | 6 | class RoIAlignFunction(Function): 7 | 8 | @staticmethod 9 | def forward(ctx, features, rois, out_size, spatial_scale, sample_num=0): 10 | if isinstance(out_size, int): 11 | out_h = out_size 12 | out_w = out_size 13 | elif isinstance(out_size, tuple): 14 | assert len(out_size) == 2 15 | assert isinstance(out_size[0], int) 16 | assert isinstance(out_size[1], int) 17 | out_h, out_w = out_size 18 | else: 19 | raise TypeError( 20 | '"out_size" must be an integer or tuple of integers') 21 | ctx.spatial_scale = spatial_scale 22 | ctx.sample_num = sample_num 23 | ctx.save_for_backward(rois) 24 | ctx.feature_size = features.size() 25 | 26 | batch_size, num_channels, data_height, data_width = features.size() 27 | num_rois = rois.size(0) 28 | 29 | output = features.new_zeros(num_rois, num_channels, out_h, out_w) 30 | if features.is_cuda: 31 | roi_align_cuda.forward(features, rois, out_h, out_w, spatial_scale, 32 | sample_num, output) 33 | else: 34 | raise NotImplementedError 35 | 36 | return output 37 | 38 | 
@staticmethod 39 | def backward(ctx, grad_output): 40 | feature_size = ctx.feature_size 41 | spatial_scale = ctx.spatial_scale 42 | sample_num = ctx.sample_num 43 | rois = ctx.saved_tensors[0] 44 | assert (feature_size is not None and grad_output.is_cuda) 45 | 46 | batch_size, num_channels, data_height, data_width = feature_size 47 | out_w = grad_output.size(3) 48 | out_h = grad_output.size(2) 49 | 50 | grad_input = grad_rois = None 51 | if ctx.needs_input_grad[0]: 52 | grad_input = rois.new_zeros(batch_size, num_channels, data_height, 53 | data_width) 54 | roi_align_cuda.backward(grad_output.contiguous(), rois, out_h, 55 | out_w, spatial_scale, sample_num, 56 | grad_input) 57 | 58 | return grad_input, grad_rois, None, None, None 59 | 60 | 61 | roi_align = RoIAlignFunction.apply 62 | -------------------------------------------------------------------------------- /mmdet/ops/roi_align/gradcheck.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | from torch.autograd import gradcheck 4 | 5 | import os.path as osp 6 | import sys 7 | sys.path.append(osp.abspath(osp.join(__file__, '../../'))) 8 | from roi_align import RoIAlign # noqa: E402 9 | 10 | feat_size = 15 11 | spatial_scale = 1.0 / 8 12 | img_size = feat_size / spatial_scale 13 | num_imgs = 2 14 | num_rois = 20 15 | 16 | batch_ind = np.random.randint(num_imgs, size=(num_rois, 1)) 17 | rois = np.random.rand(num_rois, 4) * img_size * 0.5 18 | rois[:, 2:] += img_size * 0.5 19 | rois = np.hstack((batch_ind, rois)) 20 | 21 | feat = torch.randn( 22 | num_imgs, 16, feat_size, feat_size, requires_grad=True, device='cuda:0') 23 | rois = torch.from_numpy(rois).float().cuda() 24 | inputs = (feat, rois) 25 | print('Gradcheck for roi align...') 26 | test = gradcheck(RoIAlign(3, spatial_scale), inputs, atol=1e-3, eps=1e-3) 27 | print(test) 28 | test = gradcheck(RoIAlign(3, spatial_scale, 2), inputs, atol=1e-3, eps=1e-3) 29 | print(test) 30 | 
-------------------------------------------------------------------------------- /mmdet/ops/roi_align/modules/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dingjiansw101/AerialDetection/fbb7726bc0c6898fc00e50a418a3f5e0838b30d4/mmdet/ops/roi_align/modules/__init__.py -------------------------------------------------------------------------------- /mmdet/ops/roi_align/modules/roi_align.py: -------------------------------------------------------------------------------- 1 | from torch.nn.modules.module import Module 2 | from ..functions.roi_align import RoIAlignFunction 3 | 4 | 5 | class RoIAlign(Module): 6 | 7 | def __init__(self, out_size, spatial_scale, sample_num=0): 8 | super(RoIAlign, self).__init__() 9 | 10 | self.out_size = out_size 11 | self.spatial_scale = float(spatial_scale) 12 | self.sample_num = int(sample_num) 13 | 14 | def forward(self, features, rois): 15 | return RoIAlignFunction.apply(features, rois, self.out_size, 16 | self.spatial_scale, self.sample_num) 17 | -------------------------------------------------------------------------------- /mmdet/ops/roi_align/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension 3 | 4 | setup( 5 | name='roi_align_cuda', 6 | ext_modules=[ 7 | CUDAExtension('roi_align_cuda', [ 8 | 'src/roi_align_cuda.cpp', 9 | 'src/roi_align_kernel.cu', 10 | ]), 11 | ], 12 | cmdclass={'build_ext': BuildExtension}) 13 | -------------------------------------------------------------------------------- /mmdet/ops/roi_align/src/roi_align_cuda.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | #include 5 | 6 | int ROIAlignForwardLaucher(const at::Tensor features, const at::Tensor rois, 7 | const float spatial_scale, const int sample_num, 8 | const int 
channels, const int height, 9 | const int width, const int num_rois, 10 | const int pooled_height, const int pooled_width, 11 | at::Tensor output); 12 | 13 | int ROIAlignBackwardLaucher(const at::Tensor top_grad, const at::Tensor rois, 14 | const float spatial_scale, const int sample_num, 15 | const int channels, const int height, 16 | const int width, const int num_rois, 17 | const int pooled_height, const int pooled_width, 18 | at::Tensor bottom_grad); 19 | 20 | #define CHECK_CUDA(x) AT_CHECK(x.type().is_cuda(), #x, " must be a CUDAtensor ") 21 | #define CHECK_CONTIGUOUS(x) \ 22 | AT_CHECK(x.is_contiguous(), #x, " must be contiguous ") 23 | #define CHECK_INPUT(x) \ 24 | CHECK_CUDA(x); \ 25 | CHECK_CONTIGUOUS(x) 26 | 27 | int roi_align_forward_cuda(at::Tensor features, at::Tensor rois, 28 | int pooled_height, int pooled_width, 29 | float spatial_scale, int sample_num, 30 | at::Tensor output) { 31 | CHECK_INPUT(features); 32 | CHECK_INPUT(rois); 33 | CHECK_INPUT(output); 34 | 35 | // Number of ROIs 36 | int num_rois = rois.size(0); 37 | int size_rois = rois.size(1); 38 | 39 | if (size_rois != 5) { 40 | printf("wrong roi size\n"); 41 | return 0; 42 | } 43 | 44 | int num_channels = features.size(1); 45 | int data_height = features.size(2); 46 | int data_width = features.size(3); 47 | 48 | ROIAlignForwardLaucher(features, rois, spatial_scale, sample_num, 49 | num_channels, data_height, data_width, num_rois, 50 | pooled_height, pooled_width, output); 51 | 52 | return 1; 53 | } 54 | 55 | int roi_align_backward_cuda(at::Tensor top_grad, at::Tensor rois, 56 | int pooled_height, int pooled_width, 57 | float spatial_scale, int sample_num, 58 | at::Tensor bottom_grad) { 59 | CHECK_INPUT(top_grad); 60 | CHECK_INPUT(rois); 61 | CHECK_INPUT(bottom_grad); 62 | 63 | // Number of ROIs 64 | int num_rois = rois.size(0); 65 | int size_rois = rois.size(1); 66 | if (size_rois != 5) { 67 | printf("wrong roi size\n"); 68 | return 0; 69 | } 70 | 71 | int num_channels = 
bottom_grad.size(1); 72 | int data_height = bottom_grad.size(2); 73 | int data_width = bottom_grad.size(3); 74 | 75 | ROIAlignBackwardLaucher(top_grad, rois, spatial_scale, sample_num, 76 | num_channels, data_height, data_width, num_rois, 77 | pooled_height, pooled_width, bottom_grad); 78 | 79 | return 1; 80 | } 81 | 82 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 83 | m.def("forward", &roi_align_forward_cuda, "Roi_Align forward (CUDA)"); 84 | m.def("backward", &roi_align_backward_cuda, "Roi_Align backward (CUDA)"); 85 | } 86 | -------------------------------------------------------------------------------- /mmdet/ops/roi_align_rotated/__init__.py: -------------------------------------------------------------------------------- 1 | from .functions.roi_align_rotated import roi_align_rotated 2 | from .modules.roi_align_rotated import RoIAlignRotated 3 | 4 | __all__ = ['roi_align_rotated', 'RoIAlignRotated'] 5 | -------------------------------------------------------------------------------- /mmdet/ops/roi_align_rotated/functions/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dingjiansw101/AerialDetection/fbb7726bc0c6898fc00e50a418a3f5e0838b30d4/mmdet/ops/roi_align_rotated/functions/__init__.py -------------------------------------------------------------------------------- /mmdet/ops/roi_align_rotated/functions/roi_align_rotated.py: -------------------------------------------------------------------------------- 1 | from torch.autograd import Function 2 | 3 | from .. 
import roi_align_rotated_cuda 4 | 5 | class RoIAlignRotatedFunction(Function): 6 | 7 | @staticmethod 8 | def forward(ctx, features, rois, out_size, spatial_scale, sample_num=0): 9 | if isinstance(out_size, int): 10 | out_h = out_size 11 | out_w = out_size 12 | elif isinstance(out_size, tuple): 13 | assert len(out_size) == 2 14 | assert isinstance(out_size[0], int) 15 | assert isinstance(out_size[1], int) 16 | out_h, out_w = out_size 17 | else: 18 | raise TypeError( 19 | '"out_size" must be an integer or tuple of integers') 20 | ctx.spatial_scale = spatial_scale 21 | ctx.sample_num = sample_num 22 | ctx.save_for_backward(rois) 23 | ctx.feature_size = features.size() 24 | 25 | batch_size, num_channels, data_height, data_width = features.size() 26 | num_rois = rois.size(0) 27 | 28 | output = features.new_zeros(num_rois, num_channels, out_h, out_w) 29 | if features.is_cuda: 30 | roi_align_rotated_cuda.forward(features, rois, out_h, out_w, spatial_scale, 31 | sample_num, output) 32 | else: 33 | raise NotImplementedError 34 | 35 | return output 36 | 37 | @staticmethod 38 | def backward(ctx, grad_output): 39 | feature_size = ctx.feature_size 40 | spatial_scale = ctx.spatial_scale 41 | sample_num = ctx.sample_num 42 | rois = ctx.saved_tensors[0] 43 | assert (feature_size is not None and grad_output.is_cuda) 44 | 45 | batch_size, num_channels, data_height, data_width = feature_size 46 | out_w = grad_output.size(3) 47 | out_h = grad_output.size(2) 48 | 49 | grad_input = grad_rois = None 50 | if ctx.needs_input_grad[0]: 51 | grad_input = rois.new_zeros(batch_size, num_channels, data_height, 52 | data_width) 53 | roi_align_rotated_cuda.backward(grad_output.contiguous(), rois, out_h, 54 | out_w, spatial_scale, sample_num, 55 | grad_input) 56 | 57 | return grad_input, grad_rois, None, None, None 58 | 59 | 60 | roi_align_rotated = RoIAlignRotatedFunction.apply -------------------------------------------------------------------------------- 
/mmdet/ops/roi_align_rotated/modules/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dingjiansw101/AerialDetection/fbb7726bc0c6898fc00e50a418a3f5e0838b30d4/mmdet/ops/roi_align_rotated/modules/__init__.py -------------------------------------------------------------------------------- /mmdet/ops/roi_align_rotated/modules/roi_align_rotated.py: -------------------------------------------------------------------------------- 1 | from torch.nn.modules.module import Module 2 | from ..functions.roi_align_rotated import RoIAlignRotatedFunction 3 | 4 | 5 | class RoIAlignRotated(Module): 6 | 7 | def __init__(self, out_size, spatial_scale, sample_num=0): 8 | super(RoIAlignRotated, self).__init__() 9 | 10 | self.out_size = out_size 11 | self.spatial_scale = float(spatial_scale) 12 | self.sample_num = int(sample_num) 13 | 14 | def forward(self, features, rois): 15 | return RoIAlignRotatedFunction.apply(features, rois, self.out_size, 16 | self.spatial_scale, self.sample_num) 17 | -------------------------------------------------------------------------------- /mmdet/ops/roi_align_rotated/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension 3 | 4 | setup( 5 | name='roi_align_rotated_cuda', 6 | ext_modules=[ 7 | CUDAExtension('roi_align_rotated_cuda', [ 8 | 'src/roi_align_rotated_cuda.cpp', 9 | 'src/roi_align_rotated_kernel.cu', 10 | ]), 11 | ], 12 | cmdclass={'build_ext': BuildExtension}) 13 | -------------------------------------------------------------------------------- /mmdet/ops/roi_pool/__init__.py: -------------------------------------------------------------------------------- 1 | from .functions.roi_pool import roi_pool 2 | from .modules.roi_pool import RoIPool 3 | 4 | __all__ = ['roi_pool', 'RoIPool'] 5 | 
-------------------------------------------------------------------------------- /mmdet/ops/roi_pool/functions/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dingjiansw101/AerialDetection/fbb7726bc0c6898fc00e50a418a3f5e0838b30d4/mmdet/ops/roi_pool/functions/__init__.py -------------------------------------------------------------------------------- /mmdet/ops/roi_pool/functions/roi_pool.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.autograd import Function 3 | 4 | from .. import roi_pool_cuda 5 | 6 | 7 | class RoIPoolFunction(Function): 8 | 9 | @staticmethod 10 | def forward(ctx, features, rois, out_size, spatial_scale): 11 | if isinstance(out_size, int): 12 | out_h = out_size 13 | out_w = out_size 14 | elif isinstance(out_size, tuple): 15 | assert len(out_size) == 2 16 | assert isinstance(out_size[0], int) 17 | assert isinstance(out_size[1], int) 18 | out_h, out_w = out_size 19 | else: 20 | raise TypeError( 21 | '"out_size" must be an integer or tuple of integers') 22 | assert features.is_cuda 23 | ctx.save_for_backward(rois) 24 | num_channels = features.size(1) 25 | num_rois = rois.size(0) 26 | out_size = (num_rois, num_channels, out_h, out_w) 27 | output = features.new_zeros(out_size) 28 | argmax = features.new_zeros(out_size, dtype=torch.int) 29 | roi_pool_cuda.forward(features, rois, out_h, out_w, spatial_scale, 30 | output, argmax) 31 | ctx.spatial_scale = spatial_scale 32 | ctx.feature_size = features.size() 33 | ctx.argmax = argmax 34 | 35 | return output 36 | 37 | @staticmethod 38 | def backward(ctx, grad_output): 39 | assert grad_output.is_cuda 40 | spatial_scale = ctx.spatial_scale 41 | feature_size = ctx.feature_size 42 | argmax = ctx.argmax 43 | rois = ctx.saved_tensors[0] 44 | assert feature_size is not None 45 | 46 | grad_input = grad_rois = None 47 | if ctx.needs_input_grad[0]: 48 | grad_input = 
grad_output.new_zeros(feature_size) 49 | roi_pool_cuda.backward(grad_output.contiguous(), rois, argmax, 50 | spatial_scale, grad_input) 51 | 52 | return grad_input, grad_rois, None, None 53 | 54 | 55 | roi_pool = RoIPoolFunction.apply 56 | -------------------------------------------------------------------------------- /mmdet/ops/roi_pool/gradcheck.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.autograd import gradcheck 3 | 4 | import os.path as osp 5 | import sys 6 | sys.path.append(osp.abspath(osp.join(__file__, '../../'))) 7 | from roi_pool import RoIPool # noqa: E402 8 | 9 | feat = torch.randn(4, 16, 15, 15, requires_grad=True).cuda() 10 | rois = torch.Tensor([[0, 0, 0, 50, 50], [0, 10, 30, 43, 55], 11 | [1, 67, 40, 110, 120]]).cuda() 12 | inputs = (feat, rois) 13 | print('Gradcheck for roi pooling...') 14 | test = gradcheck(RoIPool(4, 1.0 / 8), inputs, eps=1e-5, atol=1e-3) 15 | print(test) 16 | -------------------------------------------------------------------------------- /mmdet/ops/roi_pool/modules/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dingjiansw101/AerialDetection/fbb7726bc0c6898fc00e50a418a3f5e0838b30d4/mmdet/ops/roi_pool/modules/__init__.py -------------------------------------------------------------------------------- /mmdet/ops/roi_pool/modules/roi_pool.py: -------------------------------------------------------------------------------- 1 | from torch.nn.modules.module import Module 2 | from ..functions.roi_pool import roi_pool 3 | 4 | 5 | class RoIPool(Module): 6 | 7 | def __init__(self, out_size, spatial_scale): 8 | super(RoIPool, self).__init__() 9 | 10 | self.out_size = out_size 11 | self.spatial_scale = float(spatial_scale) 12 | 13 | def forward(self, features, rois): 14 | return roi_pool(features, rois, self.out_size, self.spatial_scale) 15 | 
-------------------------------------------------------------------------------- /mmdet/ops/roi_pool/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension 3 | 4 | setup( 5 | name='roi_pool', 6 | ext_modules=[ 7 | CUDAExtension('roi_pool_cuda', [ 8 | 'src/roi_pool_cuda.cpp', 9 | 'src/roi_pool_kernel.cu', 10 | ]) 11 | ], 12 | cmdclass={'build_ext': BuildExtension}) 13 | -------------------------------------------------------------------------------- /mmdet/ops/roi_pool/src/roi_pool_cuda.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | #include 5 | 6 | int ROIPoolForwardLaucher(const at::Tensor features, const at::Tensor rois, 7 | const float spatial_scale, const int channels, 8 | const int height, const int width, const int num_rois, 9 | const int pooled_h, const int pooled_w, 10 | at::Tensor output, at::Tensor argmax); 11 | 12 | int ROIPoolBackwardLaucher(const at::Tensor top_grad, const at::Tensor rois, 13 | const at::Tensor argmax, const float spatial_scale, 14 | const int batch_size, const int channels, 15 | const int height, const int width, 16 | const int num_rois, const int pooled_h, 17 | const int pooled_w, at::Tensor bottom_grad); 18 | 19 | #define CHECK_CUDA(x) AT_CHECK(x.type().is_cuda(), #x, " must be a CUDAtensor ") 20 | #define CHECK_CONTIGUOUS(x) \ 21 | AT_CHECK(x.is_contiguous(), #x, " must be contiguous ") 22 | #define CHECK_INPUT(x) \ 23 | CHECK_CUDA(x); \ 24 | CHECK_CONTIGUOUS(x) 25 | 26 | int roi_pooling_forward_cuda(at::Tensor features, at::Tensor rois, 27 | int pooled_height, int pooled_width, 28 | float spatial_scale, at::Tensor output, 29 | at::Tensor argmax) { 30 | CHECK_INPUT(features); 31 | CHECK_INPUT(rois); 32 | CHECK_INPUT(output); 33 | CHECK_INPUT(argmax); 34 | 35 | // Number of ROIs 36 | int num_rois = rois.size(0); 37 | int 
size_rois = rois.size(1); 38 | 39 | if (size_rois != 5) { 40 | printf("wrong roi size\n"); 41 | return 0; 42 | } 43 | 44 | int channels = features.size(1); 45 | int height = features.size(2); 46 | int width = features.size(3); 47 | 48 | ROIPoolForwardLaucher(features, rois, spatial_scale, channels, height, width, 49 | num_rois, pooled_height, pooled_width, output, argmax); 50 | 51 | return 1; 52 | } 53 | 54 | int roi_pooling_backward_cuda(at::Tensor top_grad, at::Tensor rois, 55 | at::Tensor argmax, float spatial_scale, 56 | at::Tensor bottom_grad) { 57 | CHECK_INPUT(top_grad); 58 | CHECK_INPUT(rois); 59 | CHECK_INPUT(argmax); 60 | CHECK_INPUT(bottom_grad); 61 | 62 | int pooled_height = top_grad.size(2); 63 | int pooled_width = top_grad.size(3); 64 | int num_rois = rois.size(0); 65 | int size_rois = rois.size(1); 66 | 67 | if (size_rois != 5) { 68 | printf("wrong roi size\n"); 69 | return 0; 70 | } 71 | int batch_size = bottom_grad.size(0); 72 | int channels = bottom_grad.size(1); 73 | int height = bottom_grad.size(2); 74 | int width = bottom_grad.size(3); 75 | 76 | ROIPoolBackwardLaucher(top_grad, rois, argmax, spatial_scale, batch_size, 77 | channels, height, width, num_rois, pooled_height, 78 | pooled_width, bottom_grad); 79 | 80 | return 1; 81 | } 82 | 83 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 84 | m.def("forward", &roi_pooling_forward_cuda, "Roi_Pooling forward (CUDA)"); 85 | m.def("backward", &roi_pooling_backward_cuda, "Roi_Pooling backward (CUDA)"); 86 | } 87 | -------------------------------------------------------------------------------- /mmdet/ops/sigmoid_focal_loss/__init__.py: -------------------------------------------------------------------------------- 1 | from .modules.sigmoid_focal_loss import SigmoidFocalLoss, sigmoid_focal_loss 2 | 3 | __all__ = ['SigmoidFocalLoss', 'sigmoid_focal_loss'] 4 | -------------------------------------------------------------------------------- /mmdet/ops/sigmoid_focal_loss/functions/__init__.py: 
class SigmoidFocalLossFunction(Function):
    """Autograd wrapper around the compiled sigmoid focal-loss CUDA kernels."""

    @staticmethod
    def forward(ctx, input, target, gamma=2.0, alpha=0.25, reduction='mean'):
        # Stash everything the backward pass will need.
        ctx.save_for_backward(input, target)
        ctx.num_classes = input.shape[1]
        ctx.gamma = gamma
        ctx.alpha = alpha

        loss = sigmoid_focal_loss_cuda.forward(input, target, ctx.num_classes,
                                               gamma, alpha)
        # F._Reduction.get_enum maps: 'none' -> 0, 'mean' -> 1, 'sum' -> 2
        reduction_enum = F._Reduction.get_enum(reduction)
        if reduction_enum == 1:
            return loss.mean()
        if reduction_enum == 2:
            return loss.sum()
        if reduction_enum == 0:
            return loss

    @staticmethod
    @once_differentiable
    def backward(ctx, d_loss):
        input, target = ctx.saved_tensors
        d_input = sigmoid_focal_loss_cuda.backward(input, target,
                                                   d_loss.contiguous(),
                                                   ctx.num_classes, ctx.gamma,
                                                   ctx.alpha)
        # One gradient slot per forward argument; only `input` is
        # differentiable.
        return d_input, None, None, None, None


sigmoid_focal_loss = SigmoidFocalLossFunction.apply
class SigmoidFocalLoss(nn.Module):
    """Sum-reduced sigmoid focal loss backed by the CUDA extension.

    NOTE(review): forward asserts CUDA inputs — the kernel has no CPU path.
    """

    def __init__(self, gamma, alpha):
        super(SigmoidFocalLoss, self).__init__()
        self.gamma = gamma
        self.alpha = alpha

    def forward(self, logits, targets):
        assert logits.is_cuda
        return sigmoid_focal_loss(logits, targets, self.gamma,
                                  self.alpha).sum()

    def __repr__(self):
        return '{}(gamma={}, alpha={})'.format(self.__class__.__name__,
                                               self.gamma, self.alpha)
// CUDA implementations, defined in sigmoid_focal_loss_cuda.cu.
at::Tensor SigmoidFocalLoss_forward_cuda(const at::Tensor &logits,
                                         const at::Tensor &targets,
                                         const int num_classes,
                                         const float gamma, const float alpha);

at::Tensor SigmoidFocalLoss_backward_cuda(const at::Tensor &logits,
                                          const at::Tensor &targets,
                                          const at::Tensor &d_losses,
                                          const int num_classes,
                                          const float gamma, const float alpha);

// Interface for Python.  Only a CUDA path exists; previously a CPU tensor made
// control fall off the end of a non-void function (undefined behaviour).  Now
// it raises a clear error instead.
at::Tensor SigmoidFocalLoss_forward(const at::Tensor &logits,
                                    const at::Tensor &targets,
                                    const int num_classes, const float gamma,
                                    const float alpha) {
  if (logits.type().is_cuda()) {
    return SigmoidFocalLoss_forward_cuda(logits, targets, num_classes, gamma,
                                         alpha);
  }
  AT_ERROR("SigmoidFocalLoss_forward is not implemented on CPU");
}

at::Tensor SigmoidFocalLoss_backward(const at::Tensor &logits,
                                     const at::Tensor &targets,
                                     const at::Tensor &d_losses,
                                     const int num_classes, const float gamma,
                                     const float alpha) {
  if (logits.type().is_cuda()) {
    return SigmoidFocalLoss_backward_cuda(logits, targets, d_losses,
                                          num_classes, gamma, alpha);
  }
  AT_ERROR("SigmoidFocalLoss_backward is not implemented on CPU");
}

PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
  m.def("forward", &SigmoidFocalLoss_forward,
        "SigmoidFocalLoss forward (CUDA)");
  m.def("backward", &SigmoidFocalLoss_backward,
        "SigmoidFocalLoss backward (CUDA)");
}
class Registry(object):
    """A minimal name -> class registry used to build modules from configs."""

    def __init__(self, name):
        # Human-readable registry name, e.g. 'backbone' or 'detector'.
        self._name = name
        # Maps class __name__ -> class object.
        self._module_dict = dict()

    def __repr__(self):
        return '{}(name={}, items={})'.format(self.__class__.__name__,
                                              self._name,
                                              list(self._module_dict.keys()))

    @property
    def name(self):
        return self._name

    @property
    def module_dict(self):
        return self._module_dict

    def get(self, key):
        """Return the class registered under ``key``, or None when absent."""
        return self._module_dict.get(key, None)

    def _register_module(self, module_class):
        """Register ``module_class`` under its ``__name__``.

        Args:
            module_class (type): class to be registered.

        Raises:
            TypeError: if ``module_class`` is not a class.
            KeyError: if a class of the same name is already registered.
        """
        if not inspect.isclass(module_class):
            raise TypeError('module must be a class, but got {}'.format(
                type(module_class)))
        module_name = module_class.__name__
        if module_name in self._module_dict:
            raise KeyError('{} is already registered in {}'.format(
                module_name, self.name))
        self._module_dict[module_name] = module_class

    def register_module(self, cls):
        """Class decorator: register ``cls`` and return it unchanged."""
        self._register_module(cls)
        return cls
def build_from_cfg(cfg, registry, default_args=None):
    """Build a module from config dict.

    Args:
        cfg (dict): Config dict. It should at least contain the key "type",
            which is either a registered name (str) or a class.
        registry (:obj:`Registry`): The registry to search the type from.
        default_args (dict, optional): Default initialization arguments,
            applied only for keys absent from ``cfg``.

    Returns:
        obj: The constructed object.

    Raises:
        KeyError: if a string ``type`` is not found in ``registry``.
        TypeError: if ``type`` is neither a str nor a class.
    """
    assert isinstance(cfg, dict) and 'type' in cfg
    assert isinstance(default_args, dict) or default_args is None
    args = cfg.copy()
    obj_type = args.pop('type')
    if isinstance(obj_type, str):
        # Bug fix: resolve into a separate variable so the error message below
        # reports the requested name instead of None (the old code overwrote
        # obj_type with registry.get()'s result before formatting the error).
        obj_cls = registry.get(obj_type)
        if obj_cls is None:
            raise KeyError('{} is not in the {} registry'.format(
                obj_type, registry.name))
    elif inspect.isclass(obj_type):
        obj_cls = obj_type
    else:
        raise TypeError('type must be a str or valid type, but got {}'.format(
            type(obj_type)))
    if default_args is not None:
        for name, value in default_args.items():
            args.setdefault(name, value)
    return obj_cls(**args)
def main():
    """CLI entry point: evaluate a result file against COCO annotations."""
    parser = ArgumentParser(description='COCO Evaluation')
    parser.add_argument('result', help='result file path')
    parser.add_argument('--ann', help='annotation file path')
    eval_types = ['proposal_fast', 'proposal', 'bbox', 'segm', 'keypoint']
    parser.add_argument(
        '--types',
        type=str,
        nargs='+',
        choices=eval_types,
        default=['bbox'],
        help='result types')
    parser.add_argument(
        '--max-dets',
        type=int,
        nargs='+',
        default=[100, 300, 1000],
        help='proposal numbers, only used for recall evaluation')
    args = parser.parse_args()
    coco_eval(args.result, args.types, args.ann, args.max_dets)
# Work-dir names of the trained DOTA-v1.0 models.
configs_dota = [
    'retinanet_r50_fpn_2x_dota',
    'retinanet_v5_obb_r50_fpn_2x_dota',
    'mask_rcnn_r50_fpn_1x_dota',
    'htc_without_semantic_r50_fpn_1x_dota',
    'faster_rcnn_r50_fpn_1x_dota',
    'faster_rcnn_r50_fpn_1x_dota_obb_v3',
    'faster_rcnn_dpool_v3_r50_fpn_1x_dota_obb',
    'faster_rcnn_obb_hbb_v3_r50_fpn_1x_dota',
    'faster_rcnn_r50_fpn_1x_dota_RoITrans_v5',
    'cascade_mask_rcnn_r50_fpn_1x_dota'
]

# Work-dir names of the trained DOTA-v1.5 models.
configs_dota1_5 = [
    'retinanet_r50_fpn_2x_dota1_5_v2',
    'retinanet_v5_obb_r50_fpn_2x_dota1_5_v2',
    'mask_rcnn_r50_fpn_1x_dota1_5_v2',
    'cascade_mask_rcnn_r50_fpn_1x_dota1_5_v2',
    'htc_without_semantic_r50_fpn_1x_dota1_5_v2',
    'faster_rcnn_r50_fpn_1x_dota1_5_v2',
    'faster_rcnn_r50_fpn_1x_dota1_5_v2_obb_v3',
    'faster_rcnn_dpool_v3_r50_fpn_1x_dota1_5_v2_obb',
    'faster_rcnn_obb_hbb_v3_r50_fpn_1x_dota1_5_v2',
    'faster_rcnn_r50_fpn_1x_dota1_5_v2_RoITrans_v5'
]

# Work-dir names of the trained DOTA-v2 models.
configs_dota2 = [
    'retinanet_r50_fpn_2x_dota2_v3',
    'retinanet_v5_obb_r50_fpn_2x_dota2_v3',
    'mask_rcnn_r50_fpn_1x_dota2_v3',
    'cascade_mask_rcnn_r50_fpn_1x_dota2_v3',
    'htc_without_semantic_r50_fpn_1x_dota2_v3',
    'faster_rcnn_r50_fpn_1x_dota2_v3',
    'faster_rcnn_r50_fpn_1x_dota2_v3_obb_v3',
    'faster_rcnn_dpool_v3_r50_fpn_1x_dota2_v3_obb',
    'faster_rcnn_obb_hbb_v3_r50_fpn_1x_dota2_v3',
    'faster_rcnn_r50_fpn_1x_dota2_v3_RoITrans_v5'
]


def param_copy(srcworkdir, dstworkdir, filenames=None):
    """Copy the final checkpoint of each work dir from src to dst.

    Args:
        srcworkdir (str): directory containing one sub-dir per config.
        dstworkdir (str): destination directory (sub-dirs created as needed).
        filenames (list[str], optional): work-dir names to copy; defaults to
            ``configs_dota2`` (the previous hard-coded behaviour, which was
            switched between dataset lists by editing the code).
    """
    if filenames is None:
        filenames = configs_dota2
    for file in filenames:
        dst_config_path = os.path.join(dstworkdir, file)
        if not os.path.exists(dst_config_path):
            os.makedirs(dst_config_path)
        src_config_path = os.path.join(srcworkdir, file)
        # '1x' schedules stop at epoch 12, '2x' schedules at epoch 24.
        if '1x' in file:
            shutil.copy(os.path.join(src_config_path, 'epoch_12.pth'),
                        os.path.join(dst_config_path, 'epoch_12.pth'))
        elif '2x' in file:
            shutil.copy(os.path.join(src_config_path, 'epoch_24.pth'),
                        os.path.join(dst_config_path, 'epoch_24.pth'))
        else:
            # Fix: this warning used to be misspelled as 'warnining'.
            print('warning', file)


if __name__ == '__main__':
    param_copy(r'/home/dingjian/project/code/mmdetection_DOTA/work_dirs',
               r'/home/dingjian/project/code/Aerialdetection/work_dirs')
def process_checkpoint(in_file, out_file):
    """Strip a training checkpoint for publishing and stamp it with its hash.

    Loads ``in_file``, drops the optimizer state (much smaller file), saves it
    to ``out_file``, then renames the result to ``<stem>-<sha8>.pth`` where
    ``sha8`` is the first 8 hex digits of the saved file's SHA-256.
    """
    # Local stdlib imports: used nowhere else in this script.
    import hashlib
    import os

    checkpoint = torch.load(in_file, map_location='cpu')
    # remove optimizer for smaller file size
    if 'optimizer' in checkpoint:
        del checkpoint['optimizer']
    # if it is necessary to remove some sensitive data in checkpoint['meta'],
    # add the code here.
    torch.save(checkpoint, out_file)

    # hashlib instead of shelling out to `sha256sum`: portable, no subprocess.
    with open(out_file, 'rb') as f:
        sha = hashlib.sha256(f.read()).hexdigest()
    # Fix: ``out_file.rstrip('.pth')`` stripped a *character set*, mangling
    # names such as 'net_output.pth' -> 'net_outpu'.  Remove the suffix.
    stem = out_file[:-4] if out_file.endswith('.pth') else out_file
    final_file = stem + '-{}.pth'.format(sha[:8])
    # Synchronous rename instead of a fire-and-forget `mv` subprocess.
    os.replace(out_file, final_file)
def parse_args():
    """Parse command-line options for training."""
    parser = argparse.ArgumentParser(description='Train a detector')
    parser.add_argument('config', help='train config file path')
    parser.add_argument('--work_dir', help='the dir to save logs and models')
    parser.add_argument(
        '--resume_from', help='the checkpoint file to resume from')
    parser.add_argument(
        '--validate',
        action='store_true',
        help='whether to evaluate the checkpoint during training')
    parser.add_argument(
        '--gpus',
        type=int,
        default=1,
        help='number of gpus to use '
        '(only applicable to non-distributed training)')
    parser.add_argument('--seed', type=int, default=None, help='random seed')
    parser.add_argument(
        '--launcher',
        choices=['none', 'pytorch', 'slurm', 'mpi'],
        default='none',
        help='job launcher')
    parser.add_argument('--local_rank', type=int, default=0)
    return parser.parse_args()


def main():
    """Train a detector from a config file, optionally distributed."""
    args = parse_args()

    cfg = Config.fromfile(args.config)
    # set cudnn_benchmark
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True

    # CLI options take precedence over the config file.
    if args.work_dir is not None:
        cfg.work_dir = args.work_dir
    if args.resume_from is not None:
        cfg.resume_from = args.resume_from
    cfg.gpus = args.gpus

    # init distributed env first, since logger depends on the dist info.
    distributed = args.launcher != 'none'
    if distributed:
        init_dist(args.launcher, **cfg.dist_params)

    # init logger before other steps
    logger = get_root_logger(cfg.log_level)
    logger.info('Distributed training: {}'.format(distributed))

    # set random seeds
    if args.seed is not None:
        logger.info('Set random seed to {}'.format(args.seed))
        set_random_seed(args.seed)

    model = build_detector(
        cfg.model, train_cfg=cfg.train_cfg, test_cfg=cfg.test_cfg)

    train_dataset = get_dataset(cfg.data.train)
    if cfg.checkpoint_config is not None:
        # save mmdet version, config file content and class names in
        # checkpoints as meta data
        cfg.checkpoint_config.meta = dict(
            mmdet_version=__version__,
            config=cfg.text,
            CLASSES=train_dataset.CLASSES)
    # add an attribute for visualization convenience
    model.CLASSES = train_dataset.CLASSES
    train_detector(
        model,
        train_dataset,
        cfg,
        distributed=distributed,
        validate=args.validate,
        logger=logger)


if __name__ == '__main__':
    main()
def voc_eval(result_file, dataset, iou_thr=0.5):
    """Evaluate Pascal-VOC-style mAP for ``result_file`` on ``dataset``.

    Args:
        result_file (str): detection results loadable by ``mmcv.load``.
        dataset: dataset object providing ``get_ann_info`` and ``CLASSES``
            (and optionally ``year`` for the VOC07 11-point metric).
        iou_thr (float): IoU threshold for a detection to count as a match.
    """
    det_results = mmcv.load(result_file)
    gt_bboxes = []
    gt_labels = []
    gt_ignore = []
    for i in range(len(dataset)):
        ann = dataset.get_ann_info(i)
        bboxes = ann['bboxes']
        labels = ann['labels']
        if 'bboxes_ignore' in ann:
            # Flag the appended ignore boxes so eval_map can skip them.
            # (plain ``bool`` dtype: ``np.bool`` is removed in modern numpy)
            ignore = np.concatenate([
                np.zeros(bboxes.shape[0], dtype=bool),
                np.ones(ann['bboxes_ignore'].shape[0], dtype=bool)
            ])
            gt_ignore.append(ignore)
            bboxes = np.vstack([bboxes, ann['bboxes_ignore']])
            labels = np.concatenate([labels, ann['labels_ignore']])
        gt_bboxes.append(bboxes)
        gt_labels.append(labels)
    if not gt_ignore:
        # Fix: this line used to read ``gt_ignore = gt_ignore`` (a no-op),
        # passing an empty list where eval_map expects None when no image
        # has ignore boxes.
        gt_ignore = None
    if hasattr(dataset, 'year') and dataset.year == 2007:
        dataset_name = 'voc07'
    else:
        dataset_name = dataset.CLASSES
    eval_map(
        det_results,
        gt_bboxes,
        gt_labels,
        gt_ignore=gt_ignore,
        scale_ranges=None,
        iou_thr=iou_thr,
        dataset=dataset_name,
        print_summary=True)


def main():
    """CLI entry point: build the test dataset from a config and evaluate."""
    parser = ArgumentParser(description='VOC Evaluation')
    parser.add_argument('result', help='result file path')
    parser.add_argument('config', help='config file path')
    parser.add_argument(
        '--iou-thr',
        type=float,
        default=0.5,
        help='IoU threshold for evaluation')
    args = parser.parse_args()
    cfg = mmcv.Config.fromfile(args.config)
    test_dataset = mmcv.runner.obj_from_dict(cfg.data.test, datasets)
    voc_eval(args.result, test_dataset, args.iou_thr)


if __name__ == '__main__':
    main()