├── .gitignore ├── GETTING_STARTED.md ├── INSTALL.md ├── LICENSE ├── MODEL_ZOO.md ├── README.md ├── README_d2.md ├── configs ├── Base-RCNN-C4.yaml ├── Base-RCNN-DilatedC5.yaml ├── Base-RCNN-FPN.yaml ├── Base-RetinaNet.yaml ├── COCO-Detection │ ├── fast_rcnn_R_50_FPN_1x.yaml │ ├── faster_rcnn_R_101_C4_3x.yaml │ ├── faster_rcnn_R_101_DC5_3x.yaml │ ├── faster_rcnn_R_101_FPN_3x.yaml │ ├── faster_rcnn_R_50_C4_1x.yaml │ ├── faster_rcnn_R_50_C4_3x.yaml │ ├── faster_rcnn_R_50_DC5_1x.yaml │ ├── faster_rcnn_R_50_DC5_3x.yaml │ ├── faster_rcnn_R_50_FPN_1x.yaml │ ├── faster_rcnn_R_50_FPN_3x.yaml │ ├── faster_rcnn_X_101_32x8d_FPN_3x.yaml │ ├── retinanet_R_101_FPN_3x.yaml │ ├── retinanet_R_50_FPN_1x.yaml │ ├── retinanet_R_50_FPN_3x.yaml │ ├── rpn_R_50_C4_1x.yaml │ ├── rpn_R_50_FPN_1x.yaml │ └── zhang │ │ └── faster_rcnn_R_50_C4_1x.yaml ├── COCO-InstanceSegmentation │ ├── mask_rcnn_R_101_C4_3x.yaml │ ├── mask_rcnn_R_101_DC5_3x.yaml │ ├── mask_rcnn_R_101_FPN_3x.yaml │ ├── mask_rcnn_R_50_C4_1x.yaml │ ├── mask_rcnn_R_50_C4_3x.yaml │ ├── mask_rcnn_R_50_DC5_1x.yaml │ ├── mask_rcnn_R_50_DC5_3x.yaml │ ├── mask_rcnn_R_50_FPN_1x.yaml │ ├── mask_rcnn_R_50_FPN_1x_giou.yaml │ ├── mask_rcnn_R_50_FPN_3x.yaml │ └── mask_rcnn_X_101_32x8d_FPN_3x.yaml ├── COCO-Keypoints │ ├── Base-Keypoint-RCNN-FPN.yaml │ ├── keypoint_rcnn_R_101_FPN_3x.yaml │ ├── keypoint_rcnn_R_50_FPN_1x.yaml │ ├── keypoint_rcnn_R_50_FPN_3x.yaml │ └── keypoint_rcnn_X_101_32x8d_FPN_3x.yaml ├── COCO-PanopticSegmentation │ ├── Base-Panoptic-FPN.yaml │ ├── panoptic_fpn_R_101_3x.yaml │ ├── panoptic_fpn_R_50_1x.yaml │ └── panoptic_fpn_R_50_3x.yaml ├── Cityscapes │ └── mask_rcnn_R_50_FPN.yaml ├── Detectron1-Comparisons │ ├── README.md │ ├── faster_rcnn_R_50_FPN_noaug_1x.yaml │ ├── keypoint_rcnn_R_50_FPN_1x.yaml │ └── mask_rcnn_R_50_FPN_noaug_1x.yaml ├── LVISv0.5-InstanceSegmentation │ ├── mask_rcnn_R_101_FPN_1x.yaml │ ├── mask_rcnn_R_50_FPN_1x.yaml │ └── mask_rcnn_X_101_32x8d_FPN_1x.yaml ├── LVISv1-InstanceSegmentation │ ├── mask_rcnn_R_101_FPN_1x.yaml │ ├── mask_rcnn_R_50_FPN_1x.yaml │ └── mask_rcnn_X_101_32x8d_FPN_1x.yaml ├── Misc │ ├── cascade_mask_rcnn_R_50_FPN_1x.yaml │ ├── cascade_mask_rcnn_R_50_FPN_3x.yaml │ ├── cascade_mask_rcnn_X_152_32x8d_FPN_IN5k_gn_dconv.yaml │ ├── mask_rcnn_R_50_FPN_1x_cls_agnostic.yaml │ ├── mask_rcnn_R_50_FPN_1x_dconv_c3-c5.yaml │ ├── mask_rcnn_R_50_FPN_3x_dconv_c3-c5.yaml │ ├── mask_rcnn_R_50_FPN_3x_gn.yaml │ ├── mask_rcnn_R_50_FPN_3x_syncbn.yaml │ ├── panoptic_fpn_R_101_dconv_cascade_gn_3x.yaml │ ├── scratch_mask_rcnn_R_50_FPN_3x_gn.yaml │ ├── scratch_mask_rcnn_R_50_FPN_9x_gn.yaml │ ├── scratch_mask_rcnn_R_50_FPN_9x_syncbn.yaml │ └── semantic_R_50_FPN_1x.yaml ├── PascalVOC-Detection │ ├── faster_rcnn_R_50_C4.yaml │ └── faster_rcnn_R_50_FPN.yaml └── quick_schedules │ ├── README.md │ ├── cascade_mask_rcnn_R_50_FPN_inference_acc_test.yaml │ ├── cascade_mask_rcnn_R_50_FPN_instant_test.yaml │ ├── fast_rcnn_R_50_FPN_inference_acc_test.yaml │ ├── fast_rcnn_R_50_FPN_instant_test.yaml │ ├── keypoint_rcnn_R_50_FPN_inference_acc_test.yaml │ ├── keypoint_rcnn_R_50_FPN_instant_test.yaml │ ├── keypoint_rcnn_R_50_FPN_normalized_training_acc_test.yaml │ ├── keypoint_rcnn_R_50_FPN_training_acc_test.yaml │ ├── mask_rcnn_R_50_C4_GCV_instant_test.yaml │ ├── mask_rcnn_R_50_C4_inference_acc_test.yaml │ ├── mask_rcnn_R_50_C4_instant_test.yaml │ ├── mask_rcnn_R_50_C4_training_acc_test.yaml │ ├── mask_rcnn_R_50_DC5_inference_acc_test.yaml │ ├── mask_rcnn_R_50_FPN_inference_acc_test.yaml │ ├── mask_rcnn_R_50_FPN_instant_test.yaml │ ├── 
mask_rcnn_R_50_FPN_pred_boxes_training_acc_test.yaml │ ├── mask_rcnn_R_50_FPN_training_acc_test.yaml │ ├── panoptic_fpn_R_50_inference_acc_test.yaml │ ├── panoptic_fpn_R_50_instant_test.yaml │ ├── panoptic_fpn_R_50_training_acc_test.yaml │ ├── retinanet_R_50_FPN_inference_acc_test.yaml │ ├── retinanet_R_50_FPN_instant_test.yaml │ ├── rpn_R_50_FPN_inference_acc_test.yaml │ ├── rpn_R_50_FPN_instant_test.yaml │ ├── semantic_R_50_FPN_inference_acc_test.yaml │ ├── semantic_R_50_FPN_instant_test.yaml │ └── semantic_R_50_FPN_training_acc_test.yaml ├── datasets ├── README.md ├── prepare_cocofied_lvis.py ├── prepare_for_tests.sh └── prepare_panoptic_fpn.py ├── demo ├── demo.py └── predictor.py ├── demo_d2 ├── README.md ├── demo.py └── predictor.py ├── detectron2 ├── __init__.py ├── checkpoint │ ├── __init__.py │ ├── c2_model_loading.py │ ├── catalog.py │ └── detection_checkpoint.py ├── config │ ├── __init__.py │ ├── compat.py │ ├── config.py │ └── defaults.py ├── data │ ├── __init__.py │ ├── build.py │ ├── catalog.py │ ├── common.py │ ├── dataset_mapper.py │ ├── datasets │ │ ├── README.md │ │ ├── __init__.py │ │ ├── builtin.py │ │ ├── builtin_meta.py │ │ ├── citypersons.py │ │ ├── cityscapes.py │ │ ├── cityscapes_panoptic.py │ │ ├── coco.py │ │ ├── coco_panoptic.py │ │ ├── crowdhuman.py │ │ ├── lvis.py │ │ ├── lvis_v0_5_categories.py │ │ ├── lvis_v1_categories.py │ │ ├── mot.py │ │ ├── pascal_voc.py │ │ └── register_coco.py │ ├── detection_utils.py │ ├── samplers │ │ ├── __init__.py │ │ ├── distributed_sampler.py │ │ └── grouped_batch_sampler.py │ └── transforms │ │ ├── __init__.py │ │ ├── augmentation.py │ │ ├── augmentation_impl.py │ │ └── transform.py ├── engine │ ├── __init__.py │ ├── defaults.py │ ├── hooks.py │ ├── launch.py │ └── train_loop.py ├── evaluation │ ├── __init__.py │ ├── cityscapes_evaluation.py │ ├── coco_evaluation.py │ ├── evaluator.py │ ├── fast_eval_api.py │ ├── lvis_evaluation.py │ ├── panoptic_evaluation.py │ ├── pascal_voc_evaluation.py │ ├── rotated_coco_evaluation.py │ ├── sem_seg_evaluation.py │ └── testing.py ├── export │ ├── README.md │ ├── __init__.py │ ├── api.py │ ├── c10.py │ ├── caffe2_export.py │ ├── caffe2_inference.py │ ├── caffe2_modeling.py │ ├── caffe2_patch.py │ ├── shared.py │ ├── torchscript.py │ └── torchscript_patch.py ├── layers │ ├── __init__.py │ ├── aspp.py │ ├── batch_norm.py │ ├── blocks.py │ ├── csrc │ │ ├── README.md │ │ ├── ROIAlign │ │ │ ├── ROIAlign.h │ │ │ ├── ROIAlign_cpu.cpp │ │ │ └── ROIAlign_cuda.cu │ │ ├── ROIAlignRotated │ │ │ ├── ROIAlignRotated.h │ │ │ ├── ROIAlignRotated_cpu.cpp │ │ │ └── ROIAlignRotated_cuda.cu │ │ ├── box_iou_rotated │ │ │ ├── box_iou_rotated.h │ │ │ ├── box_iou_rotated_cpu.cpp │ │ │ ├── box_iou_rotated_cuda.cu │ │ │ └── box_iou_rotated_utils.h │ │ ├── cocoeval │ │ │ ├── cocoeval.cpp │ │ │ └── cocoeval.h │ │ ├── cuda_version.cu │ │ ├── deformable │ │ │ ├── deform_conv.h │ │ │ ├── deform_conv_cuda.cu │ │ │ └── deform_conv_cuda_kernel.cu │ │ ├── nms_rotated │ │ │ ├── nms_rotated.h │ │ │ ├── nms_rotated_cpu.cpp │ │ │ └── nms_rotated_cuda.cu │ │ └── vision.cpp │ ├── deform_conv.py │ ├── mask_ops.py │ ├── nms.py │ ├── roi_align.py │ ├── roi_align_rotated.py │ ├── rotated_boxes.py │ ├── shape_spec.py │ └── wrappers.py ├── model_zoo │ ├── __init__.py │ └── model_zoo.py ├── modeling │ ├── __init__.py │ ├── anchor_generator.py │ ├── backbone │ │ ├── __init__.py │ │ ├── backbone.py │ │ ├── build.py │ │ ├── fpn.py │ │ └── resnet.py │ ├── box_regression.py │ ├── matcher.py │ ├── meta_arch │ │ ├── __init__.py │ │ ├── build.py 
│ │ ├── panoptic_fpn.py │ │ ├── rcnn.py │ │ ├── retinanet.py │ │ └── semantic_seg.py │ ├── poolers.py │ ├── postprocessing.py │ ├── proposal_generator │ │ ├── __init__.py │ │ ├── build.py │ │ ├── proposal_utils.py │ │ ├── rpn.py │ │ └── rrpn.py │ ├── roi_heads │ │ ├── __init__.py │ │ ├── box_head.py │ │ ├── cascade_rcnn.py │ │ ├── fast_rcnn.py │ │ ├── keypoint_head.py │ │ ├── mask_head.py │ │ ├── roi_heads.py │ │ └── rotated_fast_rcnn.py │ ├── sampling.py │ └── test_time_augmentation.py ├── projects │ ├── README.md │ └── __init__.py ├── solver │ ├── __init__.py │ ├── build.py │ └── lr_scheduler.py ├── structures │ ├── __init__.py │ ├── boxes.py │ ├── image_list.py │ ├── instances.py │ ├── keypoints.py │ ├── masks.py │ └── rotated_boxes.py └── utils │ ├── README.md │ ├── __init__.py │ ├── analysis.py │ ├── collect_env.py │ ├── colormap.py │ ├── comm.py │ ├── env.py │ ├── events.py │ ├── file_io.py │ ├── logger.py │ ├── memory.py │ ├── registry.py │ ├── serialize.py │ ├── testing.py │ ├── video_visualizer.py │ └── visualizer.py ├── dev ├── README.md ├── linter.sh ├── packaging │ ├── README.md │ ├── build_all_wheels.sh │ ├── build_wheel.sh │ ├── gen_install_table.py │ ├── gen_wheel_index.sh │ └── pkg_helpers.bash ├── parse_results.sh ├── run_inference_tests.sh └── run_instant_tests.sh ├── docker ├── Dockerfile ├── Dockerfile-circleci ├── README.md └── docker-compose.yml ├── docs ├── .gitignore ├── Makefile ├── README.md ├── _static │ └── css │ │ └── custom.css ├── conf.py ├── index.rst ├── modules │ ├── checkpoint.rst │ ├── config.rst │ ├── data.rst │ ├── engine.rst │ ├── evaluation.rst │ ├── export.rst │ ├── index.rst │ ├── layers.rst │ ├── model_zoo.rst │ ├── modeling.rst │ ├── solver.rst │ ├── structures.rst │ └── utils.rst ├── notes │ ├── benchmarks.md │ ├── changelog.md │ ├── compatibility.md │ ├── contributing.md │ └── index.rst ├── requirements.txt └── tutorials │ ├── README.md │ ├── builtin_datasets.md │ ├── configs.md │ ├── data_loading.md │ ├── datasets.md │ ├── deployment.md │ ├── evaluation.md │ ├── extend.md │ ├── getting_started.md │ ├── index.rst │ ├── install.md │ ├── models.md │ ├── training.md │ └── write-models.md ├── projects ├── crowd-e2e-sparse-rcnn │ ├── __init__.py │ ├── config.py │ ├── configs │ │ ├── 50e.6h.500pro.ignore.yaml │ │ ├── Base-SparseRCNN.yaml │ │ └── sparsercnn.crowdhuman.res50.500pro.50e.yaml │ ├── dataset_mapper.py │ ├── eval_result.py │ ├── models │ │ ├── detector.py │ │ ├── dynamic_conv.py │ │ ├── head.py │ │ ├── loss.py │ │ ├── matcher.py │ │ ├── rcnn_head.py │ │ ├── relation_net.py │ │ └── shared_head.py │ ├── run_test.sh │ ├── run_train.sh │ └── train_net.py └── sparse-rcnn-baseline │ ├── __init__.py │ ├── config.py │ ├── configs │ ├── 50e.6h.500pro.ignore.yaml │ ├── Base-SparseRCNN.yaml │ └── sparsercnn.crowdhuman.res50.500pro.50e.yaml │ ├── dataset_mapper.py │ ├── eval_result.py │ ├── models │ ├── detector.py │ ├── dynamic_conv.py │ ├── head.py │ ├── loss.py │ └── matcher.py │ ├── run_test.sh │ ├── run_train.sh │ └── train_net.py ├── readme └── fig.jpg ├── setup.cfg ├── setup.py ├── tests ├── README.md ├── __init__.py ├── data │ ├── __init__.py │ ├── test_coco.py │ ├── test_coco_evaluation.py │ ├── test_detection_utils.py │ ├── test_rotation_transform.py │ ├── test_sampler.py │ └── test_transforms.py ├── layers │ ├── __init__.py │ ├── test_mask_ops.py │ ├── test_nms_rotated.py │ ├── test_roi_align.py │ └── test_roi_align_rotated.py ├── modeling │ ├── __init__.py │ ├── test_anchor_generator.py │ ├── test_box2box_transform.py │ ├── 
test_fast_rcnn.py │ ├── test_matcher.py │ ├── test_model_e2e.py │ ├── test_roi_heads.py │ ├── test_roi_pooler.py │ └── test_rpn.py ├── structures │ ├── __init__.py │ ├── test_boxes.py │ ├── test_imagelist.py │ ├── test_instances.py │ └── test_rotated_boxes.py ├── test_checkpoint.py ├── test_config.py ├── test_export_caffe2.py ├── test_model_analysis.py ├── test_model_zoo.py └── test_visualizer.py ├── tools ├── README.md ├── analyze_model.py ├── benchmark.py ├── convert-torchvision-to-d2.py ├── deploy │ ├── CMakeLists.txt │ ├── README.md │ ├── caffe2_converter.py │ ├── caffe2_mask_rcnn.cpp │ └── torchscript_traced_mask_rcnn.cpp ├── plain_train_net.py ├── train_net.py ├── visualize_data.py └── visualize_json_results.py └── utils ├── Makefile ├── __init__.py ├── benchmark.py ├── box.py ├── box_ops.py ├── common.py ├── detToolkits ├── .gitignore ├── detools │ ├── __init__.py │ ├── box.py │ ├── database.py │ ├── evaluator.py │ └── image.py ├── eval.py └── eval_csv.py ├── detbox.py ├── draw.py ├── infrastructure.py ├── matching.py ├── misc.py ├── nms ├── __init__.py ├── cpu_nms.c ├── cpu_nms.pyx ├── gpu_nms.cpp ├── gpu_nms.hpp ├── gpu_nms.pyx ├── nms_kernel.cu └── py_cpu_nms.py ├── nms_wrapper.py ├── plot_utils.py ├── py_cpu_nms.py └── setup.py /.gitignore: -------------------------------------------------------------------------------- 1 | # output dir 2 | output 3 | output* 4 | instant_test_output 5 | inference_test_output 6 | *.png 7 | *.json 8 | *.diff 9 | .DS_Store 10 | # compilation and distribution 11 | __pycache__ 12 | _ext 13 | *.pyc 14 | *.pyd 15 | *.so 16 | detectron2.egg-info/ 17 | build/ 18 | dist/ 19 | wheels/ 20 | 21 | # pytorch/python/numpy formats 22 | *.pth 23 | *.pkl 24 | *.npy 25 | 26 | # ipython/jupyter notebooks 27 | *.ipynb 28 | **/.ipynb_checkpoints/ 29 | 30 | # Editor temporaries 31 | *.swn 32 | *.swo 33 | *.swp 34 | *~ 35 | 36 | # editor settings 37 | .idea 38 | .vscode 39 | _darcs 40 | 41 | # project dirs 42 | /detectron2/model_zoo/configs 43 | /datasets/* 44 | !/datasets/*.* 45 | /projects/*/datasets 46 | # /models 47 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 MEGVII Technology 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /README_d2.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | Detectron2 is Facebook AI Research's next generation software system 4 | that implements state-of-the-art object detection algorithms. 5 | It is a ground-up rewrite of the previous version, 6 | [Detectron](https://github.com/facebookresearch/Detectron/), 7 | and it originates from [maskrcnn-benchmark](https://github.com/facebookresearch/maskrcnn-benchmark/). 8 | 9 |
10 | 11 |
12 | 13 | ### What's New 14 | * It is powered by the [PyTorch](https://pytorch.org) deep learning framework. 15 | * Includes more features such as panoptic segmentation, densepose, Cascade R-CNN, rotated bounding boxes, etc. 16 | * Can be used as a library to support [different projects](projects/) on top of it. 17 | We'll open source more research projects in this way. 18 | * It [trains much faster](https://detectron2.readthedocs.io/notes/benchmarks.html). 19 | 20 | See our [blog post](https://ai.facebook.com/blog/-detectron2-a-pytorch-based-modular-object-detection-library-/) 21 | for more demos and to learn about detectron2. 22 | 23 | ## Installation 24 | 25 | See [INSTALL.md](INSTALL.md). 26 | 27 | ## Quick Start 28 | 29 | See [GETTING_STARTED.md](GETTING_STARTED.md), 30 | or the [Colab Notebook](https://colab.research.google.com/drive/16jcaJoc6bCFAQ96jDe2HwtXj7BMD_-m5). 31 | 32 | Learn more at our [documentation](https://detectron2.readthedocs.org). 33 | And see [projects/](projects/) for some projects that are built on top of detectron2. 34 | 35 | ## Model Zoo and Baselines 36 | 37 | We provide a large set of baseline results and trained models for download in the [Detectron2 Model Zoo](MODEL_ZOO.md). 38 | 39 | 40 | ## License 41 | 42 | Detectron2 is released under the [Apache 2.0 license](LICENSE). 43 | 44 | ## Citing Detectron2 45 | 46 | If you use Detectron2 in your research or wish to refer to the baseline results published in the [Model Zoo](MODEL_ZOO.md), please use the following BibTeX entry. 47 | 48 | ```BibTeX 49 | @misc{wu2019detectron2, 50 | author = {Yuxin Wu and Alexander Kirillov and Francisco Massa and 51 | Wan-Yen Lo and Ross Girshick}, 52 | title = {Detectron2}, 53 | howpublished = {\url{https://github.com/facebookresearch/detectron2}}, 54 | year = {2019} 55 | } 56 | ``` 57 | -------------------------------------------------------------------------------- /configs/Base-RCNN-C4.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | RPN: 4 | PRE_NMS_TOPK_TEST: 6000 5 | POST_NMS_TOPK_TEST: 1000 6 | ROI_HEADS: 7 | NAME: "Res5ROIHeads" 8 | DATASETS: 9 | TRAIN: ("coco_2017_train",) 10 | TEST: ("coco_2017_val",) 11 | SOLVER: 12 | IMS_PER_BATCH: 16 13 | BASE_LR: 0.02 14 | STEPS: (60000, 80000) 15 | MAX_ITER: 90000 16 | INPUT: 17 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 18 | VERSION: 2 19 | -------------------------------------------------------------------------------- /configs/Base-RCNN-DilatedC5.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | RESNETS: 4 | OUT_FEATURES: ["res5"] 5 | RES5_DILATION: 2 6 | RPN: 7 | IN_FEATURES: ["res5"] 8 | PRE_NMS_TOPK_TEST: 6000 9 | POST_NMS_TOPK_TEST: 1000 10 | ROI_HEADS: 11 | NAME: "StandardROIHeads" 12 | IN_FEATURES: ["res5"] 13 | ROI_BOX_HEAD: 14 | NAME: "FastRCNNConvFCHead" 15 | NUM_FC: 2 16 | POOLER_RESOLUTION: 7 17 | ROI_MASK_HEAD: 18 | NAME: "MaskRCNNConvUpsampleHead" 19 | NUM_CONV: 4 20 | POOLER_RESOLUTION: 14 21 | DATASETS: 22 | TRAIN: ("coco_2017_train",) 23 | TEST: ("coco_2017_val",) 24 | SOLVER: 25 | IMS_PER_BATCH: 16 26 | BASE_LR: 0.02 27 | STEPS: (60000, 80000) 28 | MAX_ITER: 90000 29 | INPUT: 30 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 31 | VERSION: 2 32 | -------------------------------------------------------------------------------- /configs/Base-RCNN-FPN.yaml:
-------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | BACKBONE: 4 | NAME: "build_resnet_fpn_backbone" 5 | RESNETS: 6 | OUT_FEATURES: ["res2", "res3", "res4", "res5"] 7 | FPN: 8 | IN_FEATURES: ["res2", "res3", "res4", "res5"] 9 | ANCHOR_GENERATOR: 10 | SIZES: [[32], [64], [128], [256], [512]] # One size for each in feature map 11 | ASPECT_RATIOS: [[0.5, 1.0, 2.0]] # Three aspect ratios (same for all in feature maps) 12 | RPN: 13 | IN_FEATURES: ["p2", "p3", "p4", "p5", "p6"] 14 | PRE_NMS_TOPK_TRAIN: 2000 # Per FPN level 15 | PRE_NMS_TOPK_TEST: 1000 # Per FPN level 16 | # Detectron1 uses 2000 proposals per-batch, 17 | # (See "modeling/rpn/rpn_outputs.py" for details of this legacy issue) 18 | # which is approximately 1000 proposals per-image since the default batch size for FPN is 2. 19 | POST_NMS_TOPK_TRAIN: 1000 20 | POST_NMS_TOPK_TEST: 1000 21 | ROI_HEADS: 22 | NAME: "StandardROIHeads" 23 | IN_FEATURES: ["p2", "p3", "p4", "p5"] 24 | ROI_BOX_HEAD: 25 | NAME: "FastRCNNConvFCHead" 26 | NUM_FC: 2 27 | POOLER_RESOLUTION: 7 28 | ROI_MASK_HEAD: 29 | NAME: "MaskRCNNConvUpsampleHead" 30 | NUM_CONV: 4 31 | POOLER_RESOLUTION: 14 32 | DATASETS: 33 | TRAIN: ("coco_2017_train",) 34 | TEST: ("coco_2017_val",) 35 | SOLVER: 36 | IMS_PER_BATCH: 16 37 | BASE_LR: 0.02 38 | STEPS: (60000, 80000) 39 | MAX_ITER: 90000 40 | INPUT: 41 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 42 | VERSION: 2 43 | -------------------------------------------------------------------------------- /configs/Base-RetinaNet.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "RetinaNet" 3 | BACKBONE: 4 | NAME: "build_retinanet_resnet_fpn_backbone" 5 | RESNETS: 6 | OUT_FEATURES: ["res3", "res4", "res5"] 7 | ANCHOR_GENERATOR: 8 | SIZES: !!python/object/apply:eval ["[[x, x * 2**(1.0/3), x * 2**(2.0/3) ] for x in [32, 64, 128, 256, 512 ]]"] 9 | FPN: 10 | IN_FEATURES: ["res3", "res4", "res5"] 11 | RETINANET: 12 | IOU_THRESHOLDS: [0.4, 0.5] 13 | IOU_LABELS: [0, -1, 1] 14 | SMOOTH_L1_LOSS_BETA: 0.0 15 | DATASETS: 16 | TRAIN: ("coco_2017_train",) 17 | TEST: ("coco_2017_val",) 18 | SOLVER: 19 | IMS_PER_BATCH: 16 20 | BASE_LR: 0.01 # Note that RetinaNet uses a different default learning rate 21 | STEPS: (60000, 80000) 22 | MAX_ITER: 90000 23 | INPUT: 24 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 25 | VERSION: 2 26 | -------------------------------------------------------------------------------- /configs/COCO-Detection/fast_rcnn_R_50_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: False 5 | LOAD_PROPOSALS: True 6 | RESNETS: 7 | DEPTH: 50 8 | PROPOSAL_GENERATOR: 9 | NAME: "PrecomputedProposals" 10 | DATASETS: 11 | TRAIN: ("coco_2017_train",) 12 | PROPOSAL_FILES_TRAIN: ("detectron2://COCO-Detection/rpn_R_50_FPN_1x/137258492/coco_2017_train_box_proposals_21bc3a.pkl", ) 13 | TEST: ("coco_2017_val",) 14 | PROPOSAL_FILES_TEST: ("detectron2://COCO-Detection/rpn_R_50_FPN_1x/137258492/coco_2017_val_box_proposals_ee0dad.pkl", ) 15 | DATALOADER: 16 | # proposals are part of the dataset_dicts, and take a lot of RAM 17 | NUM_WORKERS: 2 18 | -------------------------------------------------------------------------------- /configs/COCO-Detection/faster_rcnn_R_101_C4_3x.yaml: 
-------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-C4.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" 4 | MASK_ON: False 5 | RESNETS: 6 | DEPTH: 101 7 | SOLVER: 8 | STEPS: (210000, 250000) 9 | MAX_ITER: 270000 10 | -------------------------------------------------------------------------------- /configs/COCO-Detection/faster_rcnn_R_101_DC5_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-DilatedC5.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" 4 | MASK_ON: False 5 | RESNETS: 6 | DEPTH: 101 7 | SOLVER: 8 | STEPS: (210000, 250000) 9 | MAX_ITER: 270000 10 | -------------------------------------------------------------------------------- /configs/COCO-Detection/faster_rcnn_R_101_FPN_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" 4 | MASK_ON: False 5 | RESNETS: 6 | DEPTH: 101 7 | SOLVER: 8 | STEPS: (210000, 250000) 9 | MAX_ITER: 270000 10 | -------------------------------------------------------------------------------- /configs/COCO-Detection/faster_rcnn_R_50_C4_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-C4.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: False 5 | RESNETS: 6 | DEPTH: 50 7 | -------------------------------------------------------------------------------- /configs/COCO-Detection/faster_rcnn_R_50_C4_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-C4.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: False 5 | RESNETS: 6 | DEPTH: 50 7 | SOLVER: 8 | STEPS: (210000, 250000) 9 | MAX_ITER: 270000 10 | -------------------------------------------------------------------------------- /configs/COCO-Detection/faster_rcnn_R_50_DC5_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-DilatedC5.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: False 5 | RESNETS: 6 | DEPTH: 50 7 | -------------------------------------------------------------------------------- /configs/COCO-Detection/faster_rcnn_R_50_DC5_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-DilatedC5.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: False 5 | RESNETS: 6 | DEPTH: 50 7 | SOLVER: 8 | STEPS: (210000, 250000) 9 | MAX_ITER: 270000 10 | -------------------------------------------------------------------------------- /configs/COCO-Detection/faster_rcnn_R_50_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: False 5 | RESNETS: 6 | DEPTH: 50 7 | -------------------------------------------------------------------------------- /configs/COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: False 5 | RESNETS: 6 | DEPTH: 
50 7 | SOLVER: 8 | STEPS: (210000, 250000) 9 | MAX_ITER: 270000 10 | -------------------------------------------------------------------------------- /configs/COCO-Detection/faster_rcnn_X_101_32x8d_FPN_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | MASK_ON: False 4 | WEIGHTS: "detectron2://ImageNetPretrained/FAIR/X-101-32x8d.pkl" 5 | PIXEL_STD: [57.375, 57.120, 58.395] 6 | RESNETS: 7 | STRIDE_IN_1X1: False # this is a C2 model 8 | NUM_GROUPS: 32 9 | WIDTH_PER_GROUP: 8 10 | DEPTH: 101 11 | SOLVER: 12 | STEPS: (210000, 250000) 13 | MAX_ITER: 270000 14 | -------------------------------------------------------------------------------- /configs/COCO-Detection/retinanet_R_101_FPN_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RetinaNet.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" 4 | RESNETS: 5 | DEPTH: 101 6 | SOLVER: 7 | STEPS: (210000, 250000) 8 | MAX_ITER: 270000 9 | -------------------------------------------------------------------------------- /configs/COCO-Detection/retinanet_R_50_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RetinaNet.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | RESNETS: 5 | DEPTH: 50 6 | -------------------------------------------------------------------------------- /configs/COCO-Detection/retinanet_R_50_FPN_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RetinaNet.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | RESNETS: 5 | DEPTH: 50 6 | SOLVER: 7 | STEPS: (210000, 250000) 8 | MAX_ITER: 270000 9 | -------------------------------------------------------------------------------- /configs/COCO-Detection/rpn_R_50_C4_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-C4.yaml" 2 | MODEL: 3 | META_ARCHITECTURE: "ProposalNetwork" 4 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 5 | MASK_ON: False 6 | RESNETS: 7 | DEPTH: 50 8 | RPN: 9 | PRE_NMS_TOPK_TEST: 12000 10 | POST_NMS_TOPK_TEST: 2000 11 | -------------------------------------------------------------------------------- /configs/COCO-Detection/rpn_R_50_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | META_ARCHITECTURE: "ProposalNetwork" 4 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 5 | MASK_ON: False 6 | RESNETS: 7 | DEPTH: 50 8 | RPN: 9 | POST_NMS_TOPK_TEST: 2000 10 | -------------------------------------------------------------------------------- /configs/COCO-Detection/zhang/faster_rcnn_R_50_C4_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../../Base-RCNN-C4.yaml" 2 | MODEL: 3 | WEIGHTS: "models/R-50.pkl" 4 | MASK_ON: False 5 | RESNETS: 6 | DEPTH: 50 7 | DATASETS: 8 | TRAIN: ("coco_2017_val",) 9 | TEST: ("coco_2017_val",) 10 | SOLVER: 11 | IMS_PER_BATCH: 2 12 | BASE_LR: 0.02 13 | STEPS: (60000, 80000) 14 | MAX_ITER: 90000 15 | INPUT: 16 | FORMAT: "RGB" 17 | # Remember to change mean/bias.
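The reminder at the end of the zhang config above ("remember to change mean/bias") points at a real pitfall: detectron2's default `MODEL.PIXEL_MEAN` is stated in BGR channel order for Caffe2-trained weights, so a config that sets `INPUT.FORMAT: "RGB"` should also override the normalization constants. Below is a minimal sketch of one way to do that with detectron2's config API; the reversed values are an assumption based on detectron2's defaults, not something this repo pins down.

```python
# Hedged sketch: override the pixel statistics when switching INPUT.FORMAT to RGB.
# The mean below is detectron2's default BGR mean [103.530, 116.280, 123.675]
# flipped into RGB order; adjust if your weights expect different statistics.
from detectron2.config import get_cfg

cfg = get_cfg()
cfg.merge_from_file("configs/COCO-Detection/zhang/faster_rcnn_R_50_C4_1x.yaml")
cfg.INPUT.FORMAT = "RGB"
cfg.MODEL.PIXEL_MEAN = [123.675, 116.280, 103.530]  # default BGR mean, reversed
cfg.MODEL.PIXEL_STD = [1.0, 1.0, 1.0]  # Caffe2-style weights use unit std
```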
-------------------------------------------------------------------------------- /configs/COCO-InstanceSegmentation/mask_rcnn_R_101_C4_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-C4.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" 4 | MASK_ON: True 5 | RESNETS: 6 | DEPTH: 101 7 | SOLVER: 8 | STEPS: (210000, 250000) 9 | MAX_ITER: 270000 10 | -------------------------------------------------------------------------------- /configs/COCO-InstanceSegmentation/mask_rcnn_R_101_DC5_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-DilatedC5.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" 4 | MASK_ON: True 5 | RESNETS: 6 | DEPTH: 101 7 | SOLVER: 8 | STEPS: (210000, 250000) 9 | MAX_ITER: 270000 10 | -------------------------------------------------------------------------------- /configs/COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" 4 | MASK_ON: True 5 | RESNETS: 6 | DEPTH: 101 7 | SOLVER: 8 | STEPS: (210000, 250000) 9 | MAX_ITER: 270000 10 | -------------------------------------------------------------------------------- /configs/COCO-InstanceSegmentation/mask_rcnn_R_50_C4_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-C4.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: True 5 | RESNETS: 6 | DEPTH: 50 7 | -------------------------------------------------------------------------------- /configs/COCO-InstanceSegmentation/mask_rcnn_R_50_C4_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-C4.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: True 5 | RESNETS: 6 | DEPTH: 50 7 | SOLVER: 8 | STEPS: (210000, 250000) 9 | MAX_ITER: 270000 10 | -------------------------------------------------------------------------------- /configs/COCO-InstanceSegmentation/mask_rcnn_R_50_DC5_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-DilatedC5.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: True 5 | RESNETS: 6 | DEPTH: 50 7 | -------------------------------------------------------------------------------- /configs/COCO-InstanceSegmentation/mask_rcnn_R_50_DC5_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-DilatedC5.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: True 5 | RESNETS: 6 | DEPTH: 50 7 | SOLVER: 8 | STEPS: (210000, 250000) 9 | MAX_ITER: 270000 10 | -------------------------------------------------------------------------------- /configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: True 5 | RESNETS: 6 | DEPTH: 50 7 | -------------------------------------------------------------------------------- /configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x_giou.yaml: 
-------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: True 5 | RESNETS: 6 | DEPTH: 50 7 | RPN: 8 | BBOX_REG_LOSS_TYPE: "giou" 9 | BBOX_REG_LOSS_WEIGHT: 2.0 10 | ROI_BOX_HEAD: 11 | BBOX_REG_LOSS_TYPE: "giou" 12 | BBOX_REG_LOSS_WEIGHT: 10.0 13 | -------------------------------------------------------------------------------- /configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: True 5 | RESNETS: 6 | DEPTH: 50 7 | SOLVER: 8 | STEPS: (210000, 250000) 9 | MAX_ITER: 270000 10 | -------------------------------------------------------------------------------- /configs/COCO-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | MASK_ON: True 4 | WEIGHTS: "detectron2://ImageNetPretrained/FAIR/X-101-32x8d.pkl" 5 | PIXEL_STD: [57.375, 57.120, 58.395] 6 | RESNETS: 7 | STRIDE_IN_1X1: False # this is a C2 model 8 | NUM_GROUPS: 32 9 | WIDTH_PER_GROUP: 8 10 | DEPTH: 101 11 | SOLVER: 12 | STEPS: (210000, 250000) 13 | MAX_ITER: 270000 14 | -------------------------------------------------------------------------------- /configs/COCO-Keypoints/Base-Keypoint-RCNN-FPN.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | KEYPOINT_ON: True 4 | ROI_HEADS: 5 | NUM_CLASSES: 1 6 | ROI_BOX_HEAD: 7 | SMOOTH_L1_BETA: 0.5 # Keypoint AP degrades (though box AP improves) when using plain L1 loss 8 | RPN: 9 | # Detectron1 uses 2000 proposals per-batch, but this option is per-image in detectron2. 10 | # 1000 proposals per-image is found to hurt box AP. 11 | # Therefore we increase it to 1500 per-image. 
12 | POST_NMS_TOPK_TRAIN: 1500 13 | DATASETS: 14 | TRAIN: ("keypoints_coco_2017_train",) 15 | TEST: ("keypoints_coco_2017_val",) 16 | -------------------------------------------------------------------------------- /configs/COCO-Keypoints/keypoint_rcnn_R_101_FPN_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-Keypoint-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" 4 | RESNETS: 5 | DEPTH: 101 6 | SOLVER: 7 | STEPS: (210000, 250000) 8 | MAX_ITER: 270000 9 | -------------------------------------------------------------------------------- /configs/COCO-Keypoints/keypoint_rcnn_R_50_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-Keypoint-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | RESNETS: 5 | DEPTH: 50 6 | -------------------------------------------------------------------------------- /configs/COCO-Keypoints/keypoint_rcnn_R_50_FPN_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-Keypoint-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | RESNETS: 5 | DEPTH: 50 6 | SOLVER: 7 | STEPS: (210000, 250000) 8 | MAX_ITER: 270000 9 | -------------------------------------------------------------------------------- /configs/COCO-Keypoints/keypoint_rcnn_X_101_32x8d_FPN_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-Keypoint-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/FAIR/X-101-32x8d.pkl" 4 | PIXEL_STD: [57.375, 57.120, 58.395] 5 | RESNETS: 6 | STRIDE_IN_1X1: False # this is a C2 model 7 | NUM_GROUPS: 32 8 | WIDTH_PER_GROUP: 8 9 | DEPTH: 101 10 | SOLVER: 11 | STEPS: (210000, 250000) 12 | MAX_ITER: 270000 13 | -------------------------------------------------------------------------------- /configs/COCO-PanopticSegmentation/Base-Panoptic-FPN.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | META_ARCHITECTURE: "PanopticFPN" 4 | MASK_ON: True 5 | SEM_SEG_HEAD: 6 | LOSS_WEIGHT: 0.5 7 | DATASETS: 8 | TRAIN: ("coco_2017_train_panoptic_separated",) 9 | TEST: ("coco_2017_val_panoptic_separated",) 10 | DATALOADER: 11 | FILTER_EMPTY_ANNOTATIONS: False 12 | -------------------------------------------------------------------------------- /configs/COCO-PanopticSegmentation/panoptic_fpn_R_101_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-Panoptic-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" 4 | RESNETS: 5 | DEPTH: 101 6 | SOLVER: 7 | STEPS: (210000, 250000) 8 | MAX_ITER: 270000 9 | -------------------------------------------------------------------------------- /configs/COCO-PanopticSegmentation/panoptic_fpn_R_50_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-Panoptic-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | RESNETS: 5 | DEPTH: 50 6 | -------------------------------------------------------------------------------- /configs/COCO-PanopticSegmentation/panoptic_fpn_R_50_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-Panoptic-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: 
"detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | RESNETS: 5 | DEPTH: 50 6 | SOLVER: 7 | STEPS: (210000, 250000) 8 | MAX_ITER: 270000 9 | -------------------------------------------------------------------------------- /configs/Cityscapes/mask_rcnn_R_50_FPN.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | # WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | # For better, more stable performance initialize from COCO 5 | WEIGHTS: "detectron2://COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x/137849600/model_final_f10217.pkl" 6 | MASK_ON: True 7 | ROI_HEADS: 8 | NUM_CLASSES: 8 9 | # This is similar to the setting used in Mask R-CNN paper, Appendix A 10 | # But there are some differences, e.g., we did not initialize the output 11 | # layer using the corresponding classes from COCO 12 | INPUT: 13 | MIN_SIZE_TRAIN: (800, 832, 864, 896, 928, 960, 992, 1024) 14 | MIN_SIZE_TRAIN_SAMPLING: "choice" 15 | MIN_SIZE_TEST: 1024 16 | MAX_SIZE_TRAIN: 2048 17 | MAX_SIZE_TEST: 2048 18 | DATASETS: 19 | TRAIN: ("cityscapes_fine_instance_seg_train",) 20 | TEST: ("cityscapes_fine_instance_seg_val",) 21 | SOLVER: 22 | BASE_LR: 0.01 23 | STEPS: (18000,) 24 | MAX_ITER: 24000 25 | IMS_PER_BATCH: 8 26 | TEST: 27 | EVAL_PERIOD: 8000 28 | -------------------------------------------------------------------------------- /configs/Detectron1-Comparisons/faster_rcnn_R_50_FPN_noaug_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: False 5 | RESNETS: 6 | DEPTH: 50 7 | # Detectron1 uses smooth L1 loss with some magic beta values. 8 | # The defaults are changed to L1 loss in Detectron2. 9 | RPN: 10 | SMOOTH_L1_BETA: 0.1111 11 | ROI_BOX_HEAD: 12 | SMOOTH_L1_BETA: 1.0 13 | POOLER_SAMPLING_RATIO: 2 14 | POOLER_TYPE: "ROIAlign" 15 | INPUT: 16 | # no scale augmentation 17 | MIN_SIZE_TRAIN: (800, ) 18 | -------------------------------------------------------------------------------- /configs/Detectron1-Comparisons/keypoint_rcnn_R_50_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | KEYPOINT_ON: True 5 | RESNETS: 6 | DEPTH: 50 7 | ROI_HEADS: 8 | NUM_CLASSES: 1 9 | ROI_KEYPOINT_HEAD: 10 | POOLER_RESOLUTION: 14 11 | POOLER_SAMPLING_RATIO: 2 12 | POOLER_TYPE: "ROIAlign" 13 | # Detectron1 uses smooth L1 loss with some magic beta values. 14 | # The defaults are changed to L1 loss in Detectron2. 15 | ROI_BOX_HEAD: 16 | SMOOTH_L1_BETA: 1.0 17 | POOLER_SAMPLING_RATIO: 2 18 | POOLER_TYPE: "ROIAlign" 19 | RPN: 20 | SMOOTH_L1_BETA: 0.1111 21 | # Detectron1 uses 2000 proposals per-batch, but this option is per-image in detectron2 22 | # 1000 proposals per-image is found to hurt box AP. 23 | # Therefore we increase it to 1500 per-image. 
24 | POST_NMS_TOPK_TRAIN: 1500 25 | DATASETS: 26 | TRAIN: ("keypoints_coco_2017_train",) 27 | TEST: ("keypoints_coco_2017_val",) 28 | -------------------------------------------------------------------------------- /configs/Detectron1-Comparisons/mask_rcnn_R_50_FPN_noaug_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: True 5 | RESNETS: 6 | DEPTH: 50 7 | # Detectron1 uses smooth L1 loss with some magic beta values. 8 | # The defaults are changed to L1 loss in Detectron2. 9 | RPN: 10 | SMOOTH_L1_BETA: 0.1111 11 | ROI_BOX_HEAD: 12 | SMOOTH_L1_BETA: 1.0 13 | POOLER_SAMPLING_RATIO: 2 14 | POOLER_TYPE: "ROIAlign" 15 | ROI_MASK_HEAD: 16 | POOLER_SAMPLING_RATIO: 2 17 | POOLER_TYPE: "ROIAlign" 18 | INPUT: 19 | # no scale augmentation 20 | MIN_SIZE_TRAIN: (800, ) 21 | -------------------------------------------------------------------------------- /configs/LVISv0.5-InstanceSegmentation/mask_rcnn_R_101_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" 4 | MASK_ON: True 5 | RESNETS: 6 | DEPTH: 101 7 | ROI_HEADS: 8 | NUM_CLASSES: 1230 9 | SCORE_THRESH_TEST: 0.0001 10 | INPUT: 11 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 12 | DATASETS: 13 | TRAIN: ("lvis_v0.5_train",) 14 | TEST: ("lvis_v0.5_val",) 15 | TEST: 16 | DETECTIONS_PER_IMAGE: 300 # LVIS allows up to 300 17 | DATALOADER: 18 | SAMPLER_TRAIN: "RepeatFactorTrainingSampler" 19 | REPEAT_THRESHOLD: 0.001 20 | -------------------------------------------------------------------------------- /configs/LVISv0.5-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: True 5 | RESNETS: 6 | DEPTH: 50 7 | ROI_HEADS: 8 | NUM_CLASSES: 1230 9 | SCORE_THRESH_TEST: 0.0001 10 | INPUT: 11 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 12 | DATASETS: 13 | TRAIN: ("lvis_v0.5_train",) 14 | TEST: ("lvis_v0.5_val",) 15 | TEST: 16 | DETECTIONS_PER_IMAGE: 300 # LVIS allows up to 300 17 | DATALOADER: 18 | SAMPLER_TRAIN: "RepeatFactorTrainingSampler" 19 | REPEAT_THRESHOLD: 0.001 20 | -------------------------------------------------------------------------------- /configs/LVISv0.5-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/FAIR/X-101-32x8d.pkl" 4 | PIXEL_STD: [57.375, 57.120, 58.395] 5 | MASK_ON: True 6 | RESNETS: 7 | STRIDE_IN_1X1: False # this is a C2 model 8 | NUM_GROUPS: 32 9 | WIDTH_PER_GROUP: 8 10 | DEPTH: 101 11 | ROI_HEADS: 12 | NUM_CLASSES: 1230 13 | SCORE_THRESH_TEST: 0.0001 14 | INPUT: 15 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 16 | DATASETS: 17 | TRAIN: ("lvis_v0.5_train",) 18 | TEST: ("lvis_v0.5_val",) 19 | TEST: 20 | DETECTIONS_PER_IMAGE: 300 # LVIS allows up to 300 21 | DATALOADER: 22 | SAMPLER_TRAIN: "RepeatFactorTrainingSampler" 23 | REPEAT_THRESHOLD: 0.001 24 | -------------------------------------------------------------------------------- /configs/LVISv1-InstanceSegmentation/mask_rcnn_R_101_FPN_1x.yaml: 
-------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" 4 | MASK_ON: True 5 | RESNETS: 6 | DEPTH: 101 7 | ROI_HEADS: 8 | NUM_CLASSES: 1203 9 | SCORE_THRESH_TEST: 0.0001 10 | INPUT: 11 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 12 | DATASETS: 13 | TRAIN: ("lvis_v1_train",) 14 | TEST: ("lvis_v1_val",) 15 | TEST: 16 | DETECTIONS_PER_IMAGE: 300 # LVIS allows up to 300 17 | SOLVER: 18 | STEPS: (120000, 160000) 19 | MAX_ITER: 180000 # 180000 * 16 / 100000 ~ 28.8 epochs 20 | DATALOADER: 21 | SAMPLER_TRAIN: "RepeatFactorTrainingSampler" 22 | REPEAT_THRESHOLD: 0.001 23 | -------------------------------------------------------------------------------- /configs/LVISv1-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: True 5 | RESNETS: 6 | DEPTH: 50 7 | ROI_HEADS: 8 | NUM_CLASSES: 1203 9 | SCORE_THRESH_TEST: 0.0001 10 | INPUT: 11 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 12 | DATASETS: 13 | TRAIN: ("lvis_v1_train",) 14 | TEST: ("lvis_v1_val",) 15 | TEST: 16 | DETECTIONS_PER_IMAGE: 300 # LVIS allows up to 300 17 | SOLVER: 18 | STEPS: (120000, 160000) 19 | MAX_ITER: 180000 # 180000 * 16 / 100000 ~ 28.8 epochs 20 | DATALOADER: 21 | SAMPLER_TRAIN: "RepeatFactorTrainingSampler" 22 | REPEAT_THRESHOLD: 0.001 23 | -------------------------------------------------------------------------------- /configs/LVISv1-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/FAIR/X-101-32x8d.pkl" 4 | PIXEL_STD: [57.375, 57.120, 58.395] 5 | MASK_ON: True 6 | RESNETS: 7 | STRIDE_IN_1X1: False # this is a C2 model 8 | NUM_GROUPS: 32 9 | WIDTH_PER_GROUP: 8 10 | DEPTH: 101 11 | ROI_HEADS: 12 | NUM_CLASSES: 1203 13 | SCORE_THRESH_TEST: 0.0001 14 | INPUT: 15 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 16 | DATASETS: 17 | TRAIN: ("lvis_v1_train",) 18 | TEST: ("lvis_v1_val",) 19 | SOLVER: 20 | STEPS: (120000, 160000) 21 | MAX_ITER: 180000 # 180000 * 16 / 100000 ~ 28.8 epochs 22 | TEST: 23 | DETECTIONS_PER_IMAGE: 300 # LVIS allows up to 300 24 | DATALOADER: 25 | SAMPLER_TRAIN: "RepeatFactorTrainingSampler" 26 | REPEAT_THRESHOLD: 0.001 27 | -------------------------------------------------------------------------------- /configs/Misc/cascade_mask_rcnn_R_50_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: True 5 | RESNETS: 6 | DEPTH: 50 7 | ROI_HEADS: 8 | NAME: CascadeROIHeads 9 | ROI_BOX_HEAD: 10 | CLS_AGNOSTIC_BBOX_REG: True 11 | RPN: 12 | POST_NMS_TOPK_TRAIN: 2000 13 | -------------------------------------------------------------------------------- /configs/Misc/cascade_mask_rcnn_R_50_FPN_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: True 5 | RESNETS: 6 | DEPTH: 50 7 | ROI_HEADS: 8 | NAME: CascadeROIHeads 9 | ROI_BOX_HEAD: 10 | CLS_AGNOSTIC_BBOX_REG: True 11 | RPN: 12 | 
POST_NMS_TOPK_TRAIN: 2000 13 | SOLVER: 14 | STEPS: (210000, 250000) 15 | MAX_ITER: 270000 16 | -------------------------------------------------------------------------------- /configs/Misc/cascade_mask_rcnn_X_152_32x8d_FPN_IN5k_gn_dconv.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | MASK_ON: True 4 | WEIGHTS: "catalog://ImageNetPretrained/FAIR/X-152-32x8d-IN5k" 5 | RESNETS: 6 | STRIDE_IN_1X1: False # this is a C2 model 7 | NUM_GROUPS: 32 8 | WIDTH_PER_GROUP: 8 9 | DEPTH: 152 10 | DEFORM_ON_PER_STAGE: [False, True, True, True] 11 | ROI_HEADS: 12 | NAME: "CascadeROIHeads" 13 | ROI_BOX_HEAD: 14 | NAME: "FastRCNNConvFCHead" 15 | NUM_CONV: 4 16 | NUM_FC: 1 17 | NORM: "GN" 18 | CLS_AGNOSTIC_BBOX_REG: True 19 | ROI_MASK_HEAD: 20 | NUM_CONV: 8 21 | NORM: "GN" 22 | RPN: 23 | POST_NMS_TOPK_TRAIN: 2000 24 | SOLVER: 25 | IMS_PER_BATCH: 128 26 | STEPS: (35000, 45000) 27 | MAX_ITER: 50000 28 | BASE_LR: 0.16 29 | INPUT: 30 | MIN_SIZE_TRAIN: (640, 864) 31 | MIN_SIZE_TRAIN_SAMPLING: "range" 32 | MAX_SIZE_TRAIN: 1440 33 | CROP: 34 | ENABLED: True 35 | TEST: 36 | EVAL_PERIOD: 2500 37 | -------------------------------------------------------------------------------- /configs/Misc/mask_rcnn_R_50_FPN_1x_cls_agnostic.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: True 5 | RESNETS: 6 | DEPTH: 50 7 | ROI_BOX_HEAD: 8 | CLS_AGNOSTIC_BBOX_REG: True 9 | ROI_MASK_HEAD: 10 | CLS_AGNOSTIC_MASK: True 11 | -------------------------------------------------------------------------------- /configs/Misc/mask_rcnn_R_50_FPN_1x_dconv_c3-c5.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: True 5 | RESNETS: 6 | DEPTH: 50 7 | DEFORM_ON_PER_STAGE: [False, True, True, True] # on Res3,Res4,Res5 8 | DEFORM_MODULATED: False 9 | -------------------------------------------------------------------------------- /configs/Misc/mask_rcnn_R_50_FPN_3x_dconv_c3-c5.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: True 5 | RESNETS: 6 | DEPTH: 50 7 | DEFORM_ON_PER_STAGE: [False, True, True, True] # on Res3,Res4,Res5 8 | DEFORM_MODULATED: False 9 | SOLVER: 10 | STEPS: (210000, 250000) 11 | MAX_ITER: 270000 12 | -------------------------------------------------------------------------------- /configs/Misc/mask_rcnn_R_50_FPN_3x_gn.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "catalog://ImageNetPretrained/FAIR/R-50-GN" 4 | MASK_ON: True 5 | RESNETS: 6 | DEPTH: 50 7 | NORM: "GN" 8 | STRIDE_IN_1X1: False 9 | FPN: 10 | NORM: "GN" 11 | ROI_BOX_HEAD: 12 | NAME: "FastRCNNConvFCHead" 13 | NUM_CONV: 4 14 | NUM_FC: 1 15 | NORM: "GN" 16 | ROI_MASK_HEAD: 17 | NORM: "GN" 18 | SOLVER: 19 | # 3x schedule 20 | STEPS: (210000, 250000) 21 | MAX_ITER: 270000 22 | -------------------------------------------------------------------------------- /configs/Misc/mask_rcnn_R_50_FPN_3x_syncbn.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | 
MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: True 5 | RESNETS: 6 | DEPTH: 50 7 | NORM: "SyncBN" 8 | STRIDE_IN_1X1: True 9 | FPN: 10 | NORM: "SyncBN" 11 | ROI_BOX_HEAD: 12 | NAME: "FastRCNNConvFCHead" 13 | NUM_CONV: 4 14 | NUM_FC: 1 15 | NORM: "SyncBN" 16 | ROI_MASK_HEAD: 17 | NORM: "SyncBN" 18 | SOLVER: 19 | # 3x schedule 20 | STEPS: (210000, 250000) 21 | MAX_ITER: 270000 22 | TEST: 23 | PRECISE_BN: 24 | ENABLED: True 25 | -------------------------------------------------------------------------------- /configs/Misc/panoptic_fpn_R_101_dconv_cascade_gn_3x.yaml: -------------------------------------------------------------------------------- 1 | # A large PanopticFPN for demo purposes. 2 | # Use GN on backbone to support semantic seg. 3 | # Use Cascade + Deform Conv to improve localization. 4 | _BASE_: "../COCO-PanopticSegmentation/Base-Panoptic-FPN.yaml" 5 | MODEL: 6 | WEIGHTS: "catalog://ImageNetPretrained/FAIR/R-101-GN" 7 | RESNETS: 8 | DEPTH: 101 9 | NORM: "GN" 10 | DEFORM_ON_PER_STAGE: [False, True, True, True] 11 | STRIDE_IN_1X1: False 12 | FPN: 13 | NORM: "GN" 14 | ROI_HEADS: 15 | NAME: CascadeROIHeads 16 | ROI_BOX_HEAD: 17 | CLS_AGNOSTIC_BBOX_REG: True 18 | ROI_MASK_HEAD: 19 | NORM: "GN" 20 | RPN: 21 | POST_NMS_TOPK_TRAIN: 2000 22 | SOLVER: 23 | STEPS: (105000, 125000) 24 | MAX_ITER: 135000 25 | IMS_PER_BATCH: 32 26 | BASE_LR: 0.04 27 | -------------------------------------------------------------------------------- /configs/Misc/scratch_mask_rcnn_R_50_FPN_3x_gn.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "mask_rcnn_R_50_FPN_3x_gn.yaml" 2 | MODEL: 3 | # Train from random initialization. 4 | WEIGHTS: "" 5 | # It makes sense to divide by STD when training from scratch 6 | # But it seems to make no difference on the results and C2's models didn't do this. 7 | # So we keep things consistent with C2. 8 | # PIXEL_STD: [57.375, 57.12, 58.395] 9 | MASK_ON: True 10 | BACKBONE: 11 | FREEZE_AT: 0 12 | # NOTE: Please refer to Rethinking ImageNet Pre-training https://arxiv.org/abs/1811.08883 13 | # to learn what you need for training from scratch. 14 | -------------------------------------------------------------------------------- /configs/Misc/scratch_mask_rcnn_R_50_FPN_9x_gn.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "mask_rcnn_R_50_FPN_3x_gn.yaml" 2 | MODEL: 3 | PIXEL_STD: [57.375, 57.12, 58.395] 4 | WEIGHTS: "" 5 | MASK_ON: True 6 | RESNETS: 7 | STRIDE_IN_1X1: False 8 | BACKBONE: 9 | FREEZE_AT: 0 10 | SOLVER: 11 | # 9x schedule 12 | IMS_PER_BATCH: 64 # 4x the standard 13 | STEPS: (187500, 197500) # last 60/4==15k and last 20/4==5k 14 | MAX_ITER: 202500 # 90k * 9 / 4 15 | BASE_LR: 0.08 16 | TEST: 17 | EVAL_PERIOD: 2500 18 | # NOTE: Please refer to Rethinking ImageNet Pre-training https://arxiv.org/abs/1811.08883 19 | # to learn what you need for training from scratch. 
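The schedule comments in the 9x scratch config here (and in the syncbn variant that follows) encode a simple scaling rule: 4x the batch size divides the iteration counts by 4 and multiplies the base LR by 4 (linear scaling). A quick sketch verifying that arithmetic from the standard 1x schedule:

```python
# Sketch of the 9x-schedule arithmetic: a 9x schedule run at 4x the standard
# batch size keeps the same number of epochs by dividing iterations by 4.
base_iters, base_batch, base_lr = 90_000, 16, 0.02  # standard 1x schedule
sched_scale, batch_scale = 9, 4                     # 9x schedule, 64 images/batch

max_iter = base_iters * sched_scale // batch_scale  # 90k * 9 / 4 = 202500
steps = (max_iter - 60_000 // batch_scale,          # drop LR for the last 15k iters
         max_iter - 20_000 // batch_scale)          # and again for the last 5k
lr = base_lr * batch_scale                          # linear LR scaling -> 0.08
assert (max_iter, steps, lr) == (202500, (187500, 197500), 0.08)
```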
20 | -------------------------------------------------------------------------------- /configs/Misc/scratch_mask_rcnn_R_50_FPN_9x_syncbn.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "mask_rcnn_R_50_FPN_3x_syncbn.yaml" 2 | MODEL: 3 | PIXEL_STD: [57.375, 57.12, 58.395] 4 | WEIGHTS: "" 5 | MASK_ON: True 6 | RESNETS: 7 | STRIDE_IN_1X1: False 8 | BACKBONE: 9 | FREEZE_AT: 0 10 | SOLVER: 11 | # 9x schedule 12 | IMS_PER_BATCH: 64 # 4x the standard 13 | STEPS: (187500, 197500) # last 60/4==15k and last 20/4==5k 14 | MAX_ITER: 202500 # 90k * 9 / 4 15 | BASE_LR: 0.08 16 | TEST: 17 | EVAL_PERIOD: 2500 18 | # NOTE: Please refer to Rethinking ImageNet Pre-training https://arxiv.org/abs/1811.08883 19 | # to learn what you need for training from scratch. 20 | -------------------------------------------------------------------------------- /configs/Misc/semantic_R_50_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | META_ARCHITECTURE: "SemanticSegmentor" 4 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 5 | RESNETS: 6 | DEPTH: 50 7 | DATASETS: 8 | TRAIN: ("coco_2017_train_panoptic_stuffonly",) 9 | TEST: ("coco_2017_val_panoptic_stuffonly",) 10 | INPUT: 11 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 12 | -------------------------------------------------------------------------------- /configs/PascalVOC-Detection/faster_rcnn_R_50_C4.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-C4.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: False 5 | RESNETS: 6 | DEPTH: 50 7 | ROI_HEADS: 8 | NUM_CLASSES: 20 9 | INPUT: 10 | MIN_SIZE_TRAIN: (480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800) 11 | MIN_SIZE_TEST: 800 12 | DATASETS: 13 | TRAIN: ('voc_2007_trainval', 'voc_2012_trainval') 14 | TEST: ('voc_2007_test',) 15 | SOLVER: 16 | STEPS: (12000, 16000) 17 | MAX_ITER: 18000 # 17.4 epochs 18 | WARMUP_ITERS: 100 19 | -------------------------------------------------------------------------------- /configs/PascalVOC-Detection/faster_rcnn_R_50_FPN.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: False 5 | RESNETS: 6 | DEPTH: 50 7 | ROI_HEADS: 8 | NUM_CLASSES: 20 9 | INPUT: 10 | MIN_SIZE_TRAIN: (480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800) 11 | MIN_SIZE_TEST: 800 12 | DATASETS: 13 | TRAIN: ('voc_2007_trainval', 'voc_2012_trainval') 14 | TEST: ('voc_2007_test',) 15 | SOLVER: 16 | STEPS: (12000, 16000) 17 | MAX_ITER: 18000 # 17.4 epochs 18 | WARMUP_ITERS: 100 19 | -------------------------------------------------------------------------------- /configs/quick_schedules/README.md: -------------------------------------------------------------------------------- 1 | These are quick configs for performance or accuracy regression tracking purposes. 
2 | -------------------------------------------------------------------------------- /configs/quick_schedules/cascade_mask_rcnn_R_50_FPN_inference_acc_test.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Misc/cascade_mask_rcnn_R_50_FPN_3x.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://Misc/cascade_mask_rcnn_R_50_FPN_3x/144998488/model_final_480dd8.pkl" 4 | DATASETS: 5 | TEST: ("coco_2017_val_100",) 6 | TEST: 7 | EXPECTED_RESULTS: [["bbox", "AP", 50.18, 0.02], ["segm", "AP", 43.87, 0.02]] 8 | -------------------------------------------------------------------------------- /configs/quick_schedules/cascade_mask_rcnn_R_50_FPN_instant_test.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Misc/cascade_mask_rcnn_R_50_FPN_3x.yaml" 2 | DATASETS: 3 | TRAIN: ("coco_2017_val_100",) 4 | TEST: ("coco_2017_val_100",) 5 | SOLVER: 6 | BASE_LR: 0.005 7 | STEPS: (30,) 8 | MAX_ITER: 40 9 | IMS_PER_BATCH: 4 10 | DATALOADER: 11 | NUM_WORKERS: 2 12 | -------------------------------------------------------------------------------- /configs/quick_schedules/fast_rcnn_R_50_FPN_inference_acc_test.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../COCO-Detection/fast_rcnn_R_50_FPN_1x.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://COCO-Detection/fast_rcnn_R_50_FPN_1x/137635226/model_final_e5f7ce.pkl" 4 | DATASETS: 5 | TEST: ("coco_2017_val_100",) 6 | TEST: 7 | EXPECTED_RESULTS: [["bbox", "AP", 45.70, 0.02]] 8 | -------------------------------------------------------------------------------- /configs/quick_schedules/fast_rcnn_R_50_FPN_instant_test.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../COCO-Detection/fast_rcnn_R_50_FPN_1x.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | DATASETS: 5 | TRAIN: ("coco_2017_val_100",) 6 | PROPOSAL_FILES_TRAIN: ("detectron2://COCO-Detection/rpn_R_50_FPN_1x/137258492/coco_2017_val_box_proposals_ee0dad.pkl", ) 7 | TEST: ("coco_2017_val_100",) 8 | PROPOSAL_FILES_TEST: ("detectron2://COCO-Detection/rpn_R_50_FPN_1x/137258492/coco_2017_val_box_proposals_ee0dad.pkl", ) 9 | SOLVER: 10 | BASE_LR: 0.005 11 | STEPS: (30,) 12 | MAX_ITER: 40 13 | IMS_PER_BATCH: 4 14 | DATALOADER: 15 | NUM_WORKERS: 2 16 | -------------------------------------------------------------------------------- /configs/quick_schedules/keypoint_rcnn_R_50_FPN_inference_acc_test.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../COCO-Keypoints/keypoint_rcnn_R_50_FPN_3x.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://COCO-Keypoints/keypoint_rcnn_R_50_FPN_3x/137849621/model_final_a6e10b.pkl" 4 | DATASETS: 5 | TEST: ("keypoints_coco_2017_val_100",) 6 | TEST: 7 | EXPECTED_RESULTS: [["bbox", "AP", 52.47, 0.02], ["keypoints", "AP", 67.36, 0.02]] 8 | -------------------------------------------------------------------------------- /configs/quick_schedules/keypoint_rcnn_R_50_FPN_instant_test.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | KEYPOINT_ON: True 5 | DATASETS: 6 | TRAIN: ("keypoints_coco_2017_val_100",) 7 | TEST: ("keypoints_coco_2017_val_100",) 8 | SOLVER: 9 | BASE_LR: 0.005 10 | STEPS: (30,) 11 | MAX_ITER: 40 12 | IMS_PER_BATCH: 4 13 | DATALOADER: 14 | NUM_WORKERS: 2 15 | 
-------------------------------------------------------------------------------- /configs/quick_schedules/keypoint_rcnn_R_50_FPN_normalized_training_acc_test.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | KEYPOINT_ON: True 5 | RESNETS: 6 | DEPTH: 50 7 | ROI_HEADS: 8 | BATCH_SIZE_PER_IMAGE: 256 9 | NUM_CLASSES: 1 10 | ROI_KEYPOINT_HEAD: 11 | POOLER_RESOLUTION: 14 12 | POOLER_SAMPLING_RATIO: 2 13 | NORMALIZE_LOSS_BY_VISIBLE_KEYPOINTS: False 14 | LOSS_WEIGHT: 4.0 15 | ROI_BOX_HEAD: 16 | SMOOTH_L1_BETA: 1.0 # Keypoint AP degrades when using plain L1 loss 17 | RPN: 18 | SMOOTH_L1_BETA: 0.2 # Keypoint AP degrades when using plain L1 loss 19 | DATASETS: 20 | TRAIN: ("keypoints_coco_2017_val",) 21 | TEST: ("keypoints_coco_2017_val",) 22 | INPUT: 23 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 24 | SOLVER: 25 | WARMUP_FACTOR: 0.33333333 26 | WARMUP_ITERS: 100 27 | STEPS: (5500, 5800) 28 | MAX_ITER: 6000 29 | TEST: 30 | EXPECTED_RESULTS: [["bbox", "AP", 55.35, 1.0], ["keypoints", "AP", 76.91, 1.0]] 31 | -------------------------------------------------------------------------------- /configs/quick_schedules/keypoint_rcnn_R_50_FPN_training_acc_test.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | KEYPOINT_ON: True 5 | RESNETS: 6 | DEPTH: 50 7 | ROI_HEADS: 8 | BATCH_SIZE_PER_IMAGE: 256 9 | NUM_CLASSES: 1 10 | ROI_KEYPOINT_HEAD: 11 | POOLER_RESOLUTION: 14 12 | POOLER_SAMPLING_RATIO: 2 13 | ROI_BOX_HEAD: 14 | SMOOTH_L1_BETA: 1.0 # Keypoint AP degrades when using plain L1 loss 15 | RPN: 16 | SMOOTH_L1_BETA: 0.2 # Keypoint AP degrades when using plain L1 loss 17 | DATASETS: 18 | TRAIN: ("keypoints_coco_2017_val",) 19 | TEST: ("keypoints_coco_2017_val",) 20 | INPUT: 21 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 22 | SOLVER: 23 | WARMUP_FACTOR: 0.33333333 24 | WARMUP_ITERS: 100 25 | STEPS: (5500, 5800) 26 | MAX_ITER: 6000 27 | TEST: 28 | EXPECTED_RESULTS: [["bbox", "AP", 53.5, 1.0], ["keypoints", "AP", 72.4, 1.0]] 29 | -------------------------------------------------------------------------------- /configs/quick_schedules/mask_rcnn_R_50_C4_GCV_instant_test.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-C4.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: True 5 | DATASETS: 6 | TRAIN: ("coco_2017_val_100",) 7 | TEST: ("coco_2017_val_100",) 8 | SOLVER: 9 | BASE_LR: 0.001 10 | STEPS: (30,) 11 | MAX_ITER: 40 12 | IMS_PER_BATCH: 4 13 | CLIP_GRADIENTS: 14 | ENABLED: True 15 | CLIP_TYPE: "value" 16 | CLIP_VALUE: 1.0 17 | DATALOADER: 18 | NUM_WORKERS: 2 19 | -------------------------------------------------------------------------------- /configs/quick_schedules/mask_rcnn_R_50_C4_inference_acc_test.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../COCO-InstanceSegmentation/mask_rcnn_R_50_C4_3x.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://COCO-InstanceSegmentation/mask_rcnn_R_50_C4_3x/137849525/model_final_4ce675.pkl" 4 | DATASETS: 5 | TEST: ("coco_2017_val_100",) 6 | TEST: 7 | EXPECTED_RESULTS: [["bbox", "AP", 47.37, 0.02], ["segm", "AP", 40.99, 0.02]] 8 | -------------------------------------------------------------------------------- 
/configs/quick_schedules/mask_rcnn_R_50_C4_instant_test.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-C4.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: True 5 | DATASETS: 6 | TRAIN: ("coco_2017_val_100",) 7 | TEST: ("coco_2017_val_100",) 8 | SOLVER: 9 | BASE_LR: 0.001 10 | STEPS: (30,) 11 | MAX_ITER: 40 12 | IMS_PER_BATCH: 4 13 | DATALOADER: 14 | NUM_WORKERS: 2 15 | -------------------------------------------------------------------------------- /configs/quick_schedules/mask_rcnn_R_50_C4_training_acc_test.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-C4.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | ROI_HEADS: 5 | BATCH_SIZE_PER_IMAGE: 256 6 | MASK_ON: True 7 | DATASETS: 8 | TRAIN: ("coco_2017_val",) 9 | TEST: ("coco_2017_val",) 10 | INPUT: 11 | MIN_SIZE_TRAIN: (600,) 12 | MAX_SIZE_TRAIN: 1000 13 | MIN_SIZE_TEST: 800 14 | MAX_SIZE_TEST: 1000 15 | SOLVER: 16 | IMS_PER_BATCH: 8 # base uses 16 17 | WARMUP_FACTOR: 0.33333 18 | WARMUP_ITERS: 100 19 | STEPS: (11000, 11600) 20 | MAX_ITER: 12000 21 | TEST: 22 | EXPECTED_RESULTS: [["bbox", "AP", 41.88, 0.7], ["segm", "AP", 33.79, 0.5]] 23 | -------------------------------------------------------------------------------- /configs/quick_schedules/mask_rcnn_R_50_DC5_inference_acc_test.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../COCO-InstanceSegmentation/mask_rcnn_R_50_DC5_3x.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://COCO-InstanceSegmentation/mask_rcnn_R_50_DC5_3x/137849551/model_final_84107b.pkl" 4 | DATASETS: 5 | TEST: ("coco_2017_val_100",) 6 | TEST: 7 | EXPECTED_RESULTS: [["bbox", "AP", 47.44, 0.02], ["segm", "AP", 42.94, 0.02]] 8 | -------------------------------------------------------------------------------- /configs/quick_schedules/mask_rcnn_R_50_FPN_inference_acc_test.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x/137849600/model_final_f10217.pkl" 4 | DATASETS: 5 | TEST: ("coco_2017_val_100",) 6 | TEST: 7 | EXPECTED_RESULTS: [["bbox", "AP", 47.34, 0.02], ["segm", "AP", 42.67, 0.02], ["bbox_TTA", "AP", 49.11, 0.02], ["segm_TTA", "AP", 45.04, 0.02]] 8 | AUG: 9 | ENABLED: True 10 | MIN_SIZES: (700, 800) # to save some time 11 | -------------------------------------------------------------------------------- /configs/quick_schedules/mask_rcnn_R_50_FPN_instant_test.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: True 5 | DATASETS: 6 | TRAIN: ("coco_2017_val_100",) 7 | TEST: ("coco_2017_val_100",) 8 | SOLVER: 9 | BASE_LR: 0.005 10 | STEPS: (30,) 11 | MAX_ITER: 40 12 | IMS_PER_BATCH: 4 13 | DATALOADER: 14 | NUM_WORKERS: 2 15 | -------------------------------------------------------------------------------- /configs/quick_schedules/mask_rcnn_R_50_FPN_pred_boxes_training_acc_test.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "./mask_rcnn_R_50_FPN_training_acc_test.yaml" 2 | MODEL: 3 | ROI_BOX_HEAD: 4 | TRAIN_ON_PRED_BOXES: True 5 | TEST: 6 | EXPECTED_RESULTS: [["bbox", "AP", 42.6, 
1.0], ["segm", "AP", 35.8, 0.8]] 7 | -------------------------------------------------------------------------------- /configs/quick_schedules/mask_rcnn_R_50_FPN_training_acc_test.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | ROI_HEADS: 5 | BATCH_SIZE_PER_IMAGE: 256 6 | MASK_ON: True 7 | DATASETS: 8 | TRAIN: ("coco_2017_val",) 9 | TEST: ("coco_2017_val",) 10 | INPUT: 11 | MIN_SIZE_TRAIN: (600,) 12 | MAX_SIZE_TRAIN: 1000 13 | MIN_SIZE_TEST: 800 14 | MAX_SIZE_TEST: 1000 15 | SOLVER: 16 | WARMUP_FACTOR: 0.3333333 17 | WARMUP_ITERS: 100 18 | STEPS: (5500, 5800) 19 | MAX_ITER: 6000 20 | TEST: 21 | EXPECTED_RESULTS: [["bbox", "AP", 42.5, 1.0], ["segm", "AP", 35.8, 0.8]] 22 | -------------------------------------------------------------------------------- /configs/quick_schedules/panoptic_fpn_R_50_inference_acc_test.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../COCO-PanopticSegmentation/panoptic_fpn_R_50_3x.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://COCO-PanopticSegmentation/panoptic_fpn_R_50_3x/139514569/model_final_c10459.pkl" 4 | DATASETS: 5 | TEST: ("coco_2017_val_100_panoptic_separated",) 6 | TEST: 7 | EXPECTED_RESULTS: [["bbox", "AP", 46.47, 0.02], ["segm", "AP", 43.39, 0.02], ["sem_seg", "mIoU", 42.55, 0.02], ["panoptic_seg", "PQ", 38.99, 0.02]] 8 | -------------------------------------------------------------------------------- /configs/quick_schedules/panoptic_fpn_R_50_instant_test.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | META_ARCHITECTURE: "PanopticFPN" 4 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 5 | MASK_ON: True 6 | RESNETS: 7 | DEPTH: 50 8 | SEM_SEG_HEAD: 9 | LOSS_WEIGHT: 0.5 10 | DATASETS: 11 | TRAIN: ("coco_2017_val_100_panoptic_separated",) 12 | TEST: ("coco_2017_val_100_panoptic_separated",) 13 | SOLVER: 14 | BASE_LR: 0.005 15 | STEPS: (30,) 16 | MAX_ITER: 40 17 | IMS_PER_BATCH: 4 18 | DATALOADER: 19 | NUM_WORKERS: 1 20 | -------------------------------------------------------------------------------- /configs/quick_schedules/panoptic_fpn_R_50_training_acc_test.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | META_ARCHITECTURE: "PanopticFPN" 4 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 5 | MASK_ON: True 6 | RESNETS: 7 | DEPTH: 50 8 | SEM_SEG_HEAD: 9 | LOSS_WEIGHT: 0.5 10 | DATASETS: 11 | TRAIN: ("coco_2017_val_panoptic_separated",) 12 | TEST: ("coco_2017_val_panoptic_separated",) 13 | SOLVER: 14 | BASE_LR: 0.01 15 | WARMUP_FACTOR: 0.001 16 | WARMUP_ITERS: 500 17 | STEPS: (5500,) 18 | MAX_ITER: 7000 19 | TEST: 20 | EXPECTED_RESULTS: [["bbox", "AP", 46.70, 1.1], ["segm", "AP", 38.73, 0.7], ["sem_seg", "mIoU", 64.73, 1.2], ["panoptic_seg", "PQ", 48.13, 0.8]] 21 | -------------------------------------------------------------------------------- /configs/quick_schedules/retinanet_R_50_FPN_inference_acc_test.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../COCO-Detection/retinanet_R_50_FPN_3x.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://COCO-Detection/retinanet_R_50_FPN_3x/190397829/model_final_5bd44e.pkl" 4 | DATASETS: 5 | TEST: ("coco_2017_val_100",) 6 | TEST: 7 | EXPECTED_RESULTS: [["bbox", "AP", 44.45, 0.02]] 
8 | -------------------------------------------------------------------------------- /configs/quick_schedules/retinanet_R_50_FPN_instant_test.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../COCO-Detection/retinanet_R_50_FPN_1x.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | DATASETS: 5 | TRAIN: ("coco_2017_val_100",) 6 | TEST: ("coco_2017_val_100",) 7 | SOLVER: 8 | BASE_LR: 0.005 9 | STEPS: (30,) 10 | MAX_ITER: 40 11 | IMS_PER_BATCH: 4 12 | DATALOADER: 13 | NUM_WORKERS: 2 14 | -------------------------------------------------------------------------------- /configs/quick_schedules/rpn_R_50_FPN_inference_acc_test.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../COCO-Detection/rpn_R_50_FPN_1x.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://COCO-Detection/rpn_R_50_FPN_1x/137258492/model_final_02ce48.pkl" 4 | DATASETS: 5 | TEST: ("coco_2017_val_100",) 6 | TEST: 7 | EXPECTED_RESULTS: [["box_proposals", "AR@1000", 58.16, 0.02]] 8 | -------------------------------------------------------------------------------- /configs/quick_schedules/rpn_R_50_FPN_instant_test.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../COCO-Detection/rpn_R_50_FPN_1x.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | DATASETS: 5 | TRAIN: ("coco_2017_val_100",) 6 | TEST: ("coco_2017_val_100",) 7 | SOLVER: 8 | STEPS: (30,) 9 | MAX_ITER: 40 10 | BASE_LR: 0.005 11 | IMS_PER_BATCH: 4 12 | DATALOADER: 13 | NUM_WORKERS: 2 14 | -------------------------------------------------------------------------------- /configs/quick_schedules/semantic_R_50_FPN_inference_acc_test.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | META_ARCHITECTURE: "SemanticSegmentor" 4 | WEIGHTS: "detectron2://semantic_R_50_FPN_1x/111802073/model_final_c18079783c55a94968edc28b7101c5f0.pkl" 5 | RESNETS: 6 | DEPTH: 50 7 | DATASETS: 8 | TEST: ("coco_2017_val_100_panoptic_stuffonly",) 9 | TEST: 10 | EXPECTED_RESULTS: [["sem_seg", "mIoU", 39.53, 0.02], ["sem_seg", "mACC", 51.50, 0.02]] 11 | -------------------------------------------------------------------------------- /configs/quick_schedules/semantic_R_50_FPN_instant_test.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | META_ARCHITECTURE: "SemanticSegmentor" 4 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 5 | RESNETS: 6 | DEPTH: 50 7 | DATASETS: 8 | TRAIN: ("coco_2017_val_100_panoptic_stuffonly",) 9 | TEST: ("coco_2017_val_100_panoptic_stuffonly",) 10 | INPUT: 11 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 12 | SOLVER: 13 | BASE_LR: 0.005 14 | STEPS: (30,) 15 | MAX_ITER: 40 16 | IMS_PER_BATCH: 4 17 | DATALOADER: 18 | NUM_WORKERS: 2 19 | -------------------------------------------------------------------------------- /configs/quick_schedules/semantic_R_50_FPN_training_acc_test.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | META_ARCHITECTURE: "SemanticSegmentor" 4 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 5 | RESNETS: 6 | DEPTH: 50 7 | DATASETS: 8 | TRAIN: ("coco_2017_val_panoptic_stuffonly",) 9 | TEST: ("coco_2017_val_panoptic_stuffonly",) 10 | SOLVER: 11 | BASE_LR: 0.01 12 | WARMUP_FACTOR: 
0.001 13 | WARMUP_ITERS: 300 14 | STEPS: (5500,) 15 | MAX_ITER: 7000 16 | TEST: 17 | EXPECTED_RESULTS: [["sem_seg", "mIoU", 76.51, 1.0], ["sem_seg", "mACC", 83.25, 1.0]] 18 | INPUT: 19 | # no scale augmentation 20 | MIN_SIZE_TRAIN: (800, ) 21 | -------------------------------------------------------------------------------- /datasets/README.md: -------------------------------------------------------------------------------- 1 | # Setup Builtin Datasets 2 | 3 | Detectron2 has builtin support for a few datasets. 4 | The datasets are assumed to exist in a directory specified by the environment variable 5 | `DETECTRON2_DATASETS`. 6 | Under this directory, detectron2 will look for datasets in the structure described below, if needed. 7 | ``` 8 | $DETECTRON2_DATASETS/ 9 | coco/ 10 | lvis/ 11 | cityscapes/ 12 | VOC20{07,12}/ 13 | ``` 14 | 15 | You can set the location for builtin datasets by `export DETECTRON2_DATASETS=/path/to/datasets`. 16 | If left unset, the default is `./datasets` relative to your current working directory. 17 | 18 | The [model zoo](https://github.com/facebookresearch/detectron2/blob/master/MODEL_ZOO.md) 19 | contains configs and models that use these builtin datasets. 20 | 21 | ## Expected dataset structure for COCO instance/keypoint detection: 22 | 23 | ``` 24 | coco/ 25 | annotations/ 26 | instances_{train,val}2017.json 27 | person_keypoints_{train,val}2017.json 28 | {train,val}2017/ 29 | # image files that are mentioned in the corresponding json 30 | ``` 31 | 32 | You can use the 2014 version of the dataset as well. 33 | 34 | Some of the builtin tests (`dev/run_*_tests.sh`) use a tiny version of the COCO dataset, 35 | which you can download with `./prepare_for_tests.sh`. 36 | 37 | ## Expected dataset structure for PanopticFPN: 38 | 39 | ``` 40 | coco/ 41 | annotations/ 42 | panoptic_{train,val}2017.json 43 | panoptic_{train,val}2017/ # png annotations 44 | panoptic_stuff_{train,val}2017/ # generated by the script mentioned below 45 | ``` 46 | 47 | Install panopticapi by: 48 | ``` 49 | pip install git+https://github.com/cocodataset/panopticapi.git 50 | ``` 51 | Then run `python prepare_panoptic_fpn.py` to extract semantic annotations from panoptic annotations. 52 | 53 | ## Expected dataset structure for LVIS instance segmentation: 54 | ``` 55 | coco/ 56 | {train,val,test}2017/ 57 | lvis/ 58 | lvis_v0.5_{train,val}.json 59 | lvis_v0.5_image_info_test.json 60 | ``` 61 | 62 | Install lvis-api by: 63 | ``` 64 | pip install git+https://github.com/lvis-dataset/lvis-api.git 65 | ``` 66 | 67 | Run `python prepare_cocofied_lvis.py` to prepare "cocofied" LVIS annotations, which can be used to evaluate models trained on the COCO dataset. 68 | 69 | ## Expected dataset structure for cityscapes: 70 | ``` 71 | cityscapes/ 72 | gtFine/ 73 | train/ 74 | aachen/ 75 | color.png, instanceIds.png, labelIds.png, polygons.json, 76 | labelTrainIds.png 77 | ... 78 | val/ 79 | test/ 80 | leftImg8bit/ 81 | train/ 82 | val/ 83 | test/ 84 | ``` 85 | Install the cityscapes scripts by: 86 | ``` 87 | pip install git+https://github.com/mcordts/cityscapesScripts.git 88 | ``` 89 | 90 | Note: to create labelTrainIds.png, first prepare the above structure, then run the cityscapes scripts with: 91 | ``` 92 | CITYSCAPES_DATASET=/path/to/abovementioned/cityscapes python cityscapesscripts/preparation/createTrainIdLabelImgs.py 93 | ``` 94 | These files are not needed for instance segmentation.
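Once a dataset is laid out as above, a quick sanity check is to load one of the registered splits directly. A minimal sketch (assuming `DETECTRON2_DATASETS` points at the directory above and the cityscapes scripts have been run; the split name below is one of the builtin registrations):

```python
from detectron2.data import DatasetCatalog, MetadataCatalog

# Loading a registered split parses the annotations and returns a list of dicts.
dicts = DatasetCatalog.get("cityscapes_fine_instance_seg_val")
print(len(dicts), "images")
print(MetadataCatalog.get("cityscapes_fine_instance_seg_val").thing_classes)
```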
95 | 96 | ## Expected dataset structure for Pascal VOC: 97 | ``` 98 | VOC20{07,12}/ 99 | Annotations/ 100 | ImageSets/ 101 | Main/ 102 | trainval.txt 103 | test.txt 104 | # train.txt or val.txt, if you use these splits 105 | JPEGImages/ 106 | ``` 107 | -------------------------------------------------------------------------------- /datasets/prepare_for_tests.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 3 | 4 | # Download some files needed for running tests. 5 | 6 | cd "${0%/*}" 7 | 8 | BASE=https://dl.fbaipublicfiles.com/detectron2 9 | mkdir -p coco/annotations 10 | 11 | for anno in instances_val2017_100 \ 12 | person_keypoints_val2017_100 \ 13 | instances_minival2014_100 \ 14 | person_keypoints_minival2014_100; do 15 | 16 | dest=coco/annotations/$anno.json 17 | [[ -s $dest ]] && { 18 | echo "$dest exists. Skipping ..." 19 | } || { 20 | wget $BASE/annotations/coco/$anno.json -O $dest 21 | } 22 | done 23 | -------------------------------------------------------------------------------- /demo_d2/README.md: -------------------------------------------------------------------------------- 1 | 2 | ## Detectron2 Demo 3 | 4 | We provide a command line tool to run a simple demo of builtin models. 5 | The usage is explained in [GETTING_STARTED.md](../GETTING_STARTED.md). 6 | 7 | See our [blog post](https://ai.facebook.com/blog/-detectron2-a-pytorch-based-modular-object-detection-library-) 8 | for a high-quality demo generated with this tool. 9 | -------------------------------------------------------------------------------- /detectron2/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | 3 | from .utils.env import setup_environment 4 | 5 | setup_environment() 6 | 7 | 8 | # This line will be programmatically read/written by setup.py. 9 | # Leave it at the bottom of this file and don't touch it. 10 | __version__ = "0.3" 11 | -------------------------------------------------------------------------------- /detectron2/checkpoint/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright (c) Facebook, Inc. and its affiliates. 3 | # File: 4 | 5 | 6 | from . import catalog as _UNUSED # register the handler 7 | from .detection_checkpoint import DetectionCheckpointer 8 | from fvcore.common.checkpoint import Checkpointer, PeriodicCheckpointer 9 | 10 | __all__ = ["Checkpointer", "PeriodicCheckpointer", "DetectionCheckpointer"] 11 | -------------------------------------------------------------------------------- /detectron2/config/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | from .compat import downgrade_config, upgrade_config 3 | from .config import CfgNode, get_cfg, global_cfg, set_global_cfg, configurable 4 | 5 | __all__ = [ 6 | "CfgNode", 7 | "get_cfg", 8 | "global_cfg", 9 | "set_global_cfg", 10 | "downgrade_config", 11 | "upgrade_config", 12 | "configurable", 13 | ] 14 | -------------------------------------------------------------------------------- /detectron2/data/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | from .
import transforms # isort:skip 3 | 4 | from .build import ( 5 | build_batch_data_loader, 6 | build_detection_test_loader, 7 | build_detection_train_loader, 8 | get_detection_dataset_dicts, 9 | load_proposals_into_dataset, 10 | print_instances_class_histogram, 11 | ) 12 | from .catalog import DatasetCatalog, MetadataCatalog, Metadata 13 | from .common import DatasetFromList, MapDataset 14 | from .dataset_mapper import DatasetMapper 15 | 16 | # ensure the builtin datasets are registered 17 | from . import datasets, samplers # isort:skip 18 | 19 | __all__ = [k for k in globals().keys() if not k.startswith("_")] 20 | -------------------------------------------------------------------------------- /detectron2/data/datasets/README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | ### Common Datasets 4 | 5 | The datasets implemented here do not need to load the data into the final format. 6 | They should provide the minimal data structure needed to use the dataset, so it can be very efficient. 7 | 8 | For example, for an image dataset, just provide the file names and labels, but don't read the images. 9 | Let the downstream code decide how to read them. 10 | -------------------------------------------------------------------------------- /detectron2/data/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | from .coco import load_coco_json, load_sem_seg, register_coco_instances 3 | from .coco_panoptic import register_coco_panoptic, register_coco_panoptic_separated 4 | from .lvis import load_lvis_json, register_lvis_instances, get_lvis_instances_meta 5 | from .pascal_voc import load_voc_instances, register_pascal_voc 6 | from . import builtin as _builtin # ensure the builtin datasets are registered 7 | 8 | 9 | __all__ = [k for k in globals().keys() if not k.startswith("_")] 10 | -------------------------------------------------------------------------------- /detectron2/data/datasets/register_coco.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | from .coco import register_coco_instances # noqa 3 | from .coco_panoptic import register_coco_panoptic_separated # noqa 4 | -------------------------------------------------------------------------------- /detectron2/data/samplers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | from .distributed_sampler import InferenceSampler, RepeatFactorTrainingSampler, TrainingSampler 3 | from .grouped_batch_sampler import GroupedBatchSampler 4 | 5 | __all__ = [ 6 | "GroupedBatchSampler", 7 | "TrainingSampler", 8 | "InferenceSampler", 9 | "RepeatFactorTrainingSampler", 10 | ] 11 | -------------------------------------------------------------------------------- /detectron2/data/samplers/grouped_batch_sampler.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | import numpy as np 3 | from torch.utils.data.sampler import BatchSampler, Sampler 4 | 5 | 6 | class GroupedBatchSampler(BatchSampler): 7 | """ 8 | Wraps another sampler to yield a mini-batch of indices. 9 | It enforces that the batch only contain elements from the same group.
10 | It also tries to provide mini-batches that follow an ordering which is 11 | as close as possible to the ordering from the original sampler. 12 | """ 13 | 14 | def __init__(self, sampler, group_ids, batch_size): 15 | """ 16 | Args: 17 | sampler (Sampler): Base sampler. 18 | group_ids (list[int]): If the sampler produces indices in range [0, N), 19 | `group_ids` must be a list of `N` ints which contains the group id of each sample. 20 | The group ids must be a set of integers in the range [0, num_groups). 21 | batch_size (int): Size of mini-batch. 22 | """ 23 | if not isinstance(sampler, Sampler): 24 | raise ValueError( 25 | "sampler should be an instance of " 26 | "torch.utils.data.Sampler, but got sampler={}".format(sampler) 27 | ) 28 | self.sampler = sampler 29 | self.group_ids = np.asarray(group_ids) 30 | assert self.group_ids.ndim == 1 31 | self.batch_size = batch_size 32 | groups = np.unique(self.group_ids).tolist() 33 | 34 | # buffer the indices of each group until batch size is reached 35 | self.buffer_per_group = {k: [] for k in groups} 36 | 37 | def __iter__(self): 38 | for idx in self.sampler: 39 | group_id = self.group_ids[idx] 40 | group_buffer = self.buffer_per_group[group_id] 41 | group_buffer.append(idx) 42 | if len(group_buffer) == self.batch_size: 43 | yield group_buffer[:] # yield a copy of the list 44 | del group_buffer[:] 45 | 46 | def __len__(self): 47 | raise NotImplementedError("len() of GroupedBatchSampler is not well-defined.") 48 | -------------------------------------------------------------------------------- /detectron2/data/transforms/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | from fvcore.transforms.transform import Transform, TransformList # order them first 3 | from fvcore.transforms.transform import * 4 | from .transform import * 5 | from .augmentation import * 6 | from .augmentation_impl import * 7 | 8 | __all__ = [k for k in globals().keys() if not k.startswith("_")] 9 | -------------------------------------------------------------------------------- /detectron2/engine/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | 3 | from .launch import * 4 | from .train_loop import * 5 | 6 | __all__ = [k for k in globals().keys() if not k.startswith("_")] 7 | 8 | 9 | # prefer to let hooks and defaults live in separate namespaces (therefore not in __all__) 10 | # but still make them available here 11 | from .hooks import * 12 | from .defaults import * 13 | -------------------------------------------------------------------------------- /detectron2/evaluation/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | from .cityscapes_evaluation import CityscapesInstanceEvaluator, CityscapesSemSegEvaluator 3 | from .coco_evaluation import COCOEvaluator 4 | from .rotated_coco_evaluation import RotatedCOCOEvaluator 5 | from .evaluator import DatasetEvaluator, DatasetEvaluators, inference_context, inference_on_dataset 6 | from .lvis_evaluation import LVISEvaluator 7 | from .panoptic_evaluation import COCOPanopticEvaluator 8 | from .pascal_voc_evaluation import PascalVOCDetectionEvaluator 9 | from .sem_seg_evaluation import SemSegEvaluator 10 | from .testing import print_csv_format, verify_results 11 | 12 | __all__ = [k for k in globals().keys() if not k.startswith("_")] 13 | -------------------------------------------------------------------------------- /detectron2/evaluation/testing.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | import logging 3 | import numpy as np 4 | import pprint 5 | import sys 6 | from collections import OrderedDict 7 | from collections.abc import Mapping 8 | 9 | 10 | def print_csv_format(results): 11 | """ 12 | Print main metrics in a format similar to Detectron, 13 | so that they are easy to copypaste into a spreadsheet. 14 | 15 | Args: 16 | results (OrderedDict[dict]): task_name -> {metric -> score} 17 | """ 18 | assert isinstance(results, OrderedDict), results # unordered results cannot be properly printed 19 | logger = logging.getLogger(__name__) 20 | for task, res in results.items(): 21 | # Don't print "AP-category" metrics since they are usually not tracked. 22 | important_res = [(k, v) for k, v in res.items() if "-" not in k] 23 | logger.info("copypaste: Task: {}".format(task)) 24 | logger.info("copypaste: " + ",".join([k[0] for k in important_res])) 25 | logger.info("copypaste: " + ",".join(["{0:.4f}".format(k[1]) for k in important_res])) 26 | 27 | 28 | def verify_results(cfg, results): 29 | """ 30 | Args: 31 | results (OrderedDict[dict]): task_name -> {metric -> score} 32 | 33 | Returns: 34 | bool: whether the verification succeeds or not 35 | """ 36 | expected_results = cfg.TEST.EXPECTED_RESULTS 37 | if not len(expected_results): 38 | return True 39 | 40 | ok = True 41 | for task, metric, expected, tolerance in expected_results: 42 | actual = results[task].get(metric, None) 43 | if actual is None: 44 | ok = False 45 | continue 46 | if not np.isfinite(actual): 47 | ok = False 48 | continue 49 | diff = abs(actual - expected) 50 | if diff > tolerance: 51 | ok = False 52 | 53 | logger = logging.getLogger(__name__) 54 | if not ok: 55 | logger.error("Result verification failed!") 56 | logger.error("Expected Results: " + str(expected_results)) 57 | logger.error("Actual Results: " + pprint.pformat(results)) 58 | 59 | sys.exit(1) 60 | else: 61 | logger.info("Results verification passed.") 62 | return ok 63 | 64 | 65 | def flatten_results_dict(results): 66 | """ 67 | Expand a hierarchical dict of scalars into a flat dict of scalars. 68 | If results[k1][k2][k3] = v, the returned dict will have the entry 69 | {"k1/k2/k3": v}. 
70 | 71 | Args: 72 | results (dict): 73 | """ 74 | r = {} 75 | for k, v in results.items(): 76 | if isinstance(v, Mapping): 77 | v = flatten_results_dict(v) 78 | for kk, vv in v.items(): 79 | r[k + "/" + kk] = vv 80 | else: 81 | r[k] = v 82 | return r 83 | -------------------------------------------------------------------------------- /detectron2/export/README.md: -------------------------------------------------------------------------------- 1 | 2 | This directory contains code to prepare a detectron2 model for deployment. 3 | Currently it supports exporting a detectron2 model to Caffe2 format through ONNX. 4 | 5 | Please see [documentation](https://detectron2.readthedocs.io/tutorials/deployment.html) for its usage. 6 | 7 | 8 | ### Acknowledgements 9 | 10 | Thanks to the Mobile Vision team at Facebook for developing the Caffe2 conversion tools. 11 | 12 | Thanks to the Computing Platform Department - PAI team at Alibaba Group (@bddpqq, @chenbohua3), who 13 | helped export Detectron2 models to TorchScript. 14 | -------------------------------------------------------------------------------- /detectron2/export/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from .api import * 4 | 5 | __all__ = [k for k in globals().keys() if not k.startswith("_")] 6 | -------------------------------------------------------------------------------- /detectron2/layers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | from .batch_norm import FrozenBatchNorm2d, get_norm, NaiveSyncBatchNorm 3 | from .deform_conv import DeformConv, ModulatedDeformConv 4 | from .mask_ops import paste_masks_in_image 5 | from .nms import batched_nms, batched_nms_rotated, nms, nms_rotated 6 | from .roi_align import ROIAlign, roi_align 7 | from .roi_align_rotated import ROIAlignRotated, roi_align_rotated 8 | from .shape_spec import ShapeSpec 9 | from .wrappers import BatchNorm2d, Conv2d, ConvTranspose2d, cat, interpolate, Linear, nonzero_tuple 10 | from .blocks import CNNBlockBase, DepthwiseSeparableConv2d 11 | from .aspp import ASPP 12 | 13 | __all__ = [k for k in globals().keys() if not k.startswith("_")] 14 | -------------------------------------------------------------------------------- /detectron2/layers/csrc/README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | To add a new Op: 4 | 5 | 1. Create a new directory 6 | 2. Implement new ops there 7 | 3. Declare its Python interface in `vision.cpp`. 8 | -------------------------------------------------------------------------------- /detectron2/layers/csrc/ROIAlignRotated/ROIAlignRotated.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates.
2 | #pragma once 3 | #include <torch/types.h> 4 | 5 | namespace detectron2 { 6 | 7 | at::Tensor ROIAlignRotated_forward_cpu( 8 | const at::Tensor& input, 9 | const at::Tensor& rois, 10 | const float spatial_scale, 11 | const int pooled_height, 12 | const int pooled_width, 13 | const int sampling_ratio); 14 | 15 | at::Tensor ROIAlignRotated_backward_cpu( 16 | const at::Tensor& grad, 17 | const at::Tensor& rois, 18 | const float spatial_scale, 19 | const int pooled_height, 20 | const int pooled_width, 21 | const int batch_size, 22 | const int channels, 23 | const int height, 24 | const int width, 25 | const int sampling_ratio); 26 | 27 | #if defined(WITH_CUDA) || defined(WITH_HIP) 28 | at::Tensor ROIAlignRotated_forward_cuda( 29 | const at::Tensor& input, 30 | const at::Tensor& rois, 31 | const float spatial_scale, 32 | const int pooled_height, 33 | const int pooled_width, 34 | const int sampling_ratio); 35 | 36 | at::Tensor ROIAlignRotated_backward_cuda( 37 | const at::Tensor& grad, 38 | const at::Tensor& rois, 39 | const float spatial_scale, 40 | const int pooled_height, 41 | const int pooled_width, 42 | const int batch_size, 43 | const int channels, 44 | const int height, 45 | const int width, 46 | const int sampling_ratio); 47 | #endif 48 | 49 | // Interface for Python 50 | inline at::Tensor ROIAlignRotated_forward( 51 | const at::Tensor& input, 52 | const at::Tensor& rois, 53 | const float spatial_scale, 54 | const int pooled_height, 55 | const int pooled_width, 56 | const int sampling_ratio) { 57 | if (input.is_cuda()) { 58 | #if defined(WITH_CUDA) || defined(WITH_HIP) 59 | return ROIAlignRotated_forward_cuda( 60 | input, 61 | rois, 62 | spatial_scale, 63 | pooled_height, 64 | pooled_width, 65 | sampling_ratio); 66 | #else 67 | AT_ERROR("Not compiled with GPU support"); 68 | #endif 69 | } 70 | return ROIAlignRotated_forward_cpu( 71 | input, rois, spatial_scale, pooled_height, pooled_width, sampling_ratio); 72 | } 73 | 74 | inline at::Tensor ROIAlignRotated_backward( 75 | const at::Tensor& grad, 76 | const at::Tensor& rois, 77 | const float spatial_scale, 78 | const int pooled_height, 79 | const int pooled_width, 80 | const int batch_size, 81 | const int channels, 82 | const int height, 83 | const int width, 84 | const int sampling_ratio) { 85 | if (grad.is_cuda()) { 86 | #if defined(WITH_CUDA) || defined(WITH_HIP) 87 | return ROIAlignRotated_backward_cuda( 88 | grad, 89 | rois, 90 | spatial_scale, 91 | pooled_height, 92 | pooled_width, 93 | batch_size, 94 | channels, 95 | height, 96 | width, 97 | sampling_ratio); 98 | #else 99 | AT_ERROR("Not compiled with GPU support"); 100 | #endif 101 | } 102 | return ROIAlignRotated_backward_cpu( 103 | grad, 104 | rois, 105 | spatial_scale, 106 | pooled_height, 107 | pooled_width, 108 | batch_size, 109 | channels, 110 | height, 111 | width, 112 | sampling_ratio); 113 | } 114 | 115 | } // namespace detectron2 116 | -------------------------------------------------------------------------------- /detectron2/layers/csrc/box_iou_rotated/box_iou_rotated.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates.
2 | #pragma once 3 | #include <torch/types.h> 4 | 5 | namespace detectron2 { 6 | 7 | at::Tensor box_iou_rotated_cpu( 8 | const at::Tensor& boxes1, 9 | const at::Tensor& boxes2); 10 | 11 | #if defined(WITH_CUDA) || defined(WITH_HIP) 12 | at::Tensor box_iou_rotated_cuda( 13 | const at::Tensor& boxes1, 14 | const at::Tensor& boxes2); 15 | #endif 16 | 17 | // Interface for Python 18 | // inline is needed to prevent multiple function definitions when this header is 19 | // included by different cpps 20 | inline at::Tensor box_iou_rotated( 21 | const at::Tensor& boxes1, 22 | const at::Tensor& boxes2) { 23 | assert(boxes1.device().is_cuda() == boxes2.device().is_cuda()); 24 | if (boxes1.device().is_cuda()) { 25 | #if defined(WITH_CUDA) || defined(WITH_HIP) 26 | return box_iou_rotated_cuda(boxes1.contiguous(), boxes2.contiguous()); 27 | #else 28 | AT_ERROR("Not compiled with GPU support"); 29 | #endif 30 | } 31 | 32 | return box_iou_rotated_cpu(boxes1.contiguous(), boxes2.contiguous()); 33 | } 34 | 35 | } // namespace detectron2 36 | -------------------------------------------------------------------------------- /detectron2/layers/csrc/box_iou_rotated/box_iou_rotated_cpu.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. 2 | #include "box_iou_rotated.h" 3 | #include "box_iou_rotated_utils.h" 4 | 5 | namespace detectron2 { 6 | 7 | template <typename T> 8 | void box_iou_rotated_cpu_kernel( 9 | const at::Tensor& boxes1, 10 | const at::Tensor& boxes2, 11 | at::Tensor& ious) { 12 | auto num_boxes1 = boxes1.size(0); 13 | auto num_boxes2 = boxes2.size(0); 14 | 15 | for (int i = 0; i < num_boxes1; i++) { 16 | for (int j = 0; j < num_boxes2; j++) { 17 | ious[i * num_boxes2 + j] = single_box_iou_rotated<T>( 18 | boxes1[i].data_ptr<T>(), boxes2[j].data_ptr<T>()); 19 | } 20 | } 21 | } 22 | 23 | at::Tensor box_iou_rotated_cpu( 24 | // input must be contiguous: 25 | const at::Tensor& boxes1, 26 | const at::Tensor& boxes2) { 27 | auto num_boxes1 = boxes1.size(0); 28 | auto num_boxes2 = boxes2.size(0); 29 | at::Tensor ious = 30 | at::empty({num_boxes1 * num_boxes2}, boxes1.options().dtype(at::kFloat)); 31 | 32 | box_iou_rotated_cpu_kernel<float>(boxes1, boxes2, ious); 33 | 34 | // reshape from 1d array to 2d array 35 | auto shape = std::vector<int64_t>{num_boxes1, num_boxes2}; 36 | return ious.reshape(shape); 37 | } 38 | 39 | } // namespace detectron2 40 | -------------------------------------------------------------------------------- /detectron2/layers/csrc/cuda_version.cu: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. 2 | 3 | #include <cuda_runtime_api.h> 4 | 5 | namespace detectron2 { 6 | int get_cudart_version() { 7 | // Not a ROCM platform: Either HIP is not used, or 8 | // it is used, but platform is not ROCM (i.e. it is CUDA) 9 | #if !defined(__HIP_PLATFORM_HCC__) 10 | return CUDART_VERSION; 11 | #else 12 | int version = 0; 13 | 14 | #if HIP_VERSION_MAJOR != 0 15 | // Create a convention similar to that of CUDA, as assumed by other 16 | // parts of the code.
17 | 18 | version = HIP_VERSION_MINOR; 19 | version += (HIP_VERSION_MAJOR * 100); 20 | #else 21 | hipRuntimeGetVersion(&version); 22 | #endif 23 | return version; 24 | #endif 25 | } 26 | } // namespace detectron2 27 | -------------------------------------------------------------------------------- /detectron2/layers/csrc/nms_rotated/nms_rotated.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. 2 | #pragma once 3 | #include <torch/types.h> 4 | 5 | namespace detectron2 { 6 | 7 | at::Tensor nms_rotated_cpu( 8 | const at::Tensor& dets, 9 | const at::Tensor& scores, 10 | const float iou_threshold); 11 | 12 | #if defined(WITH_CUDA) || defined(WITH_HIP) 13 | at::Tensor nms_rotated_cuda( 14 | const at::Tensor& dets, 15 | const at::Tensor& scores, 16 | const float iou_threshold); 17 | #endif 18 | 19 | // Interface for Python 20 | // inline is needed to prevent multiple function definitions when this header is 21 | // included by different cpps 22 | inline at::Tensor nms_rotated( 23 | const at::Tensor& dets, 24 | const at::Tensor& scores, 25 | const float iou_threshold) { 26 | assert(dets.device().is_cuda() == scores.device().is_cuda()); 27 | if (dets.device().is_cuda()) { 28 | #if defined(WITH_CUDA) || defined(WITH_HIP) 29 | return nms_rotated_cuda( 30 | dets.contiguous(), scores.contiguous(), iou_threshold); 31 | #else 32 | AT_ERROR("Not compiled with GPU support"); 33 | #endif 34 | } 35 | 36 | return nms_rotated_cpu(dets.contiguous(), scores.contiguous(), iou_threshold); 37 | } 38 | 39 | } // namespace detectron2 40 | -------------------------------------------------------------------------------- /detectron2/layers/csrc/nms_rotated/nms_rotated_cpu.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates.
2 | #include "../box_iou_rotated/box_iou_rotated_utils.h" 3 | #include "nms_rotated.h" 4 | 5 | namespace detectron2 { 6 | 7 | template 8 | at::Tensor nms_rotated_cpu_kernel( 9 | const at::Tensor& dets, 10 | const at::Tensor& scores, 11 | const float iou_threshold) { 12 | // nms_rotated_cpu_kernel is modified from torchvision's nms_cpu_kernel, 13 | // however, the code in this function is much shorter because 14 | // we delegate the IoU computation for rotated boxes to 15 | // the single_box_iou_rotated function in box_iou_rotated_utils.h 16 | AT_ASSERTM(dets.device().is_cpu(), "dets must be a CPU tensor"); 17 | AT_ASSERTM(scores.device().is_cpu(), "scores must be a CPU tensor"); 18 | AT_ASSERTM( 19 | dets.scalar_type() == scores.scalar_type(), 20 | "dets should have the same type as scores"); 21 | 22 | if (dets.numel() == 0) { 23 | return at::empty({0}, dets.options().dtype(at::kLong)); 24 | } 25 | 26 | auto order_t = std::get<1>(scores.sort(0, /* descending=*/true)); 27 | 28 | auto ndets = dets.size(0); 29 | at::Tensor suppressed_t = at::zeros({ndets}, dets.options().dtype(at::kByte)); 30 | at::Tensor keep_t = at::zeros({ndets}, dets.options().dtype(at::kLong)); 31 | 32 | auto suppressed = suppressed_t.data_ptr(); 33 | auto keep = keep_t.data_ptr(); 34 | auto order = order_t.data_ptr(); 35 | 36 | int64_t num_to_keep = 0; 37 | 38 | for (int64_t _i = 0; _i < ndets; _i++) { 39 | auto i = order[_i]; 40 | if (suppressed[i] == 1) { 41 | continue; 42 | } 43 | 44 | keep[num_to_keep++] = i; 45 | 46 | for (int64_t _j = _i + 1; _j < ndets; _j++) { 47 | auto j = order[_j]; 48 | if (suppressed[j] == 1) { 49 | continue; 50 | } 51 | 52 | auto ovr = single_box_iou_rotated( 53 | dets[i].data_ptr(), dets[j].data_ptr()); 54 | if (ovr >= iou_threshold) { 55 | suppressed[j] = 1; 56 | } 57 | } 58 | } 59 | return keep_t.narrow(/*dim=*/0, /*start=*/0, /*length=*/num_to_keep); 60 | } 61 | 62 | at::Tensor nms_rotated_cpu( 63 | // input must be contiguous 64 | const at::Tensor& dets, 65 | const at::Tensor& scores, 66 | const float iou_threshold) { 67 | auto result = at::empty({0}, dets.options()); 68 | 69 | AT_DISPATCH_FLOATING_TYPES(dets.scalar_type(), "nms_rotated", [&] { 70 | result = nms_rotated_cpu_kernel(dets, scores, iou_threshold); 71 | }); 72 | return result; 73 | } 74 | 75 | } // namespace detectron2 76 | -------------------------------------------------------------------------------- /detectron2/layers/rotated_boxes.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | from __future__ import absolute_import, division, print_function, unicode_literals 3 | 4 | from detectron2 import _C 5 | 6 | 7 | def pairwise_iou_rotated(boxes1, boxes2): 8 | """ 9 | Return intersection-over-union (Jaccard index) of boxes. 10 | 11 | Both sets of boxes are expected to be in 12 | (x_center, y_center, width, height, angle) format. 13 | 14 | Arguments: 15 | boxes1 (Tensor[N, 5]) 16 | boxes2 (Tensor[M, 5]) 17 | 18 | Returns: 19 | iou (Tensor[N, M]): the NxM matrix containing the pairwise 20 | IoU values for every element in boxes1 and boxes2 21 | """ 22 | return _C.box_iou_rotated(boxes1, boxes2) 23 | -------------------------------------------------------------------------------- /detectron2/layers/shape_spec.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright (c) Facebook, Inc. and its affiliates. 
3 | from collections import namedtuple 4 | 5 | 6 | class ShapeSpec(namedtuple("_ShapeSpec", ["channels", "height", "width", "stride"])): 7 | """ 8 | A simple structure that contains basic shape specification about a tensor. 9 | It is often used as the auxiliary inputs/outputs of models, 10 | to complement the lack of shape inference ability among pytorch modules. 11 | 12 | Attributes: 13 | channels: 14 | height: 15 | width: 16 | stride: 17 | """ 18 | 19 | def __new__(cls, *, channels=None, height=None, width=None, stride=None): 20 | return super().__new__(cls, channels, height, width, stride) 21 | -------------------------------------------------------------------------------- /detectron2/model_zoo/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | """ 3 | Model Zoo API for Detectron2: a collection of functions to create common model architectures and 4 | optionally load pre-trained weights as released in 5 | `MODEL_ZOO.md `_. 6 | """ 7 | from .model_zoo import get, get_config_file, get_checkpoint_url, get_config 8 | 9 | __all__ = ["get_checkpoint_url", "get", "get_config_file", "get_config"] 10 | -------------------------------------------------------------------------------- /detectron2/modeling/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | from detectron2.layers import ShapeSpec 3 | 4 | from .anchor_generator import build_anchor_generator, ANCHOR_GENERATOR_REGISTRY 5 | from .backbone import ( 6 | BACKBONE_REGISTRY, 7 | FPN, 8 | Backbone, 9 | ResNet, 10 | ResNetBlockBase, 11 | build_backbone, 12 | build_resnet_backbone, 13 | make_stage, 14 | ) 15 | from .meta_arch import ( 16 | META_ARCH_REGISTRY, 17 | SEM_SEG_HEADS_REGISTRY, 18 | GeneralizedRCNN, 19 | PanopticFPN, 20 | ProposalNetwork, 21 | RetinaNet, 22 | SemanticSegmentor, 23 | build_model, 24 | build_sem_seg_head, 25 | ) 26 | from .postprocessing import detector_postprocess 27 | from .proposal_generator import ( 28 | PROPOSAL_GENERATOR_REGISTRY, 29 | build_proposal_generator, 30 | RPN_HEAD_REGISTRY, 31 | build_rpn_head, 32 | ) 33 | from .roi_heads import ( 34 | ROI_BOX_HEAD_REGISTRY, 35 | ROI_HEADS_REGISTRY, 36 | ROI_KEYPOINT_HEAD_REGISTRY, 37 | ROI_MASK_HEAD_REGISTRY, 38 | ROIHeads, 39 | StandardROIHeads, 40 | BaseMaskRCNNHead, 41 | BaseKeypointRCNNHead, 42 | FastRCNNOutputLayers, 43 | build_box_head, 44 | build_keypoint_head, 45 | build_mask_head, 46 | build_roi_heads, 47 | ) 48 | from .test_time_augmentation import DatasetMapperTTA, GeneralizedRCNNWithTTA 49 | 50 | _EXCLUDE = {"ShapeSpec"} 51 | __all__ = [k for k in globals().keys() if k not in _EXCLUDE and not k.startswith("_")] 52 | -------------------------------------------------------------------------------- /detectron2/modeling/backbone/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
2 | from .build import build_backbone, BACKBONE_REGISTRY # noqa F401 isort:skip 3 | 4 | from .backbone import Backbone 5 | from .fpn import FPN 6 | from .resnet import ResNet, ResNetBlockBase, build_resnet_backbone, make_stage 7 | 8 | __all__ = [k for k in globals().keys() if not k.startswith("_")] 9 | # TODO can expose more resnet blocks after careful consideration 10 | -------------------------------------------------------------------------------- /detectron2/modeling/backbone/backbone.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | from abc import ABCMeta, abstractmethod 3 | import torch.nn as nn 4 | 5 | from detectron2.layers import ShapeSpec 6 | 7 | __all__ = ["Backbone"] 8 | 9 | 10 | class Backbone(nn.Module, metaclass=ABCMeta): 11 | """ 12 | Abstract base class for network backbones. 13 | """ 14 | 15 | def __init__(self): 16 | """ 17 | The `__init__` method of any subclass can specify its own set of arguments. 18 | """ 19 | super().__init__() 20 | 21 | @abstractmethod 22 | def forward(self): 23 | """ 24 | Subclasses must override this method, but adhere to the same return type. 25 | 26 | Returns: 27 | dict[str->Tensor]: mapping from feature name (e.g., "res2") to tensor 28 | """ 29 | pass 30 | 31 | @property 32 | def size_divisibility(self) -> int: 33 | """ 34 | Some backbones require the input height and width to be divisible by a 35 | specific integer. This is typically true for encoder / decoder type networks 36 | with lateral connection (e.g., FPN) for which feature maps need to match 37 | dimension in the "bottom up" and "top down" paths. Set to 0 if no specific 38 | input size divisibility is required. 39 | """ 40 | return 0 41 | 42 | def output_shape(self): 43 | """ 44 | Returns: 45 | dict[str->ShapeSpec] 46 | """ 47 | # this is a backward-compatible default 48 | return { 49 | name: ShapeSpec( 50 | channels=self._out_feature_channels[name], stride=self._out_feature_strides[name] 51 | ) 52 | for name in self._out_features 53 | } 54 | -------------------------------------------------------------------------------- /detectron2/modeling/backbone/build.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | from detectron2.layers import ShapeSpec 3 | from detectron2.utils.registry import Registry 4 | 5 | from .backbone import Backbone 6 | 7 | BACKBONE_REGISTRY = Registry("BACKBONE") 8 | BACKBONE_REGISTRY.__doc__ = """ 9 | Registry for backbones, which extract feature maps from images. 10 | 11 | The registered object must be a callable that accepts two arguments: 12 | 13 | 1. A :class:`detectron2.config.CfgNode` 14 | 2. A :class:`detectron2.layers.ShapeSpec`, which contains the input shape specification. 15 | 16 | Registered object must return an instance of :class:`Backbone`. 17 | """ 18 | 19 | 20 | def build_backbone(cfg, input_shape=None): 21 | """ 22 | Build a backbone from `cfg.MODEL.BACKBONE.NAME`.
23 | 24 | Returns: 25 | an instance of :class:`Backbone` 26 | """ 27 | if input_shape is None: 28 | input_shape = ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN)) 29 | 30 | backbone_name = cfg.MODEL.BACKBONE.NAME 31 | backbone = BACKBONE_REGISTRY.get(backbone_name)(cfg, input_shape) 32 | assert isinstance(backbone, Backbone) 33 | return backbone 34 | -------------------------------------------------------------------------------- /detectron2/modeling/meta_arch/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright (c) Facebook, Inc. and its affiliates. 3 | 4 | from .build import META_ARCH_REGISTRY, build_model # isort:skip 5 | 6 | from .panoptic_fpn import PanopticFPN 7 | 8 | # import all the meta_arch, so they will be registered 9 | from .rcnn import GeneralizedRCNN, ProposalNetwork 10 | from .retinanet import RetinaNet 11 | from .semantic_seg import SEM_SEG_HEADS_REGISTRY, SemanticSegmentor, build_sem_seg_head 12 | 13 | 14 | __all__ = list(globals().keys()) 15 | -------------------------------------------------------------------------------- /detectron2/modeling/meta_arch/build.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | import torch 3 | 4 | from detectron2.utils.registry import Registry 5 | 6 | META_ARCH_REGISTRY = Registry("META_ARCH") # noqa F401 isort:skip 7 | META_ARCH_REGISTRY.__doc__ = """ 8 | Registry for meta-architectures, i.e. the whole model. 9 | 10 | The registered object will be called with `obj(cfg)` 11 | and expected to return a `nn.Module` object. 12 | """ 13 | 14 | 15 | def build_model(cfg): 16 | """ 17 | Build the whole model architecture, defined by ``cfg.MODEL.META_ARCHITECTURE``. 18 | Note that it does not load any weights from ``cfg``. 19 | """ 20 | meta_arch = cfg.MODEL.META_ARCHITECTURE 21 | model = META_ARCH_REGISTRY.get(meta_arch)(cfg) 22 | model.to(torch.device(cfg.MODEL.DEVICE)) 23 | return model 24 | -------------------------------------------------------------------------------- /detectron2/modeling/proposal_generator/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | from .build import PROPOSAL_GENERATOR_REGISTRY, build_proposal_generator 3 | from .rpn import RPN_HEAD_REGISTRY, build_rpn_head, RPN 4 | 5 | __all__ = list(globals().keys()) 6 | -------------------------------------------------------------------------------- /detectron2/modeling/proposal_generator/build.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | from detectron2.utils.registry import Registry 3 | 4 | PROPOSAL_GENERATOR_REGISTRY = Registry("PROPOSAL_GENERATOR") 5 | PROPOSAL_GENERATOR_REGISTRY.__doc__ = """ 6 | Registry for proposal generator, which produces object proposals from feature maps. 7 | 8 | The registered object will be called with `obj(cfg, input_shape)`. 9 | The call should return a `nn.Module` object. 10 | """ 11 | 12 | from . import rpn, rrpn # noqa F401 isort:skip 13 | 14 | 15 | def build_proposal_generator(cfg, input_shape): 16 | """ 17 | Build a proposal generator from `cfg.MODEL.PROPOSAL_GENERATOR.NAME`. 18 | The name can be "PrecomputedProposals" to use no proposal generator. 
19 | """ 20 | name = cfg.MODEL.PROPOSAL_GENERATOR.NAME 21 | if name == "PrecomputedProposals": 22 | return None 23 | 24 | return PROPOSAL_GENERATOR_REGISTRY.get(name)(cfg, input_shape) 25 | -------------------------------------------------------------------------------- /detectron2/modeling/roi_heads/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | from .box_head import ROI_BOX_HEAD_REGISTRY, build_box_head, FastRCNNConvFCHead 3 | from .keypoint_head import ( 4 | ROI_KEYPOINT_HEAD_REGISTRY, 5 | build_keypoint_head, 6 | BaseKeypointRCNNHead, 7 | KRCNNConvDeconvUpsampleHead, 8 | ) 9 | from .mask_head import ( 10 | ROI_MASK_HEAD_REGISTRY, 11 | build_mask_head, 12 | BaseMaskRCNNHead, 13 | MaskRCNNConvUpsampleHead, 14 | ) 15 | from .roi_heads import ( 16 | ROI_HEADS_REGISTRY, 17 | ROIHeads, 18 | Res5ROIHeads, 19 | StandardROIHeads, 20 | build_roi_heads, 21 | select_foreground_proposals, 22 | ) 23 | from .rotated_fast_rcnn import RROIHeads 24 | from .fast_rcnn import FastRCNNOutputLayers 25 | 26 | from . import cascade_rcnn # isort:skip 27 | 28 | __all__ = list(globals().keys()) 29 | -------------------------------------------------------------------------------- /detectron2/modeling/sampling.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | import torch 3 | 4 | from detectron2.layers import nonzero_tuple 5 | 6 | __all__ = ["subsample_labels"] 7 | 8 | 9 | def subsample_labels( 10 | labels: torch.Tensor, num_samples: int, positive_fraction: float, bg_label: int 11 | ): 12 | """ 13 | Return `num_samples` (or fewer, if not enough found) 14 | random samples from `labels` which is a mixture of positives & negatives. 15 | It will try to return as many positives as possible without 16 | exceeding `positive_fraction * num_samples`, and then try to 17 | fill the remaining slots with negatives. 18 | 19 | Args: 20 | labels (Tensor): (N, ) label vector with values: 21 | * -1: ignore 22 | * bg_label: background ("negative") class 23 | * otherwise: one or more foreground ("positive") classes 24 | num_samples (int): The total number of labels with value >= 0 to return. 25 | Values that are not sampled will be filled with -1 (ignore). 26 | positive_fraction (float): The number of subsampled labels with values > 0 27 | is `min(num_positives, int(positive_fraction * num_samples))`. The number 28 | of negatives sampled is `min(num_negatives, num_samples - num_positives_sampled)`. 29 | In order words, if there are not enough positives, the sample is filled with 30 | negatives. If there are also not enough negatives, then as many elements are 31 | sampled as is possible. 32 | bg_label (int): label index of background ("negative") class. 33 | 34 | Returns: 35 | pos_idx, neg_idx (Tensor): 36 | 1D vector of indices. The total length of both is `num_samples` or fewer. 
37 | """ 38 | positive = nonzero_tuple((labels != -1) & (labels != bg_label))[0] 39 | negative = nonzero_tuple(labels == bg_label)[0] 40 | 41 | num_pos = int(num_samples * positive_fraction) 42 | # protect against not enough positive examples 43 | num_pos = min(positive.numel(), num_pos) 44 | num_neg = num_samples - num_pos 45 | # protect against not enough negative examples 46 | num_neg = min(negative.numel(), num_neg) 47 | 48 | # randomly select positive and negative examples 49 | perm1 = torch.randperm(positive.numel(), device=positive.device)[:num_pos] 50 | perm2 = torch.randperm(negative.numel(), device=negative.device)[:num_neg] 51 | 52 | pos_idx = positive[perm1] 53 | neg_idx = negative[perm2] 54 | return pos_idx, neg_idx 55 | -------------------------------------------------------------------------------- /detectron2/projects/README.md: -------------------------------------------------------------------------------- 1 | 2 | Projects live in the [`projects` directory](../../projects) under the root of this repository, but not here. 3 | -------------------------------------------------------------------------------- /detectron2/projects/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | import importlib 3 | from pathlib import Path 4 | 5 | _PROJECTS = { 6 | "point_rend": "PointRend", 7 | "deeplab": "DeepLab", 8 | "panoptic_deeplab": "Panoptic-DeepLab", 9 | } 10 | _PROJECT_ROOT = Path(__file__).parent.parent.parent / "projects" 11 | 12 | if _PROJECT_ROOT.is_dir(): 13 | # This is true only for in-place installation (pip install -e, setup.py develop), 14 | # where setup(package_dir=) does not work: https://github.com/pypa/setuptools/issues/230 15 | 16 | class _D2ProjectsFinder(importlib.abc.MetaPathFinder): 17 | def find_spec(self, name, path, target=None): 18 | if not name.startswith("detectron2.projects."): 19 | return 20 | project_name = name.split(".")[-1] 21 | project_dir = _PROJECTS.get(project_name) 22 | if not project_dir: 23 | return 24 | target_file = _PROJECT_ROOT / f"{project_dir}/{project_name}/__init__.py" 25 | if not target_file.is_file(): 26 | return 27 | return importlib.util.spec_from_file_location(name, target_file) 28 | 29 | import sys 30 | 31 | sys.meta_path.append(_D2ProjectsFinder()) 32 | -------------------------------------------------------------------------------- /detectron2/solver/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | from .build import build_lr_scheduler, build_optimizer, get_default_optimizer_params 3 | from .lr_scheduler import WarmupCosineLR, WarmupMultiStepLR 4 | 5 | __all__ = [k for k in globals().keys() if not k.startswith("_")] 6 | -------------------------------------------------------------------------------- /detectron2/structures/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
2 | from .boxes import Boxes, BoxMode, pairwise_iou, pairwise_ioa 3 | from .image_list import ImageList 4 | 5 | from .instances import Instances 6 | from .keypoints import Keypoints, heatmaps_to_keypoints 7 | from .masks import BitMasks, PolygonMasks, rasterize_polygons_within_box, polygons_to_bitmask 8 | from .rotated_boxes import RotatedBoxes 9 | from .rotated_boxes import pairwise_iou as pairwise_iou_rotated 10 | 11 | __all__ = [k for k in globals().keys() if not k.startswith("_")] 12 | -------------------------------------------------------------------------------- /detectron2/utils/README.md: -------------------------------------------------------------------------------- 1 | # Utility functions 2 | 3 | This folder contains utility functions that are not used in the 4 | core library, but are useful for building models or training 5 | code using the config system. 6 | -------------------------------------------------------------------------------- /detectron2/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | -------------------------------------------------------------------------------- /detectron2/utils/file_io.py: -------------------------------------------------------------------------------- 1 | from fvcore.common.file_io import HTTPURLHandler, OneDrivePathHandler, PathHandler, PathManagerBase 2 | 3 | __all__ = ["PathManager", "PathHandler"] 4 | 5 | 6 | PathManager = PathManagerBase() 7 | """ 8 | This is a detectron2 project-specific PathManager. 9 | We try to stay away from the global PathManager in fvcore, as it 10 | introduces potential conflicts with other libraries. 11 | """ 12 | 13 | 14 | class Detectron2Handler(PathHandler): 15 | """ 16 | Resolve anything that's hosted under detectron2's namespace. 17 | """ 18 | 19 | PREFIX = "detectron2://" 20 | S3_DETECTRON2_PREFIX = "https://dl.fbaipublicfiles.com/detectron2/" 21 | 22 | def _get_supported_prefixes(self): 23 | return [self.PREFIX] 24 | 25 | def _get_local_path(self, path): 26 | name = path[len(self.PREFIX) :] 27 | return PathManager.get_local_path(self.S3_DETECTRON2_PREFIX + name) 28 | 29 | def _open(self, path, mode="r", **kwargs): 30 | return PathManager.open(self._get_local_path(path), mode, **kwargs) 31 | 32 | 33 | PathManager.register_handler(HTTPURLHandler()) 34 | PathManager.register_handler(OneDrivePathHandler()) 35 | PathManager.register_handler(Detectron2Handler()) 36 | -------------------------------------------------------------------------------- /detectron2/utils/memory.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | 3 | import logging 4 | from contextlib import contextmanager 5 | from functools import wraps 6 | import torch 7 | 8 | __all__ = ["retry_if_cuda_oom"] 9 | 10 | 11 | @contextmanager 12 | def _ignore_torch_cuda_oom(): 13 | """ 14 | A context manager which ignores the CUDA OOM exception from pytorch. 15 | """ 16 | try: 17 | yield 18 | except RuntimeError as e: 19 | # NOTE: the string may change? 20 | if "CUDA out of memory. " in str(e): 21 | pass 22 | else: 23 | raise 24 | 25 | 26 | def retry_if_cuda_oom(func): 27 | """ 28 | Makes a function retry itself after encountering 29 | pytorch's CUDA OOM error. 30 | It will first retry after calling `torch.cuda.empty_cache()`. 31 | 32 | If that still fails, it will then retry by trying to convert inputs to the CPU.
33 | In this case, it expects the function to dispatch to a CPU implementation. 34 | The return values may become CPU tensors as well, and it is the user's 35 | responsibility to convert them back to CUDA tensors if needed. 36 | 37 | Args: 38 | func: a stateless callable that takes tensor-like objects as arguments 39 | 40 | Returns: 41 | a callable which retries `func` if OOM is encountered. 42 | 43 | Examples: 44 | :: 45 | output = retry_if_cuda_oom(some_torch_function)(input1, input2) 46 | # output may be on CPU even if inputs are on GPU 47 | 48 | Note: 49 | 1. When converting inputs to CPU, it will only look at each argument and check 50 | if it has `.device` and `.to` for conversion. Nested structures of tensors 51 | are not supported. 52 | 53 | 2. Since the function might be called more than once, it has to be 54 | stateless. 55 | """ 56 | 57 | def maybe_to_cpu(x): 58 | try: 59 | like_gpu_tensor = x.device.type == "cuda" and hasattr(x, "to") 60 | except AttributeError: 61 | like_gpu_tensor = False 62 | if like_gpu_tensor: 63 | return x.to(device="cpu") 64 | else: 65 | return x 66 | 67 | @wraps(func) 68 | def wrapped(*args, **kwargs): 69 | with _ignore_torch_cuda_oom(): 70 | return func(*args, **kwargs) 71 | 72 | # Clear cache and retry 73 | torch.cuda.empty_cache() 74 | with _ignore_torch_cuda_oom(): 75 | return func(*args, **kwargs) 76 | 77 | # Try on CPU. This slows down the code significantly, therefore print a notice. 78 | logger = logging.getLogger(__name__) 79 | logger.info("Attempting to copy inputs of {} to CPU due to CUDA OOM".format(str(func))) 80 | new_args = (maybe_to_cpu(x) for x in args) 81 | new_kwargs = {k: maybe_to_cpu(v) for k, v in kwargs.items()} 82 | return func(*new_args, **new_kwargs) 83 | 84 | return wrapped 85 | -------------------------------------------------------------------------------- /detectron2/utils/registry.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | 3 | # Keep this module for backward compatibility. 4 | from fvcore.common.registry import Registry # noqa 5 | 6 | __all__ = ["Registry"] 7 | -------------------------------------------------------------------------------- /detectron2/utils/serialize.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | import cloudpickle 3 | 4 | 5 | class PicklableWrapper(object): 6 | """ 7 | Wrap an object to make it more picklable. Note that it uses 8 | heavyweight serialization libraries that are slower than pickle. 9 | It's best to use it only on closures (which are usually not picklable). 10 | 11 | This is a simplified version of 12 | https://github.com/joblib/joblib/blob/master/joblib/externals/loky/cloudpickle_wrapper.py 13 | """ 14 | 15 | def __init__(self, obj): 16 | self._obj = obj 17 | 18 | def __reduce__(self): 19 | s = cloudpickle.dumps(self._obj) 20 | return cloudpickle.loads, (s,) 21 | 22 | def __call__(self, *args, **kwargs): 23 | return self._obj(*args, **kwargs) 24 | 25 | def __getattr__(self, attr): 26 | # Ensure that the wrapped object can be used seamlessly in place of the original object.
27 | if attr not in ["_obj"]: 28 | return getattr(self._obj, attr) 29 | return getattr(self, attr) 30 | -------------------------------------------------------------------------------- /dev/README.md: -------------------------------------------------------------------------------- 1 | 2 | ## Some scripts for developers to use, including: 3 | 4 | - `linter.sh`: lint the codebase before commit 5 | - `run_{inference,instant}_tests.sh`: run inference/training for a few iterations. 6 | Note that these tests require 2 GPUs. 7 | - `parse_results.sh`: parse results from a log file. 8 | -------------------------------------------------------------------------------- /dev/linter.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 3 | 4 | # Run this script at project root by "./dev/linter.sh" before you commit 5 | 6 | vergte() { 7 | [ "$2" = "$(echo -e "$1\\n$2" | sort -V | head -n1)" ] 8 | } 9 | 10 | { 11 | black --version | grep -E "(19.3b0.*6733274)|(19.3b0\\+8)" > /dev/null 12 | } || { 13 | echo "Linter requires 'black @ git+https://github.com/psf/black@673327449f86fce558adde153bb6cbe54bfebad2' !" 14 | exit 1 15 | } 16 | 17 | ISORT_TARGET_VERSION="4.3.21" 18 | ISORT_VERSION=$(isort -v | grep VERSION | awk '{print $2}') 19 | vergte "$ISORT_VERSION" "$ISORT_TARGET_VERSION" || { 20 | echo "Linter requires isort>=${ISORT_TARGET_VERSION} !" 21 | exit 1 22 | } 23 | 24 | set -v 25 | 26 | echo "Running isort ..." 27 | isort -y -sp . --atomic 28 | 29 | echo "Running black ..." 30 | black -l 100 . 31 | 32 | echo "Running flake8 ..." 33 | if [ -x "$(command -v flake8-3)" ]; then 34 | flake8-3 . 35 | else 36 | python3 -m flake8 . 37 | fi 38 | 39 | # echo "Running mypy ..." 40 | # Pytorch does not have enough type annotations 41 | # mypy detectron2/solver detectron2/structures detectron2/config 42 | 43 | echo "Running clang-format ..." 44 | find . -regex ".*\.\(cpp\|c\|cc\|cu\|cxx\|h\|hh\|hpp\|hxx\|tcc\|mm\|m\)" -print0 | xargs -0 clang-format -i 45 | 46 | command -v arc > /dev/null && arc lint 47 | -------------------------------------------------------------------------------- /dev/packaging/README.md: -------------------------------------------------------------------------------- 1 | 2 | ## To build a cu101 wheel for release: 3 | 4 | ``` 5 | $ nvidia-docker run -it --storage-opt "size=20GB" --name pt pytorch/manylinux-cuda101 6 | # inside the container: 7 | # git clone https://github.com/facebookresearch/detectron2/ 8 | # cd detectron2 9 | # export CU_VERSION=cu101 D2_VERSION_SUFFIX= PYTHON_VERSION=3.7 PYTORCH_VERSION=1.4 10 | # ./dev/packaging/build_wheel.sh 11 | ``` 12 | 13 | ## To build all wheels for `CUDA {9.2,10.0,10.1}` x `Python {3.6,3.7,3.8}`: 14 | ``` 15 | ./dev/packaging/build_all_wheels.sh 16 | ./dev/packaging/gen_wheel_index.sh /path/to/wheels 17 | ``` 18 | -------------------------------------------------------------------------------- /dev/packaging/build_all_wheels.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 3 | 4 | [[ -d "dev/packaging" ]] || { 5 | echo "Please run this script at detectron2 root!"
6 | exit 1 7 | } 8 | 9 | build_one() { 10 | cu=$1 11 | pytorch_ver=$2 12 | 13 | case "$cu" in 14 | cu*) 15 | container_name=manylinux-cuda${cu/cu/} 16 | ;; 17 | cpu) 18 | container_name=manylinux-cuda101 19 | ;; 20 | *) 21 | echo "Unrecognized cu=$cu" 22 | exit 1 23 | ;; 24 | esac 25 | 26 | echo "Launching container $container_name ..." 27 | 28 | for py in 3.6 3.7 3.8; do 29 | docker run -itd \ 30 | --name $container_name \ 31 | --mount type=bind,source="$(pwd)",target=/detectron2 \ 32 | pytorch/$container_name 33 | 34 | cat <<EOF | docker exec -i $container_name sh 35 | export CU_VERSION=$cu D2_VERSION_SUFFIX=+$cu PYTHON_VERSION=$py PYTORCH_VERSION=$pytorch_ver 36 | cd /detectron2 && ./dev/packaging/build_wheel.sh 37 | EOF 38 | 39 | docker container stop $container_name 40 | docker container rm $container_name 41 | done 42 | } 43 | 44 | if [[ -n "$1" ]]; then 45 | build_one "$1" "$2" 46 | else 47 | build_one cu102 1.5 && build_one cu101 1.5 && build_one cu92 1.5 && build_one cpu 1.5 48 | build_one cu101 1.4 && build_one cu100 1.4 && build_one cu92 1.4 && build_one cpu 1.4 49 | fi 50 | -------------------------------------------------------------------------------- /dev/packaging/build_wheel.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 3 | set -ex 4 | 5 | ldconfig 6 | 7 | script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" 8 | . "$script_dir/pkg_helpers.bash" 9 | 10 | echo "Build Settings:" 11 | echo "CU_VERSION: $CU_VERSION" # e.g. cu101 12 | echo "D2_VERSION_SUFFIX: $D2_VERSION_SUFFIX" # e.g. +cu101 or "" 13 | echo "PYTHON_VERSION: $PYTHON_VERSION" # e.g. 3.6 14 | echo "PYTORCH_VERSION: $PYTORCH_VERSION" # e.g. 1.4 15 | 16 | setup_cuda 17 | setup_wheel_python 18 | yum install ninja-build -y && ln -sv /usr/bin/ninja-build /usr/bin/ninja 19 | 20 | pip_install pip numpy -U 21 | pip_install "torch==$PYTORCH_VERSION" \ 22 | -f https://download.pytorch.org/whl/"$CU_VERSION"/torch_stable.html 23 | 24 | # use separate directories to allow parallel build 25 | BASE_BUILD_DIR=build/cu$CU_VERSION-py$PYTHON_VERSION-pt$PYTORCH_VERSION 26 | python setup.py \ 27 | build -b "$BASE_BUILD_DIR" \ 28 | bdist_wheel -b "$BASE_BUILD_DIR/build_dist" -d "wheels/$CU_VERSION/torch$PYTORCH_VERSION" 29 | rm -rf "$BASE_BUILD_DIR" 30 | -------------------------------------------------------------------------------- /dev/packaging/gen_install_table.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | import argparse 5 | 6 | template = """
<details><summary> install </summary><pre><code>\
7 | python -m pip install detectron2{d2_version} -f \\
8 |   https://dl.fbaipublicfiles.com/detectron2/wheels/{cuda}/torch{torch}/index.html
9 | </code></pre> </details>"""
10 | CUDA_SUFFIX = {"10.2": "cu102", "10.1": "cu101", "10.0": "cu100", "9.2": "cu92", "cpu": "cpu"} 11 | 12 | 13 | def gen_header(torch_versions): 14 | return '<table class="docutils"><tbody><th width="80"> CUDA </th>' + "".join( 15 | [ 16 | '<th valign="bottom" align="left" width="100">torch {}</th>'.format(t) 17 | for t in torch_versions 18 | ] 19 | ) 20 | 21 | 22 | if __name__ == "__main__": 23 | parser = argparse.ArgumentParser() 24 | parser.add_argument("--d2-version", help="detectron2 version number, default to empty") 25 | args = parser.parse_args() 26 | d2_version = f"=={args.d2_version}" if args.d2_version else "" 27 | 28 | all_versions = [("1.4", k) for k in ["10.1", "10.0", "9.2", "cpu"]] + [ 29 | ("1.5", k) for k in ["10.2", "10.1", "9.2", "cpu"] 30 | ] 31 | 32 | torch_versions = sorted({k[0] for k in all_versions}, key=float, reverse=True) 33 | cuda_versions = sorted( 34 | {k[1] for k in all_versions}, key=lambda x: float(x) if x != "cpu" else 0, reverse=True 35 | ) 36 | 37 | table = gen_header(torch_versions) 38 | for cu in cuda_versions: 39 | table += f"""<tr><td align="left"> {cu} </td>""" 40 | cu_suffix = CUDA_SUFFIX[cu] 41 | for torch in torch_versions: 42 | if (torch, cu) in all_versions: 43 | cell = template.format(d2_version=d2_version, cuda=cu_suffix, torch=torch) 44 | else: 45 | cell = "" 46 | table += f"""<td align="left">{cell}</td> """ 47 | table += "</tr>" 48 | table += "</tbody></table>" 49 | print(table) 50 | -------------------------------------------------------------------------------- /dev/packaging/gen_wheel_index.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 3 | 4 | 5 | root=$1 6 | if [[ -z "$root" ]]; then 7 | echo "Usage: ./gen_wheel_index.sh /path/to/wheels" 8 | exit 9 | fi 10 | 11 | export LC_ALL=C # reproducible sort 12 | # NOTE: all sort in this script might not work when xx.10 is released 13 | 14 | index=$root/index.html 15 | 16 | cd "$root" 17 | for cu in cpu cu92 cu100 cu101 cu102; do 18 | cd "$root/$cu" 19 | echo "Creating $PWD/index.html ..." 20 | # First sort by torch version, then stable sort by d2 version with unique. 21 | # As a result, the latest torch version for each d2 version is kept. 22 | for whl in $(find -type f -name '*.whl' -printf '%P\n' \ 23 | | sort -k 1 -r | sort -t '/' -k 2 --stable -r --unique); do 24 | echo "<a href=\"$whl\">$whl</a><br>"
" 25 | done > index.html 26 | 27 | 28 | for torch in torch*; do 29 | cd "$root/$cu/$torch" 30 | 31 | # list all whl for each cuda,torch version 32 | echo "Creating $PWD/index.html ..." 33 | for whl in $(find . -type f -name '*.whl' -printf '%P\n' | sort -r); do 34 | echo "$whl
" 35 | done > index.html 36 | done 37 | done 38 | 39 | cd "$root" 40 | # Just list everything: 41 | echo "Creating $index ..." 42 | for whl in $(find . -type f -name '*.whl' -printf '%P\n' | sort -r); do 43 | echo "$whl
" 44 | done > "$index" 45 | 46 | -------------------------------------------------------------------------------- /dev/packaging/pkg_helpers.bash: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 3 | 4 | # Function to retry functions that sometimes timeout or have flaky failures 5 | retry () { 6 | $* || (sleep 1 && $*) || (sleep 2 && $*) || (sleep 4 && $*) || (sleep 8 && $*) 7 | } 8 | # Install with pip a bit more robustly than the default 9 | pip_install() { 10 | retry pip install --progress-bar off "$@" 11 | } 12 | 13 | 14 | setup_cuda() { 15 | # Now work out the CUDA settings 16 | # Like other torch domain libraries, we choose common GPU architectures only. 17 | export FORCE_CUDA=1 18 | case "$CU_VERSION" in 19 | cu102) 20 | export CUDA_HOME=/usr/local/cuda-10.2/ 21 | export TORCH_CUDA_ARCH_LIST="3.5;3.7;5.0;5.2;6.0+PTX;6.1+PTX;7.0+PTX;7.5+PTX" 22 | ;; 23 | cu101) 24 | export CUDA_HOME=/usr/local/cuda-10.1/ 25 | export TORCH_CUDA_ARCH_LIST="3.5;3.7;5.0;5.2;6.0+PTX;6.1+PTX;7.0+PTX;7.5+PTX" 26 | ;; 27 | cu100) 28 | export CUDA_HOME=/usr/local/cuda-10.0/ 29 | export TORCH_CUDA_ARCH_LIST="3.5;3.7;5.0;5.2;6.0+PTX;6.1+PTX;7.0+PTX;7.5+PTX" 30 | ;; 31 | cu92) 32 | export CUDA_HOME=/usr/local/cuda-9.2/ 33 | export TORCH_CUDA_ARCH_LIST="3.5;3.7;5.0;5.2;6.0+PTX;6.1+PTX;7.0+PTX" 34 | ;; 35 | cpu) 36 | unset FORCE_CUDA 37 | export CUDA_VISIBLE_DEVICES= 38 | ;; 39 | *) 40 | echo "Unrecognized CU_VERSION=$CU_VERSION" 41 | exit 1 42 | ;; 43 | esac 44 | } 45 | 46 | setup_wheel_python() { 47 | case "$PYTHON_VERSION" in 48 | 3.6) python_abi=cp36-cp36m ;; 49 | 3.7) python_abi=cp37-cp37m ;; 50 | 3.8) python_abi=cp38-cp38 ;; 51 | *) 52 | echo "Unrecognized PYTHON_VERSION=$PYTHON_VERSION" 53 | exit 1 54 | ;; 55 | esac 56 | export PATH="/opt/python/$python_abi/bin:$PATH" 57 | } 58 | -------------------------------------------------------------------------------- /dev/parse_results.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 3 | 4 | # A shell script that parses metrics from the log file. 5 | # Make it easier for developers to track performance of models. 6 | 7 | LOG="$1" 8 | 9 | if [[ -z "$LOG" ]]; then 10 | echo "Usage: $0 /path/to/log/file" 11 | exit 1 12 | fi 13 | 14 | # [12/15 11:47:32] trainer INFO: Total training time: 12:15:04.446477 (0.4900 s / it) 15 | # [12/15 11:49:03] inference INFO: Total inference time: 0:01:25.326167 (0.13652186737060548 s / img per device, on 8 devices) 16 | # [12/15 11:49:03] inference INFO: Total inference pure compute time: ..... 
17 | 18 | # training time 19 | trainspeed=$(grep -o 'Overall training.*' "$LOG" | grep -Eo '\(.*\)' | grep -o '[0-9\.]*') 20 | echo "Training speed: $trainspeed s/it" 21 | 22 | # inference time: there could be multiple inference during training 23 | inferencespeed=$(grep -o 'Total inference pure.*' "$LOG" | tail -n1 | grep -Eo '\(.*\)' | grep -o '[0-9\.]*' | head -n1) 24 | echo "Inference speed: $inferencespeed s/it" 25 | 26 | # [12/15 11:47:18] trainer INFO: eta: 0:00:00 iter: 90000 loss: 0.5407 (0.7256) loss_classifier: 0.1744 (0.2446) loss_box_reg: 0.0838 (0.1160) loss_mask: 0.2159 (0.2722) loss_objectness: 0.0244 (0.0429) loss_rpn_box_reg: 0.0279 (0.0500) time: 0.4487 (0.4899) data: 0.0076 (0.0975) lr: 0.000200 max mem: 4161 27 | memory=$(grep -o 'max[_ ]mem: [0-9]*' "$LOG" | tail -n1 | grep -o '[0-9]*') 28 | echo "Training memory: $memory MB" 29 | 30 | echo "Easy to copypaste:" 31 | echo "$trainspeed","$inferencespeed","$memory" 32 | 33 | echo "------------------------------" 34 | 35 | # [12/26 17:26:32] engine.coco_evaluation: copypaste: Task: bbox 36 | # [12/26 17:26:32] engine.coco_evaluation: copypaste: AP,AP50,AP75,APs,APm,APl 37 | # [12/26 17:26:32] engine.coco_evaluation: copypaste: 0.0017,0.0024,0.0017,0.0005,0.0019,0.0011 38 | # [12/26 17:26:32] engine.coco_evaluation: copypaste: Task: segm 39 | # [12/26 17:26:32] engine.coco_evaluation: copypaste: AP,AP50,AP75,APs,APm,APl 40 | # [12/26 17:26:32] engine.coco_evaluation: copypaste: 0.0014,0.0021,0.0016,0.0005,0.0016,0.0011 41 | 42 | echo "COCO Results:" 43 | num_tasks=$(grep -o 'copypaste:.*Task.*' "$LOG" | sort -u | wc -l) 44 | # each task has 3 lines 45 | grep -o 'copypaste:.*' "$LOG" | cut -d ' ' -f 2- | tail -n $((num_tasks * 3)) 46 | -------------------------------------------------------------------------------- /dev/run_inference_tests.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 3 | 4 | BIN="python tools/train_net.py" 5 | OUTPUT="inference_test_output" 6 | NUM_GPUS=2 7 | 8 | CFG_LIST=( "${@:1}" ) 9 | 10 | if [ ${#CFG_LIST[@]} -eq 0 ]; then 11 | CFG_LIST=( ./configs/quick_schedules/*inference_acc_test.yaml ) 12 | fi 13 | 14 | echo "========================================================================" 15 | echo "Configs to run:" 16 | echo "${CFG_LIST[@]}" 17 | echo "========================================================================" 18 | 19 | 20 | for cfg in "${CFG_LIST[@]}"; do 21 | echo "========================================================================" 22 | echo "Running $cfg ..." 23 | echo "========================================================================" 24 | $BIN \ 25 | --eval-only \ 26 | --num-gpus $NUM_GPUS \ 27 | --config-file "$cfg" \ 28 | OUTPUT_DIR $OUTPUT 29 | rm -rf $OUTPUT 30 | done 31 | 32 | 33 | echo "========================================================================" 34 | echo "Running demo.py ..." 
35 | echo "========================================================================" 36 | DEMO_BIN="python demo/demo.py" 37 | COCO_DIR=datasets/coco/val2014 38 | mkdir -pv $OUTPUT 39 | 40 | set -v 41 | 42 | $DEMO_BIN --config-file ./configs/quick_schedules/panoptic_fpn_R_50_inference_acc_test.yaml \ 43 | --input $COCO_DIR/COCO_val2014_0000001933* --output $OUTPUT 44 | rm -rf $OUTPUT 45 | -------------------------------------------------------------------------------- /dev/run_instant_tests.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 3 | 4 | BIN="python tools/train_net.py" 5 | OUTPUT="instant_test_output" 6 | NUM_GPUS=2 7 | 8 | CFG_LIST=( "${@:1}" ) 9 | if [ ${#CFG_LIST[@]} -eq 0 ]; then 10 | CFG_LIST=( ./configs/quick_schedules/*instant_test.yaml ) 11 | fi 12 | 13 | echo "========================================================================" 14 | echo "Configs to run:" 15 | echo "${CFG_LIST[@]}" 16 | echo "========================================================================" 17 | 18 | for cfg in "${CFG_LIST[@]}"; do 19 | echo "========================================================================" 20 | echo "Running $cfg ..." 21 | echo "========================================================================" 22 | $BIN --num-gpus $NUM_GPUS --config-file "$cfg" \ 23 | SOLVER.IMS_PER_BATCH $(($NUM_GPUS * 2)) \ 24 | OUTPUT_DIR "$OUTPUT" 25 | rm -rf "$OUTPUT" 26 | done 27 | 28 | -------------------------------------------------------------------------------- /docker/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM nvidia/cuda:10.1-cudnn7-devel 2 | 3 | ENV DEBIAN_FRONTEND noninteractive 4 | RUN apt-get update && apt-get install -y \ 5 | python3-opencv ca-certificates python3-dev git wget sudo \ 6 | cmake ninja-build protobuf-compiler libprotobuf-dev && \ 7 | rm -rf /var/lib/apt/lists/* 8 | RUN ln -sv /usr/bin/python3 /usr/bin/python 9 | 10 | # create a non-root user 11 | ARG USER_ID=1000 12 | RUN useradd -m --no-log-init --system --uid ${USER_ID} appuser -g sudo 13 | RUN echo '%sudo ALL=(ALL) NOPASSWD:ALL' >> /etc/sudoers 14 | USER appuser 15 | WORKDIR /home/appuser 16 | 17 | ENV PATH="/home/appuser/.local/bin:${PATH}" 18 | RUN wget https://bootstrap.pypa.io/get-pip.py && \ 19 | python3 get-pip.py --user && \ 20 | rm get-pip.py 21 | 22 | # install dependencies 23 | # See https://pytorch.org/ for other options if you use a different version of CUDA 24 | RUN pip install --user tensorboard 25 | RUN pip install --user torch==1.5 torchvision==0.6 -f https://download.pytorch.org/whl/cu101/torch_stable.html 26 | 27 | RUN pip install --user 'git+https://github.com/facebookresearch/fvcore' 28 | # install detectron2 29 | RUN git clone https://github.com/facebookresearch/detectron2 detectron2_repo 30 | # set FORCE_CUDA because during `docker build` cuda is not accessible 31 | ENV FORCE_CUDA="1" 32 | # This will by default build detectron2 for all common cuda architectures and take a lot more time, 33 | # because inside `docker build`, there is no way to tell which architecture will be used. 34 | ARG TORCH_CUDA_ARCH_LIST="Kepler;Kepler+Tesla;Maxwell;Maxwell+Tegra;Pascal;Volta;Turing" 35 | ENV TORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST}" 36 | 37 | RUN pip install --user -e detectron2_repo 38 | 39 | # Set a fixed model cache directory. 
40 | ENV FVCORE_CACHE="/tmp" 41 | WORKDIR /home/appuser/detectron2_repo 42 | 43 | # run detectron2 under user "appuser": 44 | # wget http://images.cocodataset.org/val2017/000000439715.jpg -O input.jpg 45 | # python3 demo/demo.py \ 46 | #--config-file configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml \ 47 | #--input input.jpg --output outputs/ \ 48 | #--opts MODEL.WEIGHTS detectron2://COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x/137849600/model_final_f10217.pkl 49 | -------------------------------------------------------------------------------- /docker/Dockerfile-circleci: -------------------------------------------------------------------------------- 1 | FROM nvidia/cuda:10.1-cudnn7-devel 2 | # This dockerfile only aims to provide an environment for unittest on CircleCI 3 | 4 | ENV DEBIAN_FRONTEND noninteractive 5 | RUN apt-get update && apt-get install -y \ 6 | ca-certificates python3-dev git wget sudo ninja-build libglib2.0-0 && \ 7 | rm -rf /var/lib/apt/lists/* 8 | 9 | RUN wget -q https://bootstrap.pypa.io/get-pip.py && \ 10 | python3 get-pip.py && \ 11 | rm get-pip.py 12 | 13 | # install dependencies 14 | RUN pip install tensorboard opencv-python-headless 15 | ARG PYTORCH_VERSION 16 | ARG TORCHVISION_VERSION 17 | RUN pip install torch==${PYTORCH_VERSION} torchvision==${TORCHVISION_VERSION} -f https://download.pytorch.org/whl/cu101/torch_stable.html 18 | -------------------------------------------------------------------------------- /docker/README.md: -------------------------------------------------------------------------------- 1 | 2 | ## Use the container (with docker ≥ 19.03) 3 | 4 | ``` 5 | cd docker/ 6 | # Build: 7 | docker build --build-arg USER_ID=$UID -t detectron2:v0 . 8 | # Run: 9 | docker run --gpus all -it \ 10 | --shm-size=8gb --env="DISPLAY" --volume="/tmp/.X11-unix:/tmp/.X11-unix:rw" \ 11 | --name=detectron2 detectron2:v0 12 | 13 | # Grant docker access to host X server to show images 14 | xhost +local:`docker inspect --format='{{ .Config.Hostname }}' detectron2` 15 | ``` 16 | 17 | ## Use the container (with docker < 19.03) 18 | 19 | Install docker-compose and nvidia-docker2, then run: 20 | ``` 21 | cd docker && USER_ID=$UID docker-compose run detectron2 22 | ``` 23 | 24 | #### Using a persistent cache directory 25 | 26 | You can prevent models from being re-downloaded on every run, 27 | by storing them in a cache directory. 28 | 29 | To do this, add `--volume=$HOME/.torch/fvcore_cache:/tmp:rw` in the run command. 30 | 31 | ## Install new dependencies 32 | Add the following to `Dockerfile` to make persistent changes. 33 | ``` 34 | RUN sudo apt-get update && sudo apt-get install -y vim 35 | ``` 36 | Or run them in the container to make temporary changes. 37 | -------------------------------------------------------------------------------- /docker/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: "2.3" 2 | services: 3 | detectron2: 4 | build: 5 | context: . 6 | dockerfile: Dockerfile 7 | args: 8 | USER_ID: ${USER_ID:-1000} 9 | runtime: nvidia # TODO: Exchange with "gpu: all" in the future (see https://github.com/facebookresearch/detectron2/pull/197/commits/00545e1f376918db4a8ce264d427a07c1e896c5a). 
10 | shm_size: "8gb" 11 | ulimits: 12 | memlock: -1 13 | stack: 67108864 14 | volumes: 15 | - /tmp/.X11-unix:/tmp/.X11-unix:ro 16 | environment: 17 | - DISPLAY=$DISPLAY 18 | - NVIDIA_VISIBLE_DEVICES=all 19 | -------------------------------------------------------------------------------- /docs/.gitignore: -------------------------------------------------------------------------------- 1 | _build 2 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | SOURCEDIR = . 8 | BUILDDIR = _build 9 | 10 | # Put it first so that "make" without argument is like "make help". 11 | help: 12 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 13 | 14 | .PHONY: help Makefile 15 | 16 | # Catch-all target: route all unknown targets to Sphinx using the new 17 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 18 | %: Makefile 19 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 20 | -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- 1 | # Read the docs: 2 | 3 | The latest documentation built from this directory is available at [detectron2.readthedocs.io](https://detectron2.readthedocs.io/). 4 | Documents in this directory are not meant to be read on github. 5 | 6 | # Build the docs: 7 | 8 | 1. Install detectron2 according to [INSTALL.md](INSTALL.md). 9 | 2. Install additional libraries required to build docs: 10 | - docutils==0.16 11 | - Sphinx==3.0.0 12 | - recommonmark==0.6.0 13 | - sphinx_rtd_theme 14 | - mock 15 | 16 | 3. Run `make html` from this directory. 17 | -------------------------------------------------------------------------------- /docs/_static/css/custom.css: -------------------------------------------------------------------------------- 1 | /* 2 | * some extra css to make markdown look similar between github/sphinx 3 | */ 4 | 5 | /* 6 | * Below is for install.md: 7 | */ 8 | .rst-content code { 9 | white-space: pre; 10 | border: 0px; 11 | } 12 | 13 | th { 14 | border: 1px solid #e1e4e5; 15 | } 16 | 17 | div.section > details { 18 | padding-bottom: 1em; 19 | } 20 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | .. detectron2 documentation master file, created by 2 | sphinx-quickstart on Sat Sep 21 13:46:45 2019. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | Welcome to detectron2's documentation! 7 | ====================================== 8 | 9 | .. toctree:: 10 | :maxdepth: 2 11 | 12 | tutorials/index 13 | notes/index 14 | modules/index 15 | -------------------------------------------------------------------------------- /docs/modules/checkpoint.rst: -------------------------------------------------------------------------------- 1 | detectron2.checkpoint package 2 | ============================= 3 | 4 | .. automodule:: detectron2.checkpoint 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/modules/config.rst: -------------------------------------------------------------------------------- 1 | detectron2.config package 2 | ========================= 3 | 4 | .. automodule:: detectron2.config 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | :inherited-members: 9 | 10 | 11 | Config References 12 | ----------------- 13 | 14 | .. literalinclude:: ../../detectron2/config/defaults.py 15 | :language: python 16 | :linenos: 17 | :lines: 4- 18 | -------------------------------------------------------------------------------- /docs/modules/data.rst: -------------------------------------------------------------------------------- 1 | detectron2.data package 2 | ======================= 3 | 4 | .. automodule:: detectron2.data 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | 9 | detectron2.data.detection\_utils module 10 | --------------------------------------- 11 | 12 | .. automodule:: detectron2.data.detection_utils 13 | :members: 14 | :undoc-members: 15 | :show-inheritance: 16 | 17 | detectron2.data.datasets module 18 | --------------------------------------- 19 | 20 | .. automodule:: detectron2.data.datasets 21 | :members: 22 | :undoc-members: 23 | :show-inheritance: 24 | 25 | detectron2.data.samplers module 26 | --------------------------------------- 27 | 28 | .. automodule:: detectron2.data.samplers 29 | :members: 30 | :undoc-members: 31 | :show-inheritance: 32 | 33 | 34 | detectron2.data.transforms module 35 | --------------------------------------- 36 | 37 | .. automodule:: detectron2.data.transforms 38 | :members: 39 | :undoc-members: 40 | :show-inheritance: 41 | -------------------------------------------------------------------------------- /docs/modules/engine.rst: -------------------------------------------------------------------------------- 1 | detectron2.engine package 2 | ========================= 3 | 4 | 5 | .. automodule:: detectron2.engine 6 | :members: 7 | :undoc-members: 8 | :show-inheritance: 9 | 10 | 11 | detectron2.engine.defaults module 12 | --------------------------------- 13 | 14 | .. automodule:: detectron2.engine.defaults 15 | :members: 16 | :undoc-members: 17 | :show-inheritance: 18 | 19 | detectron2.engine.hooks module 20 | --------------------------------- 21 | 22 | .. automodule:: detectron2.engine.hooks 23 | :members: 24 | :undoc-members: 25 | :show-inheritance: 26 | -------------------------------------------------------------------------------- /docs/modules/evaluation.rst: -------------------------------------------------------------------------------- 1 | detectron2.evaluation package 2 | ============================= 3 | 4 | .. automodule:: detectron2.evaluation 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/modules/export.rst: -------------------------------------------------------------------------------- 1 | detectron2.export package 2 | ========================= 3 | 4 | .. automodule:: detectron2.export 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/modules/index.rst: -------------------------------------------------------------------------------- 1 | API Documentation 2 | ================== 3 | 4 | .. toctree:: 5 | 6 | checkpoint 7 | config 8 | data 9 | engine 10 | evaluation 11 | layers 12 | model_zoo 13 | modeling 14 | solver 15 | structures 16 | utils 17 | export 18 | -------------------------------------------------------------------------------- /docs/modules/layers.rst: -------------------------------------------------------------------------------- 1 | detectron2.layers package 2 | ========================= 3 | 4 | .. automodule:: detectron2.layers 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/modules/model_zoo.rst: -------------------------------------------------------------------------------- 1 | detectron2.model_zoo package 2 | ============================ 3 | 4 | .. automodule:: detectron2.model_zoo 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/modules/modeling.rst: -------------------------------------------------------------------------------- 1 | detectron2.modeling package 2 | =========================== 3 | 4 | .. automodule:: detectron2.modeling 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | 9 | 10 | detectron2.modeling.poolers module 11 | --------------------------------------- 12 | 13 | .. automodule:: detectron2.modeling.poolers 14 | :members: 15 | :undoc-members: 16 | :show-inheritance: 17 | 18 | 19 | detectron2.modeling.sampling module 20 | ------------------------------------ 21 | 22 | .. automodule:: detectron2.modeling.sampling 23 | :members: 24 | :undoc-members: 25 | :show-inheritance: 26 | 27 | 28 | detectron2.modeling.box_regression module 29 | ------------------------------------------ 30 | 31 | .. automodule:: detectron2.modeling.box_regression 32 | :members: 33 | :undoc-members: 34 | :show-inheritance: 35 | 36 | 37 | Model Registries 38 | ----------------- 39 | 40 | These are different registries provided in modeling. 41 | Each registry provides you the ability to replace a component with your customized one, 42 | without having to modify detectron2's code. 43 | 44 | Note that it is impossible to allow users to customize any line of code directly. 45 | Even just to add one line at some place, 46 | you'll likely need to find the smallest registry which contains that line, 47 | and register your component to that registry. 48 | 49 | 50 | .. autodata:: detectron2.modeling.META_ARCH_REGISTRY 51 | .. autodata:: detectron2.modeling.BACKBONE_REGISTRY 52 | .. autodata:: detectron2.modeling.PROPOSAL_GENERATOR_REGISTRY 53 | .. autodata:: detectron2.modeling.RPN_HEAD_REGISTRY 54 | .. autodata:: detectron2.modeling.ANCHOR_GENERATOR_REGISTRY 55 | .. autodata:: detectron2.modeling.ROI_HEADS_REGISTRY 56 | .. autodata:: detectron2.modeling.ROI_BOX_HEAD_REGISTRY 57 | .. autodata:: detectron2.modeling.ROI_MASK_HEAD_REGISTRY 58 | .. autodata:: detectron2.modeling.ROI_KEYPOINT_HEAD_REGISTRY 59 | -------------------------------------------------------------------------------- /docs/modules/solver.rst: -------------------------------------------------------------------------------- 1 | detectron2.solver package 2 | ========================= 3 | 4 | .. automodule:: detectron2.solver 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/modules/structures.rst: -------------------------------------------------------------------------------- 1 | detectron2.structures package 2 | ============================= 3 | 4 | .. automodule:: detectron2.structures 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/modules/utils.rst: -------------------------------------------------------------------------------- 1 | detectron2.utils package 2 | ======================== 3 | 4 | detectron2.utils.colormap module 5 | -------------------------------- 6 | 7 | .. automodule:: detectron2.utils.colormap 8 | :members: 9 | :undoc-members: 10 | :show-inheritance: 11 | 12 | detectron2.utils.comm module 13 | ---------------------------- 14 | 15 | .. automodule:: detectron2.utils.comm 16 | :members: 17 | :undoc-members: 18 | :show-inheritance: 19 | 20 | 21 | detectron2.utils.events module 22 | ------------------------------ 23 | 24 | .. automodule:: detectron2.utils.events 25 | :members: 26 | :undoc-members: 27 | :show-inheritance: 28 | 29 | 30 | detectron2.utils.logger module 31 | ------------------------------ 32 | 33 | .. automodule:: detectron2.utils.logger 34 | :members: 35 | :undoc-members: 36 | :show-inheritance: 37 | 38 | 39 | detectron2.utils.registry module 40 | -------------------------------- 41 | 42 | .. automodule:: detectron2.utils.registry 43 | :members: 44 | :undoc-members: 45 | :show-inheritance: 46 | 47 | detectron2.utils.memory module 48 | ---------------------------------- 49 | 50 | .. automodule:: detectron2.utils.memory 51 | :members: 52 | :undoc-members: 53 | :show-inheritance: 54 | 55 | 56 | detectron2.utils.analysis module 57 | ---------------------------------- 58 | 59 | .. automodule:: detectron2.utils.analysis 60 | :members: 61 | :undoc-members: 62 | :show-inheritance: 63 | 64 | 65 | detectron2.utils.visualizer module 66 | ---------------------------------- 67 | 68 | .. automodule:: detectron2.utils.visualizer 69 | :members: 70 | :undoc-members: 71 | :show-inheritance: 72 | 73 | detectron2.utils.video\_visualizer module 74 | ----------------------------------------- 75 | 76 | .. automodule:: detectron2.utils.video_visualizer 77 | :members: 78 | :undoc-members: 79 | :show-inheritance: 80 | 81 | -------------------------------------------------------------------------------- /docs/notes/changelog.md: -------------------------------------------------------------------------------- 1 | # Change Log 2 | 3 | ### Releases 4 | See release log at 5 | [https://github.com/facebookresearch/detectron2/releases](https://github.com/facebookresearch/detectron2/releases). 6 | 7 | ### Notable Backward Incompatible Changes: 8 | 9 | * 03/30/2020: Custom box head's `output_size` changed to `output_shape`. 10 | * 02/14/2020,02/18/2020: Mask head and keypoint head now include logic for losses & inference. Custom heads 11 | should overwrite the feature computation by the `layers()` method. 12 | * 11/11/2019: `detectron2.data.detection_utils.read_image` transposes images with exif information. 13 | 14 | ### Config Version Change Log 15 | 16 | * v1: Rename `RPN_HEAD.NAME` to `RPN.HEAD_NAME`. 17 | * v2: A batch of renames of many configurations before release.
18 | 19 | ### Silent Regression in Historical Versions: 20 | 21 | We list a few silent regressions since they may silently produce incorrect results and will be hard to debug. 22 | 23 | * 04/01/2020 - 05/11/2020: Bad accuracy if `TRAIN_ON_PRED_BOXES` is set to True. 24 | * 03/30/2020 - 04/01/2020: ResNets are not correctly built. 25 | * 12/19/2019 - 12/26/2019: Using aspect ratio grouping causes a drop in accuracy. 26 | * release - 11/9/2019: Test time augmentation does not predict the last category. 27 | -------------------------------------------------------------------------------- /docs/notes/index.rst: -------------------------------------------------------------------------------- 1 | Notes 2 | ====================================== 3 | 4 | .. toctree:: 5 | :maxdepth: 2 6 | 7 | benchmarks 8 | compatibility 9 | contributing 10 | changelog 11 | -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | termcolor 2 | numpy 3 | tqdm 4 | docutils==0.16 5 | # https://github.com/sphinx-doc/sphinx/commit/7acd3ada3f38076af7b2b5c9f3b60bb9c2587a3d 6 | git+git://github.com/sphinx-doc/sphinx.git@7acd3ada3f38076af7b2b5c9f3b60bb9c2587a3d 7 | recommonmark==0.6.0 8 | sphinx_rtd_theme 9 | mock 10 | matplotlib 11 | termcolor 12 | yacs 13 | tabulate 14 | cloudpickle 15 | Pillow 16 | future 17 | requests 18 | six 19 | git+git://github.com/facebookresearch/fvcore.git 20 | https://download.pytorch.org/whl/cpu/torch-1.5.0%2Bcpu-cp37-cp37m-linux_x86_64.whl 21 | https://download.pytorch.org/whl/cpu/torchvision-0.6.0%2Bcpu-cp37-cp37m-linux_x86_64.whl 22 | -------------------------------------------------------------------------------- /docs/tutorials/README.md: -------------------------------------------------------------------------------- 1 | # Read the docs: 2 | 3 | The latest documentation built from this directory is available at [detectron2.readthedocs.io](https://detectron2.readthedocs.io/). 4 | Documents in this directory are not meant to be read on github. 5 | -------------------------------------------------------------------------------- /docs/tutorials/builtin_datasets.md: -------------------------------------------------------------------------------- 1 | # Setup Builtin Datasets 2 | 3 | Detectron2 has builtin support for a few datasets. 4 | The datasets are assumed to exist in a directory specified by the environment variable 5 | `DETECTRON2_DATASETS`. 6 | Under this directory, detectron2 will look for datasets in the structure described below, if needed. 7 | ``` 8 | $DETECTRON2_DATASETS/ 9 | coco/ 10 | lvis/ 11 | cityscapes/ 12 | VOC20{07,12}/ 13 | ``` 14 | 15 | You can set the location for builtin datasets by `export DETECTRON2_DATASETS=/path/to/datasets`. 16 | If left unset, the default is `./datasets` relative to your current working directory. 17 | 18 | The [model zoo](https://github.com/facebookresearch/detectron2/blob/master/MODEL_ZOO.md) 19 | contains configs and models that use these builtin datasets. 20 | 21 | ## Expected dataset structure for COCO instance/keypoint detection: 22 | 23 | ``` 24 | coco/ 25 | annotations/ 26 | instances_{train,val}2017.json 27 | person_keypoints_{train,val}2017.json 28 | {train,val}2017/ 29 | # image files that are mentioned in the corresponding json 30 | ``` 31 | 32 | You can use the 2014 version of the dataset as well. 
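As a quick sanity check that the data is organized correctly, you can try loading one of the builtin COCO splits directly. A minimal sketch (assuming `DETECTRON2_DATASETS` points at the root described above, and using the builtin split name `coco_2017_val`):

```python
import os

# The variable must be set before detectron2.data is imported, because the
# builtin datasets are registered under this root at import time.
os.environ["DETECTRON2_DATASETS"] = "/path/to/datasets"  # adjust to your root

from detectron2.data import DatasetCatalog, MetadataCatalog

# Loading the split parses the annotation json; it fails loudly if the
# files are not laid out as described above.
dataset_dicts = DatasetCatalog.get("coco_2017_val")
print(len(dataset_dicts), "images in coco_2017_val")
print(MetadataCatalog.get("coco_2017_val").thing_classes[:5])
```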
33 | 34 | Some of the builtin tests (`dev/run_*_tests.sh`) use a tiny version of the COCO dataset, 35 | which you can download with `./prepare_for_tests.sh`. 36 | 37 | ## Expected dataset structure for PanopticFPN: 38 | 39 | ``` 40 | coco/ 41 | annotations/ 42 | panoptic_{train,val}2017.json 43 | panoptic_{train,val}2017/ # png annotations 44 | panoptic_stuff_{train,val}2017/ # generated by the script mentioned below 45 | ``` 46 | 47 | Install panopticapi by: 48 | ``` 49 | pip install git+https://github.com/cocodataset/panopticapi.git 50 | ``` 51 | Then run `python prepare_panoptic_fpn.py` to extract semantic annotations from panoptic annotations. 52 | 53 | ## Expected dataset structure for LVIS instance segmentation: 54 | ``` 55 | coco/ 56 | {train,val,test}2017/ 57 | lvis/ 58 | lvis_v0.5_{train,val}.json 59 | lvis_v0.5_image_info_test.json 60 | ``` 61 | 62 | Install lvis-api by: 63 | ``` 64 | pip install git+https://github.com/lvis-dataset/lvis-api.git 65 | ``` 66 | 67 | Run `python prepare_cocofied_lvis.py` to prepare "cocofied" LVIS annotations, which can be used to evaluate models trained on the COCO dataset. 68 | 69 | ## Expected dataset structure for cityscapes: 70 | ``` 71 | cityscapes/ 72 | gtFine/ 73 | train/ 74 | aachen/ 75 | color.png, instanceIds.png, labelIds.png, polygons.json, 76 | labelTrainIds.png 77 | ... 78 | val/ 79 | test/ 80 | leftImg8bit/ 81 | train/ 82 | val/ 83 | test/ 84 | ``` 85 | Install cityscapes scripts by: 86 | ``` 87 | pip install git+https://github.com/mcordts/cityscapesScripts.git 88 | ``` 89 | 90 | Note: to create labelTrainIds.png, first prepare the above structure, then run cityscapesScripts with: 91 | ``` 92 | CITYSCAPES_DATASET=/path/to/abovementioned/cityscapes python cityscapesscripts/preparation/createTrainIdLabelImgs.py 93 | ``` 94 | These files are not needed for instance segmentation. 95 | 96 | ## Expected dataset structure for Pascal VOC: 97 | ``` 98 | VOC20{07,12}/ 99 | Annotations/ 100 | ImageSets/ 101 | Main/ 102 | trainval.txt 103 | test.txt 104 | # train.txt or val.txt, if you use these splits 105 | JPEGImages/ 106 | ``` 107 | -------------------------------------------------------------------------------- /docs/tutorials/configs.md: -------------------------------------------------------------------------------- 1 | # Configs 2 | 3 | Detectron2 provides a key-value based config system that can be 4 | used to obtain standard, common behaviors. 5 | 6 | Detectron2's config system uses YAML and [yacs](https://github.com/rbgirshick/yacs). 7 | In addition to the [basic operations](../modules/config.html#detectron2.config.CfgNode) 8 | that access and update a config, we provide the following extra functionalities: 9 | 10 | 1. The config can have a `_BASE_: base.yaml` field, which will load a base config first. 11 | Values in the base config will be overwritten in sub-configs, if there are any conflicts. 12 | We provide several base configs for standard model architectures. 13 | 2. We provide config versioning, for backward compatibility. 14 | If your config file is versioned with a config line like `VERSION: 2`, 15 | detectron2 will still recognize it even if we change some keys in the future. 16 | 17 | The config file is a very limited language. 18 | We do not expect all features in detectron2 to be available through configs. 19 | If you need something that's not available in the config space, 20 | please write code using detectron2's API. 21 | 22 | ### Basic Usage 23 | 24 | Some basic usage of the `CfgNode` object is shown here.
See more in the [documentation](../modules/config.html#detectron2.config.CfgNode). 25 | ```python 26 | from detectron2.config import get_cfg 27 | cfg = get_cfg() # obtain detectron2's default config 28 | cfg.xxx = yyy # add new configs for your own custom components 29 | cfg.merge_from_file("my_cfg.yaml") # load values from a file 30 | 31 | cfg.merge_from_list(["MODEL.WEIGHTS", "weights.pth"]) # can also load values from a list of str 32 | print(cfg.dump()) # print formatted configs 33 | ``` 34 | 35 | Many builtin tools in detectron2 accept command-line config overrides: 36 | key-value pairs provided on the command line will overwrite the existing values in the config file. 37 | For example, [demo.py](../../demo/demo.py) can be used with 38 | ``` 39 | ./demo.py --config-file config.yaml [--other-options] \ 40 | --opts MODEL.WEIGHTS /path/to/weights INPUT.MIN_SIZE_TEST 1000 41 | ``` 42 | 43 | To see a list of available configs in detectron2 and what they mean, 44 | check [Config References](../modules/config.html#config-references). 45 | 46 | 47 | ### Best Practice with Configs 48 | 49 | 1. Treat the configs you write as "code": avoid copying or duplicating them; use `_BASE_` 50 | to share common parts between configs. 51 | 52 | 2. Keep the configs you write simple: don't include keys that do not affect the experimental setting. 53 | 54 | 3. Keep a version number in your configs (or the base config), e.g., `VERSION: 2`, 55 | for backward compatibility. 56 | We print a warning when reading a config without a version number. 57 | The official configs do not include version numbers because they are meant to 58 | be always up-to-date. 59 | -------------------------------------------------------------------------------- /docs/tutorials/evaluation.md: -------------------------------------------------------------------------------- 1 | 2 | # Evaluation 3 | 4 | Evaluation is a process that takes a number of input/output pairs and aggregates them. 5 | You can always [use the model](./models.md) directly and just parse its inputs/outputs manually to perform 6 | evaluation. 7 | Alternatively, evaluation is implemented in detectron2 using the [DatasetEvaluator](../modules/evaluation.html#detectron2.evaluation.DatasetEvaluator) 8 | interface. 9 | 10 | Detectron2 includes a few `DatasetEvaluator` implementations that compute metrics using standard dataset-specific 11 | APIs (e.g., COCO, LVIS). 12 | You can also implement your own `DatasetEvaluator` that performs some other job 13 | using the input/output pairs. 14 | For example, to count how many instances are detected on the validation set: 15 | 16 | ``` 17 | class Counter(DatasetEvaluator): 18 | def reset(self): 19 | self.count = 0 20 | def process(self, inputs, outputs): 21 | for output in outputs: 22 | self.count += len(output["instances"]) 23 | def evaluate(self): 24 | # save self.count somewhere, or print it, or return it. 25 | return {"count": self.count} 26 | ``` 27 | 28 | ## Use evaluators 29 | 30 | To run evaluation manually using an evaluator's methods: 31 | ``` 32 | def get_all_inputs_outputs(): 33 | for data in data_loader: 34 | yield data, model(data) 35 | 36 | evaluator.reset() 37 | for inputs, outputs in get_all_inputs_outputs(): 38 | evaluator.process(inputs, outputs) 39 | eval_results = evaluator.evaluate() 40 | ``` 41 | 42 | Evaluators can also be used with [inference_on_dataset](../modules/evaluation.html#detectron2.evaluation.inference_on_dataset).
43 | For example,
44 |
45 | ```python
46 | eval_results = inference_on_dataset(
47 |     model,
48 |     data_loader,
49 |     DatasetEvaluators([COCOEvaluator(...), Counter()]))
50 | ```
51 | This will execute `model` on all inputs from `data_loader`, and call the evaluators to process them.
52 |
53 | Compared to running the evaluation manually using the model, the benefit of this function is that
54 | evaluators can be merged together using [DatasetEvaluators](../modules/evaluation.html#detectron2.evaluation.DatasetEvaluators),
55 | and all the evaluation can finish in one forward pass over the dataset.
56 | This function also provides accurate speed benchmarks for the given model and dataset.
57 |
58 | ## Evaluators for custom datasets
59 |
60 | Many evaluators in detectron2 are made for specific datasets,
61 | in order to obtain scores using each dataset's official API.
62 | In addition to that, two evaluators are able to evaluate any generic dataset
63 | that follows detectron2's [standard dataset format](./datasets.md), so they
64 | can be used to evaluate custom datasets:
65 |
66 | * [COCOEvaluator](../modules/evaluation.html#detectron2.evaluation.COCOEvaluator) is able to evaluate AP (Average Precision) for box detection,
67 |   instance segmentation, and keypoint detection on any custom dataset.
68 | * [SemSegEvaluator](../modules/evaluation.html#detectron2.evaluation.SemSegEvaluator) is able to evaluate semantic segmentation metrics on any custom dataset.
-------------------------------------------------------------------------------- /docs/tutorials/extend.md: --------------------------------------------------------------------------------
1 | # Extend Detectron2's Defaults
2 |
3 | __Research is about doing things in new ways__.
4 | This creates a tension in how to design abstractions in code,
5 | which is a challenge for any research engineering project of a significant size:
6 |
7 | 1. On one hand, it needs to have very thin abstractions to allow for the possibility of doing
8 |    everything in new ways. It should be reasonably easy to break existing
9 |    abstractions and replace them with new ones.
10 |
11 | 2. On the other hand, such a project also needs reasonably high-level
12 |    abstractions, so that users can easily do things in standard ways,
13 |    without worrying too much about the details that only certain researchers care about.
14 |
15 | In detectron2, there are two types of interfaces that address this tension together:
16 |
17 | 1. Functions and classes that take a config (`cfg`) argument
18 |    (sometimes with only a few extra arguments).
19 |
20 |    Such functions and classes implement
21 |    the "standard default" behavior: they will read what they need from the
22 |    config and do the "standard" thing.
23 |    Users only need to load a given config and pass it around, without having to worry about
24 |    which arguments are used and what they all mean.
25 |
26 | 2. Functions and classes that have well-defined explicit arguments.
27 |
28 |    Each of these is a small building block of the entire system.
29 |    They require users' expertise to understand what each argument should be,
30 |    and require more effort to stitch together into a larger system.
31 |    But they can be stitched together in more flexible ways.
32 |
33 |    When you need to implement something not supported by the "standard defaults"
34 |    included in detectron2, these well-defined components can be reused.
35 |
36 | 3.
(experimental) A few classes are implemented with the
37 |    [@configurable](../../modules/config.html#detectron2.config.configurable)
38 |    decorator - they can be called with either a config, or with explicit arguments.
39 |    Their explicit argument interfaces are currently __experimental__ and subject to change.
40 |
41 |
42 | If you only need the standard behavior, the [Beginner's Tutorial](./getting_started.md)
43 | should suffice. If you need to extend detectron2 for your own needs,
44 | see the following tutorials for more details:
45 |
46 | * Detectron2 includes a few standard datasets. To use custom ones, see
47 |   [Use Custom Datasets](./datasets.md).
48 | * Detectron2 contains the standard logic that creates a data loader for training/testing from a
49 |   dataset, but you can write your own as well. See [Use Custom Data Loaders](./data_loading.md).
50 | * Detectron2 implements many standard detection models, and provides ways for you
51 |   to overwrite their behaviors. See [Use Models](./models.md) and [Write Models](./write-models.md).
52 | * Detectron2 provides a default training loop that is good for common training tasks.
53 |   You can customize it with hooks, or write your own loop instead. See [training](./training.md).
-------------------------------------------------------------------------------- /docs/tutorials/index.rst: --------------------------------------------------------------------------------
1 | Tutorials
2 | ======================================
3 |
4 | .. toctree::
5 |    :maxdepth: 2
6 |
7 |    install
8 |    getting_started
9 |    builtin_datasets
10 |    extend
11 |    datasets
12 |    data_loading
13 |    models
14 |    write-models
15 |    training
16 |    evaluation
17 |    configs
18 |    deployment
-------------------------------------------------------------------------------- /docs/tutorials/training.md: --------------------------------------------------------------------------------
1 | # Training
2 |
3 | From the previous tutorials, you may now have a custom model and a data loader.
4 | To run training, users typically prefer one of the following two styles:
5 |
6 | ### Custom Training Loop
7 |
8 | With a model and a data loader ready, everything else needed to write a training loop can
9 | be found in PyTorch, and you are free to write the training loop yourself.
10 | This style allows researchers to manage the entire training logic more clearly and have full control.
11 | One such example is provided in [tools/plain_train_net.py](../../tools/plain_train_net.py).
12 |
13 | Any customization of the training logic is then easily controlled by the user.
14 |
15 | ### Trainer Abstraction
16 |
17 | We also provide a standardized "trainer" abstraction with a
18 | hook system that helps simplify the standard training behavior.
19 | It includes the following two instantiations:
20 |
21 | * [SimpleTrainer](../modules/engine.html#detectron2.engine.SimpleTrainer)
22 |   provides a minimal training loop for single-cost single-optimizer single-data-source training, with nothing else.
23 |   Other tasks (checkpointing, logging, etc.) can be implemented using
24 |   [the hook system](../modules/engine.html#detectron2.engine.HookBase).
25 | * [DefaultTrainer](../modules/engine.html#detectron2.engine.defaults.DefaultTrainer) is a `SimpleTrainer` initialized from a config, used by
26 |   [tools/train_net.py](../../tools/train_net.py) and many scripts.
27 |   It includes more standard default behaviors that one might want to opt in to,
28 |   including default configurations for the optimizer, learning rate schedule,
29 |   logging, evaluation, checkpointing, etc.
30 |
31 | To customize a `DefaultTrainer`:
32 |
33 | 1. For simple customizations (e.g. changing the optimizer, evaluator, LR scheduler, data loader, etc.), overwrite [its methods](../modules/engine.html#detectron2.engine.defaults.DefaultTrainer) in a subclass, just like [tools/train_net.py](../../tools/train_net.py).
34 | 2. Using a trainer+hook system means there will always be some non-standard behaviors that cannot be supported, especially in research.
35 |    For more complicated tasks during training, see if
36 |    [the hook system](../modules/engine.html#detectron2.engine.HookBase) can support it, or
37 |    start from [tools/plain_train_net.py](../../tools/plain_train_net.py) to implement the training logic manually.
38 |
39 | ### Logging of Metrics
40 |
41 | During training, detectron2 models and the trainer write metrics to a centralized [EventStorage](../modules/utils.html#detectron2.utils.events.EventStorage).
42 | You can use the following code to access it and log metrics to it:
43 | ```
44 | from detectron2.utils.events import get_event_storage
45 |
46 | # inside the model:
47 | if self.training:
48 |     value = ...  # compute the value from inputs
49 |     storage = get_event_storage()
50 |     storage.put_scalar("some_accuracy", value)
51 | ```
52 |
53 | Refer to its documentation for more details.
54 |
55 | Metrics are then written to various destinations with [EventWriter](../modules/utils.html#module-detectron2.utils.events).
56 | DefaultTrainer enables a few `EventWriter`s with default configurations.
57 | See above for how to customize them.
-------------------------------------------------------------------------------- /docs/tutorials/write-models.md: --------------------------------------------------------------------------------
1 | # Write Models
2 |
3 | If you are trying to do something completely new, you may wish to implement
4 | a model entirely from scratch within detectron2. However, in many situations you may
5 | be interested in modifying or extending some components of an existing model.
6 | Therefore, we also provide a registration mechanism that lets you override the
7 | behavior of certain internal components of standard models.
8 |
9 | For example, to add a new backbone, put this code somewhere your program will import:
10 | ```python
11 | from detectron2.modeling import BACKBONE_REGISTRY, Backbone, ShapeSpec
12 | from torch import nn
13 | @BACKBONE_REGISTRY.register()
14 | class ToyBackBone(Backbone):
15 |     def __init__(self, cfg, input_shape):
16 |         super().__init__()
17 |         # create your own backbone
18 |         self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=16, padding=3)
19 |
20 |     def forward(self, image):
21 |         return {"conv1": self.conv1(image)}
22 |
23 |     def output_shape(self):
24 |         return {"conv1": ShapeSpec(channels=64, stride=16)}
25 | ```
26 | Then, you can use `cfg.MODEL.BACKBONE.NAME = 'ToyBackBone'` in your config object.
27 | `build_model(cfg)` will then call your `ToyBackBone` instead.
28 |
29 | As another example, to add new abilities to the ROI heads in the Generalized R-CNN meta-architecture,
30 | you can implement a new
31 | [ROIHeads](../modules/modeling.html#detectron2.modeling.ROIHeads) subclass and put it in the `ROI_HEADS_REGISTRY`.
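A minimal sketch of such a subclass is shown below (illustrative only: `MyROIHeads` is a hypothetical name, and we assume the same `(cfg, input_shape)` constructor interface as the backbone example above):
```python
from detectron2.modeling import ROI_HEADS_REGISTRY, StandardROIHeads

@ROI_HEADS_REGISTRY.register()
class MyROIHeads(StandardROIHeads):
    """Hypothetical subclass: inherit the standard behavior, then extend it."""
    def __init__(self, cfg, input_shape):
        super().__init__(cfg, input_shape)
        # build any extra heads for your new task here

# then select it in the config, analogous to the backbone example:
# cfg.MODEL.ROI_HEADS.NAME = "MyROIHeads"
```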
32 | See [densepose in detectron2](../../projects/DensePose) 33 | and [meshrcnn](https://github.com/facebookresearch/meshrcnn) 34 | for examples that implement new ROIHeads to perform new tasks. 35 | And [projects/](../../projects/) 36 | contains more examples that implement different architectures. 37 | 38 | A complete list of registries can be found in [API documentation](../modules/modeling.html#model-registries). 39 | You can register components in these registries to customize different parts of a model, or the 40 | entire model. 41 | -------------------------------------------------------------------------------- /projects/crowd-e2e-sparse-rcnn/__init__.py: -------------------------------------------------------------------------------- 1 | # 2 | # Modified by Peize Sun, Rufeng Zhang 3 | # Contact: {sunpeize, cxrfzhang}@foxmail.com 4 | # 5 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 6 | from .config import add_sparsercnn_config 7 | from .detector import SparseRCNN 8 | from .dataset_mapper import SparseRCNNDatasetMapper 9 | -------------------------------------------------------------------------------- /projects/crowd-e2e-sparse-rcnn/config.py: -------------------------------------------------------------------------------- 1 | # !/usr/bin/env python3 2 | # ------------------------------------------------------------------------ 3 | # Copyright (c) 2021 megvii-model. All Rights Reserved. 4 | # ------------------------------------------------------------------------ 5 | # Modified from Sparse-RCNN(github: https://github.com/PeizeSun/SparseR-CNN) created by Peize Sun, Rufeng Zhang 6 | # Contact: {sunpeize, cxrfzhang}@foxmail.com 7 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 8 | # ------------------------------------------------------------------------ 9 | from detectron2.config import CfgNode as CN 10 | import os, sys 11 | import pdb, getpass 12 | import os.path as osp 13 | 14 | def add_sparsercnn_config(cfg): 15 | """ 16 | Add config for SparseRCNN. 17 | """ 18 | cfg.MODEL.SparseRCNN = CN() 19 | cfg.MODEL.SparseRCNN.NUM_CLASSES = 80 20 | cfg.MODEL.SparseRCNN.NUM_PROPOSALS = 300 21 | 22 | # RCNN Head. 23 | cfg.MODEL.SparseRCNN.NHEADS = 8 24 | cfg.MODEL.SparseRCNN.DROPOUT = 0.0 25 | cfg.MODEL.SparseRCNN.DIM_FEEDFORWARD = 2048 26 | cfg.MODEL.SparseRCNN.ACTIVATION = 'relu' 27 | cfg.MODEL.SparseRCNN.HIDDEN_DIM = 256 28 | cfg.MODEL.SparseRCNN.NUM_CLS = 1 29 | cfg.MODEL.SparseRCNN.NUM_REG = 3 30 | cfg.MODEL.SparseRCNN.NUM_HEADS = 6 31 | 32 | # Dynamic Conv. 33 | cfg.MODEL.SparseRCNN.NUM_DYNAMIC = 2 34 | cfg.MODEL.SparseRCNN.DIM_DYNAMIC = 64 35 | 36 | # Loss. 37 | cfg.MODEL.SparseRCNN.CLASS_WEIGHT = 2.0 38 | cfg.MODEL.SparseRCNN.GIOU_WEIGHT = 2.0 39 | cfg.MODEL.SparseRCNN.L1_WEIGHT = 5.0 40 | cfg.MODEL.SparseRCNN.DEEP_SUPERVISION = True 41 | cfg.MODEL.SparseRCNN.NO_OBJECT_WEIGHT = 0.1 42 | cfg.MODEL.SparseRCNN.IGNORE_THR = 0.7 43 | 44 | # ITER 45 | cfg.MODEL.SparseRCNN.WATERSHED = 5 46 | cfg.MODEL.SparseRCNN.RELATION_IOU_THR = 0.4 47 | # cfg.MODEL.SparseRCNN.IOA_THR= 0.7 48 | cfg.MODEL.SparseRCNN.CONFIDENCE_THR = 0.7 49 | cfg.MODEL.SparseRCNN.ITER_NUM = 1 50 | cfg.MODEL.SparseRCNN.LOW_CONFIDENCE_THR= 0.05 51 | 52 | # Focal Loss. 53 | cfg.MODEL.SparseRCNN.USE_FOCAL = True 54 | cfg.MODEL.SparseRCNN.ALPHA = 0.25 55 | cfg.MODEL.SparseRCNN.GAMMA = 2.0 56 | cfg.MODEL.SparseRCNN.PRIOR_PROB = 0.01 57 | 58 | # Optimizer. 
59 | cfg.SOLVER.OPTIMIZER = "ADAMW"
60 | cfg.SOLVER.BACKBONE_MULTIPLIER = 1.0
61 |
62 | def add_path(path):
63 |     if path not in sys.path:
64 |         sys.path.insert(0, path)
65 |
66 | root_dir = '../../..'
67 | add_path(osp.join(root_dir, 'utils'))
68 |
69 | class Config:
70 |
71 |     user = getpass.getuser()
72 |     this_model_dir = osp.split(os.path.realpath(__file__))[0]
73 |
74 |     output_dir = osp.join(this_model_dir, 'output', '50e.6h.500pro.ignore')
75 |
76 |     model_dir = output_dir
77 |     eval_dir = osp.join(output_dir, 'inference')
78 |
79 |     imgDir = '/home/zhenganlin/june/CrowdHuman/images'
80 |     json_dir = '/home/zhenganlin/june/CrowdHuman/annotation_sparse-rcnn'
81 |     train_json = osp.join(json_dir, 'train.json')
82 |     eval_json = osp.join(json_dir, 'val.json')
83 |
84 |     dirpath = '/home/zhenganlin/june/CrowdHuman'
85 |     train_file = osp.join(dirpath, 'crowd_human_train15000_final_unsure_fixempty_fixvis_vboxmerge.odgt')
86 |     anno_file = osp.join(dirpath, 'crowd_human_test4370_final_unsure_fixempty_fixvis_vboxmerge.odgt')
87 |
88 | config = Config()
-------------------------------------------------------------------------------- /projects/crowd-e2e-sparse-rcnn/configs/50e.6h.500pro.ignore.yaml: --------------------------------------------------------------------------------
1 | _BASE_: "sparsercnn.crowdhuman.res50.500pro.50e.yaml"
2 | MODEL:
3 |   WEIGHTS: "detectron2://ImageNetPretrained/torchvision/R-50.pkl"
4 |   SparseRCNN:
5 |     NUM_HEADS: 6
6 |     IGNORE_THR: 0.7
7 |     WATERSHED: 5
8 |     RELATION_IOU_THR: 0.4
9 |     CONFIDENCE_THR: 0.7
10 |     ITER_NUM: 1
11 |     LOW_CONFIDENCE_THR: 0.05
12 | SOLVER:
13 |   IMS_PER_BATCH: 16
14 |   BASE_LR: 0.00005  # double the base learning rate if the image batch size is doubled.
15 |   STEPS: (37500,)  # 15000 train images / batch size 16 = 937.5 iters per epoch
16 |   MAX_ITER: 55000  #46875
17 |   CHECKPOINT_PERIOD: 5000
18 | TEST:
19 |   EVAL_PERIOD: 5000
20 | OUTPUT_DIR: "output"
21 |
-------------------------------------------------------------------------------- /projects/crowd-e2e-sparse-rcnn/configs/Base-SparseRCNN.yaml: --------------------------------------------------------------------------------
1 | MODEL:
2 |   META_ARCHITECTURE: "SparseRCNN"
3 |   WEIGHTS: "detectron2://ImageNetPretrained/torchvision/R-50.pkl"
4 |   PIXEL_MEAN: [123.675, 116.280, 103.530]
5 |   PIXEL_STD: [58.395, 57.120, 57.375]
6 |   BACKBONE:
7 |     NAME: "build_resnet_fpn_backbone"
8 |   RESNETS:
9 |     OUT_FEATURES: ["res2", "res3", "res4", "res5"]
10 |   FPN:
11 |     IN_FEATURES: ["res2", "res3", "res4", "res5"]
12 |   ROI_HEADS:
13 |     IN_FEATURES: ["p2", "p3", "p4", "p5"]
14 |   ROI_BOX_HEAD:
15 |     POOLER_TYPE: "ROIAlignV2"
16 |     POOLER_RESOLUTION: 7
17 |     POOLER_SAMPLING_RATIO: 2
18 | SOLVER:
19 |   IMS_PER_BATCH: 16
20 |   BASE_LR: 0.000025
21 |   STEPS: (210000, 250000)
22 |   MAX_ITER: 270000
23 |   WARMUP_FACTOR: 0.01
24 |   WARMUP_ITERS: 1000
25 |   WEIGHT_DECAY: 0.0001
26 |   OPTIMIZER: "ADAMW"
27 |   BACKBONE_MULTIPLIER: 1.0  # keep consistent with BASE_LR.
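  # (added) Clip full-model gradients by their global L2 norm (threshold 1.0), as configured below.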
28 |   CLIP_GRADIENTS:
29 |     ENABLED: True
30 |     CLIP_TYPE: "full_model"
31 |     CLIP_VALUE: 1.0
32 |     NORM_TYPE: 2.0
33 | SEED: 40244023
34 | INPUT:
35 |   MIN_SIZE_TRAIN: (480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800)
36 |   CROP:
37 |     ENABLED: False
38 |     TYPE: "absolute_range"
39 |     SIZE: (384, 600)
40 |   FORMAT: "RGB"
41 | TEST:
42 |   EVAL_PERIOD: 7330
43 | DATALOADER:
44 |   FILTER_EMPTY_ANNOTATIONS: False
45 |   NUM_WORKERS: 3
46 | VERSION: 2
47 |
-------------------------------------------------------------------------------- /projects/crowd-e2e-sparse-rcnn/configs/sparsercnn.crowdhuman.res50.500pro.50e.yaml: --------------------------------------------------------------------------------
1 | _BASE_: "Base-SparseRCNN.yaml"
2 | MODEL:
3 |   WEIGHTS: "detectron2://ImageNetPretrained/torchvision/R-50.pkl"
4 |   RESNETS:
5 |     DEPTH: 50
6 |     STRIDE_IN_1X1: False
7 |   SparseRCNN:
8 |     NUM_PROPOSALS: 500
9 |     NUM_CLASSES: 1
10 | DATASETS:
11 |   TRAIN: ("CrowdHuman_train",)
12 |   TEST: ("CrowdHuman_val",)
13 | SOLVER:
14 |   STEPS: (37500,)  # 15000 train images / batch size 16 = 937.5 iters per epoch
15 |   MAX_ITER: 46875
16 |   CHECKPOINT_PERIOD: 5000
17 | TEST:
18 |   EVAL_PERIOD: 5000
19 | INPUT:
20 |   MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
21 |   MAX_SIZE_TRAIN: 1500
22 |   MIN_SIZE_TEST: 800
23 |   MAX_SIZE_TEST: 1500
24 |   CROP:
25 |     ENABLED: False
26 |   FORMAT: "RGB"
27 | OUTPUT_DIR: "output/50e.6h.500pro"
-------------------------------------------------------------------------------- /projects/crowd-e2e-sparse-rcnn/models/dynamic_conv.py: --------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # ------------------------------------------------------------------------
3 | # Copyright (c) 2021 megvii-model. All Rights Reserved.
4 | # ------------------------------------------------------------------------
5 | # Modified from Sparse-RCNN(github: https://github.com/PeizeSun/SparseR-CNN) created by Peize Sun, Rufeng Zhang
6 | # Contact: {sunpeize, cxrfzhang}@foxmail.com
7 | # Copyright (c) Facebook, Inc. and its affiliates.
All Rights Reserved 8 | # ------------------------------------------------------------------------ 9 | import math, copy 10 | from typing import Optional, List 11 | import torch, pdb 12 | from torch import nn, Tensor 13 | import torch.nn.functional as F 14 | from config import config 15 | from detectron2.modeling.poolers import ROIPooler, cat 16 | from detectron2.structures import Boxes 17 | 18 | class DynamicConv(nn.Module): 19 | 20 | def __init__(self, cfg): 21 | super().__init__() 22 | 23 | self.hidden_dim = cfg.MODEL.SparseRCNN.HIDDEN_DIM 24 | self.dim_dynamic = cfg.MODEL.SparseRCNN.DIM_DYNAMIC 25 | self.num_dynamic = cfg.MODEL.SparseRCNN.NUM_DYNAMIC 26 | self.num_params = self.hidden_dim * self.dim_dynamic 27 | self.dynamic_layer = nn.Linear(self.hidden_dim, self.num_dynamic * self.num_params) 28 | 29 | self.norm1 = nn.LayerNorm(self.dim_dynamic) 30 | self.norm2 = nn.LayerNorm(self.hidden_dim) 31 | 32 | self.activation = nn.ReLU(inplace=True) 33 | 34 | pooler_resolution = cfg.MODEL.ROI_BOX_HEAD.POOLER_RESOLUTION 35 | num_output = self.hidden_dim * pooler_resolution ** 2 36 | self.out_layer = nn.Linear(num_output, self.hidden_dim) 37 | self.norm3 = nn.LayerNorm(self.hidden_dim) 38 | 39 | def forward(self, pro_features, roi_features): 40 | ''' 41 | pro_features: (1, N * nr_boxes, self.d_model) 42 | roi_features: (49, N * nr_boxes, self.d_model) 43 | ''' 44 | features = roi_features.permute(1, 0, 2) 45 | parameters = self.dynamic_layer(pro_features).permute(1, 0, 2) 46 | 47 | param1 = parameters[:, :, :self.num_params].view(-1, self.hidden_dim, self.dim_dynamic) 48 | param2 = parameters[:, :, self.num_params:].view(-1, self.dim_dynamic, self.hidden_dim) 49 | 50 | features = torch.bmm(features, param1) 51 | features = self.norm1(features) 52 | features = self.activation(features) 53 | 54 | features = torch.bmm(features, param2) 55 | features = self.norm2(features) 56 | features = self.activation(features) 57 | 58 | features = features.flatten(1) 59 | features = self.out_layer(features) 60 | features = self.norm3(features) 61 | features = self.activation(features) 62 | 63 | return features 64 | 65 | def build_dynamic_conv(cfg): 66 | 67 | return DynamicConv(cfg) -------------------------------------------------------------------------------- /projects/crowd-e2e-sparse-rcnn/run_test.sh: -------------------------------------------------------------------------------- 1 | export NCCL_IB_DISABLE=1 2 | rm -rf __pycache__ 3 | rm -rf output/events* 4 | rm -rf output/log.txt.* 5 | python3 train_net.py --num-gpus 4 \ 6 | --config-file configs/50e.6h.500pro.ignore.yaml \ 7 | --eval-only \ 8 | MODEL.WEIGHTS output/model_0039999.pth 9 | -------------------------------------------------------------------------------- /projects/crowd-e2e-sparse-rcnn/run_train.sh: -------------------------------------------------------------------------------- 1 | export NCCL_IB_DISABLE=1 2 | rm -rf __pycache__ 3 | rm -rf output/events* 4 | rm -rf output/log.txt.* 5 | python3 train_net.py --num-gpus 8 \ 6 | --config-file configs/50e.6h.500pro.ignore.yaml \ 7 | --resume 8 | -------------------------------------------------------------------------------- /projects/sparse-rcnn-baseline/__init__.py: -------------------------------------------------------------------------------- 1 | # !/usr/bin/env python3 2 | # ------------------------------------------------------------------------ 3 | # Copyright (c) 2021 megvii-model. All Rights Reserved. 
4 | # ------------------------------------------------------------------------
5 | # Modified from Sparse-RCNN(github: https://github.com/PeizeSun/SparseR-CNN) created by Peize Sun, Rufeng Zhang
6 | # Contact: {sunpeize, cxrfzhang}@foxmail.com
7 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
8 | # ------------------------------------------------------------------------
9 | from .config import add_sparsercnn_config
10 | from .detector import SparseRCNN
11 | from .dataset_mapper import SparseRCNNDatasetMapper
12 |
-------------------------------------------------------------------------------- /projects/sparse-rcnn-baseline/configs/50e.6h.500pro.ignore.yaml: --------------------------------------------------------------------------------
1 | _BASE_: "sparsercnn.crowdhuman.res50.500pro.50e.yaml"
2 | MODEL:
3 |   WEIGHTS: "detectron2://ImageNetPretrained/torchvision/R-50.pkl"
4 |   SparseRCNN:
5 |     NUM_HEADS: 6
6 |     IGNORE_THR: 0.7
7 |     WATERSHED: 6
8 |     CONFIDENCE_THR: 0.7
9 |     RELATION_IOU_THR: 0.4
10 |     ITER_NUM: 0
11 |     LOW_CONFIDENCE_THR: 0.05
12 |     NUM_PROPOSALS: 500
13 |     NUM_CLASSES: 1
14 | SOLVER:
15 |   IMS_PER_BATCH: 16
16 |   BASE_LR: 0.00005
17 |   STEPS: (37500,)
18 |   MAX_ITER: 50000
19 |   CHECKPOINT_PERIOD: 5000
20 | TEST:
21 |   EVAL_PERIOD: 5000
22 | OUTPUT_DIR: "output"
23 |
-------------------------------------------------------------------------------- /projects/sparse-rcnn-baseline/configs/Base-SparseRCNN.yaml: --------------------------------------------------------------------------------
1 | MODEL:
2 |   META_ARCHITECTURE: "SparseRCNN"
3 |   WEIGHTS: "detectron2://ImageNetPretrained/torchvision/R-50.pkl"
4 |   PIXEL_MEAN: [123.675, 116.280, 103.530]
5 |   PIXEL_STD: [58.395, 57.120, 57.375]
6 |   BACKBONE:
7 |     NAME: "build_resnet_fpn_backbone"
8 |   RESNETS:
9 |     OUT_FEATURES: ["res2", "res3", "res4", "res5"]
10 |   FPN:
11 |     IN_FEATURES: ["res2", "res3", "res4", "res5"]
12 |   ROI_HEADS:
13 |     IN_FEATURES: ["p2", "p3", "p4", "p5"]
14 |   ROI_BOX_HEAD:
15 |     POOLER_TYPE: "ROIAlignV2"
16 |     POOLER_RESOLUTION: 7
17 |     POOLER_SAMPLING_RATIO: 2
18 | SOLVER:
19 |   IMS_PER_BATCH: 16
20 |   BASE_LR: 0.000025
21 |   STEPS: (210000, 250000)
22 |   MAX_ITER: 270000
23 |   WARMUP_FACTOR: 0.01
24 |   WARMUP_ITERS: 1000
25 |   WEIGHT_DECAY: 0.0001
26 |   OPTIMIZER: "ADAMW"
27 |   BACKBONE_MULTIPLIER: 1.0  # keep consistent with BASE_LR.
28 |   CLIP_GRADIENTS:
29 |     ENABLED: True
30 |     CLIP_TYPE: "full_model"
31 |     CLIP_VALUE: 1.0
32 |     NORM_TYPE: 2.0
33 | SEED: 40244023
34 | INPUT:
35 |   MIN_SIZE_TRAIN: (480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800)
36 |   CROP:
37 |     ENABLED: False
38 |     TYPE: "absolute_range"
39 |     SIZE: (384, 600)
40 |   FORMAT: "RGB"
41 | TEST:
42 |   EVAL_PERIOD: 1000000
43 | DATALOADER:
44 |   FILTER_EMPTY_ANNOTATIONS: False
45 |   NUM_WORKERS: 3
46 | VERSION: 2
47 |
-------------------------------------------------------------------------------- /projects/sparse-rcnn-baseline/configs/sparsercnn.crowdhuman.res50.500pro.50e.yaml: --------------------------------------------------------------------------------
1 | _BASE_: "Base-SparseRCNN.yaml"
2 | MODEL:
3 |   WEIGHTS: "detectron2://ImageNetPretrained/torchvision/R-50.pkl"
4 |   RESNETS:
5 |     DEPTH: 50
6 |     STRIDE_IN_1X1: False
7 |   SparseRCNN:
8 |     NUM_PROPOSALS: 500
9 |     NUM_CLASSES: 1
10 | DATASETS:
11 |   TRAIN: ("CrowdHuman_train",)
12 |   TEST: ("CrowdHuman_val",)
13 | SOLVER:
14 |   STEPS: (7500,)  # 15000 train images / batch size 16 = 937.5 iters per epoch
15 |   MAX_ITER: 16875
16 |   CHECKPOINT_PERIOD: 5000
17 | TEST:
18 |   EVAL_PERIOD: 5000
19 | INPUT:
20 |   MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
21 |   MAX_SIZE_TRAIN: 1500
22 |   MIN_SIZE_TEST: 800
23 |   MAX_SIZE_TEST: 1500
24 |   CROP:
25 |     ENABLED: False
26 |   FORMAT: "RGB"
27 | OUTPUT_DIR: "output/50e.6h.500pro"
-------------------------------------------------------------------------------- /projects/sparse-rcnn-baseline/models/dynamic_conv.py: --------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # ------------------------------------------------------------------------
3 | # Copyright (c) 2021 megvii-model. All Rights Reserved.
4 | # ------------------------------------------------------------------------
5 | # Modified from Sparse-RCNN(github: https://github.com/PeizeSun/SparseR-CNN) created by Peize Sun, Rufeng Zhang
6 | # Contact: {sunpeize, cxrfzhang}@foxmail.com
7 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
8 | # ------------------------------------------------------------------------
9 | """
10 | SparseRCNN Transformer class.
11 | 12 | Copy-paste from torch.nn.Transformer with modifications: 13 | * positional encodings are passed in MHattention 14 | * extra LN at the end of encoder is removed 15 | * decoder returns a stack of activations from all decoding layers 16 | """ 17 | import copy, math, torch 18 | from typing import Optional, List 19 | from torch import nn, Tensor 20 | import torch.nn.functional as F 21 | 22 | _DEFAULT_SCALE_CLAMP = math.log(100000.0 / 16) 23 | 24 | def build_dynamic_conv(cfg): 25 | 26 | return DynamicConv(cfg) 27 | 28 | class DynamicConv(nn.Module): 29 | 30 | def __init__(self, cfg): 31 | super().__init__() 32 | 33 | self.hidden_dim = cfg.MODEL.SparseRCNN.HIDDEN_DIM 34 | self.dim_dynamic = cfg.MODEL.SparseRCNN.DIM_DYNAMIC 35 | self.num_dynamic = cfg.MODEL.SparseRCNN.NUM_DYNAMIC 36 | self.num_params = self.hidden_dim * self.dim_dynamic 37 | self.dynamic_layer = nn.Linear(self.hidden_dim, self.num_dynamic * self.num_params) 38 | 39 | self.norm1 = nn.LayerNorm(self.dim_dynamic) 40 | self.norm2 = nn.LayerNorm(self.hidden_dim) 41 | 42 | self.activation = nn.ReLU(inplace=True) 43 | 44 | pooler_resolution = cfg.MODEL.ROI_BOX_HEAD.POOLER_RESOLUTION 45 | num_output = self.hidden_dim * pooler_resolution ** 2 46 | self.out_layer = nn.Linear(num_output, self.hidden_dim) 47 | self.norm3 = nn.LayerNorm(self.hidden_dim) 48 | 49 | def forward(self, pro_features, roi_features): 50 | ''' 51 | pro_features: (1, N * nr_boxes, self.d_model) 52 | roi_features: (49, N * nr_boxes, self.d_model) 53 | ''' 54 | features = roi_features.permute(1, 0, 2) 55 | parameters = self.dynamic_layer(pro_features).permute(1, 0, 2) 56 | 57 | param1 = parameters[:, :, :self.num_params].view(-1, self.hidden_dim, self.dim_dynamic) 58 | param2 = parameters[:, :, self.num_params:].view(-1, self.dim_dynamic, self.hidden_dim) 59 | 60 | features = torch.bmm(features, param1) 61 | features = self.norm1(features) 62 | features = self.activation(features) 63 | 64 | features = torch.bmm(features, param2) 65 | features = self.norm2(features) 66 | features = self.activation(features) 67 | 68 | features = features.flatten(1) 69 | features = self.out_layer(features) 70 | features = self.norm3(features) 71 | features = self.activation(features) 72 | 73 | return features -------------------------------------------------------------------------------- /projects/sparse-rcnn-baseline/run_test.sh: -------------------------------------------------------------------------------- 1 | rm -rf __pycache__ 2 | export NCCL_IB_DISABLE=1 3 | python3 train_net.py --num-gpus 4 \ 4 | --config-file configs/50e.6h.500pro.ignore.yaml \ 5 | --eval-only \ 6 | MODEL.WEIGHTS output/model_0019999.pth -------------------------------------------------------------------------------- /projects/sparse-rcnn-baseline/run_train.sh: -------------------------------------------------------------------------------- 1 | rm -rf __pycache__ 2 | export NCCL_IB_DISABLE=1 3 | python3 train_net.py --num-gpus 8 \ 4 | --config-file configs/50e.6h.500pro.ignore.yaml \ 5 | -------------------------------------------------------------------------------- /readme/fig.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/megvii-research/Iter-E2EDET/c649a0b29be48e7253048a2c8efd6b28a2e3197b/readme/fig.jpg -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [isort] 2 | line_length=100 3 | multi_line_output=3 
4 | include_trailing_comma=True 5 | known_standard_library=numpy,setuptools,mock 6 | skip=./datasets,docs 7 | skip_glob=*/__init__.py 8 | known_myself=detectron2 9 | known_third_party=fvcore,matplotlib,cv2,torch,torchvision,PIL,pycocotools,yacs,termcolor,cityscapesscripts,tabulate,tqdm,scipy,lvis,psutil,pkg_resources,caffe2,onnx 10 | no_lines_before=STDLIB,THIRDPARTY 11 | sections=FUTURE,STDLIB,THIRDPARTY,myself,FIRSTPARTY,LOCALFOLDER 12 | default_section=FIRSTPARTY 13 | 14 | [mypy] 15 | python_version=3.6 16 | ignore_missing_imports = True 17 | warn_unused_configs = True 18 | disallow_untyped_defs = True 19 | check_untyped_defs = True 20 | warn_unused_ignores = True 21 | warn_redundant_casts = True 22 | show_column_numbers = True 23 | follow_imports = silent 24 | allow_redefinition = True 25 | ; Require all functions to be annotated 26 | disallow_incomplete_defs = True 27 | -------------------------------------------------------------------------------- /tests/README.md: -------------------------------------------------------------------------------- 1 | ## Unit Tests 2 | 3 | To run the unittests, do: 4 | ``` 5 | cd detectron2 6 | python -m unittest discover -v -s ./tests 7 | ``` 8 | 9 | There are also end-to-end inference & training tests, in [dev/run_*_tests.sh](../dev). 10 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | -------------------------------------------------------------------------------- /tests/data/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/megvii-research/Iter-E2EDET/c649a0b29be48e7253048a2c8efd6b28a2e3197b/tests/data/__init__.py -------------------------------------------------------------------------------- /tests/data/test_sampler.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
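# (added) These tests exercise GroupedBatchSampler: every yielded mini-batch should be
# completely filled, and should only contain indices that share a group id.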
2 | import unittest 3 | from torch.utils.data.sampler import SequentialSampler 4 | 5 | from detectron2.data.samplers import GroupedBatchSampler 6 | 7 | 8 | class TestGroupedBatchSampler(unittest.TestCase): 9 | def test_missing_group_id(self): 10 | sampler = SequentialSampler(list(range(100))) 11 | group_ids = [1] * 100 12 | samples = GroupedBatchSampler(sampler, group_ids, 2) 13 | 14 | for mini_batch in samples: 15 | self.assertEqual(len(mini_batch), 2) 16 | 17 | def test_groups(self): 18 | sampler = SequentialSampler(list(range(100))) 19 | group_ids = [1, 0] * 50 20 | samples = GroupedBatchSampler(sampler, group_ids, 2) 21 | 22 | for mini_batch in samples: 23 | self.assertEqual((mini_batch[0] + mini_batch[1]) % 2, 0) 24 | -------------------------------------------------------------------------------- /tests/layers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/megvii-research/Iter-E2EDET/c649a0b29be48e7253048a2c8efd6b28a2e3197b/tests/layers/__init__.py -------------------------------------------------------------------------------- /tests/modeling/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/megvii-research/Iter-E2EDET/c649a0b29be48e7253048a2c8efd6b28a2e3197b/tests/modeling/__init__.py -------------------------------------------------------------------------------- /tests/modeling/test_box2box_transform.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | import logging 3 | import unittest 4 | import torch 5 | 6 | from detectron2.modeling.box_regression import Box2BoxTransform, Box2BoxTransformRotated 7 | 8 | logger = logging.getLogger(__name__) 9 | 10 | 11 | def random_boxes(mean_box, stdev, N): 12 | return torch.rand(N, 4) * stdev + torch.tensor(mean_box, dtype=torch.float) 13 | 14 | 15 | class TestBox2BoxTransform(unittest.TestCase): 16 | def test_reconstruction(self): 17 | weights = (5, 5, 10, 10) 18 | b2b_tfm = Box2BoxTransform(weights=weights) 19 | src_boxes = random_boxes([10, 10, 20, 20], 1, 10) 20 | dst_boxes = random_boxes([10, 10, 20, 20], 1, 10) 21 | 22 | devices = [torch.device("cpu")] 23 | if torch.cuda.is_available(): 24 | devices.append(torch.device("cuda")) 25 | for device in devices: 26 | src_boxes = src_boxes.to(device=device) 27 | dst_boxes = dst_boxes.to(device=device) 28 | deltas = b2b_tfm.get_deltas(src_boxes, dst_boxes) 29 | dst_boxes_reconstructed = b2b_tfm.apply_deltas(deltas, src_boxes) 30 | assert torch.allclose(dst_boxes, dst_boxes_reconstructed) 31 | 32 | 33 | def random_rotated_boxes(mean_box, std_length, std_angle, N): 34 | return torch.cat( 35 | [torch.rand(N, 4) * std_length, torch.rand(N, 1) * std_angle], dim=1 36 | ) + torch.tensor(mean_box, dtype=torch.float) 37 | 38 | 39 | class TestBox2BoxTransformRotated(unittest.TestCase): 40 | def test_reconstruction(self): 41 | weights = (5, 5, 10, 10, 1) 42 | b2b_transform = Box2BoxTransformRotated(weights=weights) 43 | src_boxes = random_rotated_boxes([10, 10, 20, 20, -30], 5, 60.0, 10) 44 | dst_boxes = random_rotated_boxes([10, 10, 20, 20, -30], 5, 60.0, 10) 45 | 46 | devices = [torch.device("cpu")] 47 | if torch.cuda.is_available(): 48 | devices.append(torch.device("cuda")) 49 | for device in devices: 50 | src_boxes = src_boxes.to(device=device) 51 | dst_boxes = dst_boxes.to(device=device) 52 | deltas = b2b_transform.get_deltas(src_boxes, 
dst_boxes) 53 | dst_boxes_reconstructed = b2b_transform.apply_deltas(deltas, src_boxes) 54 | assert torch.allclose(dst_boxes[:, :4], dst_boxes_reconstructed[:, :4], atol=1e-5) 55 | # angle difference has to be normalized 56 | assert torch.allclose( 57 | (dst_boxes[:, 4] - dst_boxes_reconstructed[:, 4] + 180.0) % 360.0 - 180.0, 58 | torch.zeros_like(dst_boxes[:, 4]), 59 | atol=1e-4, 60 | ) 61 | 62 | 63 | if __name__ == "__main__": 64 | unittest.main() 65 | -------------------------------------------------------------------------------- /tests/modeling/test_matcher.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | import unittest 3 | import torch 4 | 5 | from detectron2.config import get_cfg 6 | from detectron2.modeling.matcher import Matcher 7 | from detectron2.utils.env import TORCH_VERSION 8 | 9 | 10 | class TestMatcher(unittest.TestCase): 11 | # need https://github.com/pytorch/pytorch/pull/38378 12 | @unittest.skipIf(TORCH_VERSION < (1, 6), "Insufficient pytorch version") 13 | def test_scriptability(self): 14 | cfg = get_cfg() 15 | anchor_matcher = Matcher( 16 | cfg.MODEL.RPN.IOU_THRESHOLDS, cfg.MODEL.RPN.IOU_LABELS, allow_low_quality_matches=True 17 | ) 18 | match_quality_matrix = torch.tensor( 19 | [[0.15, 0.45, 0.2, 0.6], [0.3, 0.65, 0.05, 0.1], [0.05, 0.4, 0.25, 0.4]] 20 | ) 21 | expected_matches = torch.tensor([1, 1, 2, 0]) 22 | expected_match_labels = torch.tensor([-1, 1, 0, 1], dtype=torch.int8) 23 | 24 | matches, match_labels = anchor_matcher(match_quality_matrix) 25 | self.assertTrue(torch.allclose(matches, expected_matches)) 26 | self.assertTrue(torch.allclose(match_labels, expected_match_labels)) 27 | 28 | # nonzero_tuple must be import explicitly to let jit know what it is. 29 | # https://github.com/pytorch/pytorch/issues/38964 30 | from detectron2.layers import nonzero_tuple # noqa F401 31 | 32 | scripted_matcher = torch.jit.script(Matcher) 33 | scripted_anchor_matcher = scripted_matcher( 34 | cfg.MODEL.RPN.IOU_THRESHOLDS, cfg.MODEL.RPN.IOU_LABELS, allow_low_quality_matches=True 35 | ) 36 | matches, match_labels = scripted_anchor_matcher(match_quality_matrix) 37 | self.assertTrue(torch.allclose(matches, expected_matches)) 38 | self.assertTrue(torch.allclose(match_labels, expected_match_labels)) 39 | 40 | 41 | if __name__ == "__main__": 42 | unittest.main() 43 | -------------------------------------------------------------------------------- /tests/modeling/test_roi_pooler.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved 2 | import logging 3 | import unittest 4 | import torch 5 | 6 | from detectron2.modeling.poolers import ROIPooler 7 | from detectron2.structures import Boxes, RotatedBoxes 8 | 9 | logger = logging.getLogger(__name__) 10 | 11 | 12 | class TestROIPooler(unittest.TestCase): 13 | def _rand_boxes(self, num_boxes, x_max, y_max): 14 | coords = torch.rand(num_boxes, 4) 15 | coords[:, 0] *= x_max 16 | coords[:, 1] *= y_max 17 | coords[:, 2] *= x_max 18 | coords[:, 3] *= y_max 19 | boxes = torch.zeros(num_boxes, 4) 20 | boxes[:, 0] = torch.min(coords[:, 0], coords[:, 2]) 21 | boxes[:, 1] = torch.min(coords[:, 1], coords[:, 3]) 22 | boxes[:, 2] = torch.max(coords[:, 0], coords[:, 2]) 23 | boxes[:, 3] = torch.max(coords[:, 1], coords[:, 3]) 24 | return boxes 25 | 26 | def _test_roialignv2_roialignrotated_match(self, device): 27 | pooler_resolution = 14 28 | canonical_level = 4 29 | canonical_scale_factor = 2 ** canonical_level 30 | pooler_scales = (1.0 / canonical_scale_factor,) 31 | sampling_ratio = 0 32 | 33 | N, C, H, W = 2, 4, 10, 8 34 | N_rois = 10 35 | std = 11 36 | mean = 0 37 | feature = (torch.rand(N, C, H, W) - 0.5) * 2 * std + mean 38 | 39 | features = [feature.to(device)] 40 | 41 | rois = [] 42 | rois_rotated = [] 43 | for _ in range(N): 44 | boxes = self._rand_boxes( 45 | num_boxes=N_rois, x_max=W * canonical_scale_factor, y_max=H * canonical_scale_factor 46 | ) 47 | 48 | rotated_boxes = torch.zeros(N_rois, 5) 49 | rotated_boxes[:, 0] = (boxes[:, 0] + boxes[:, 2]) / 2.0 50 | rotated_boxes[:, 1] = (boxes[:, 1] + boxes[:, 3]) / 2.0 51 | rotated_boxes[:, 2] = boxes[:, 2] - boxes[:, 0] 52 | rotated_boxes[:, 3] = boxes[:, 3] - boxes[:, 1] 53 | rois.append(Boxes(boxes).to(device)) 54 | rois_rotated.append(RotatedBoxes(rotated_boxes).to(device)) 55 | 56 | roialignv2_pooler = ROIPooler( 57 | output_size=pooler_resolution, 58 | scales=pooler_scales, 59 | sampling_ratio=sampling_ratio, 60 | pooler_type="ROIAlignV2", 61 | ) 62 | 63 | roialignv2_out = roialignv2_pooler(features, rois) 64 | 65 | roialignrotated_pooler = ROIPooler( 66 | output_size=pooler_resolution, 67 | scales=pooler_scales, 68 | sampling_ratio=sampling_ratio, 69 | pooler_type="ROIAlignRotated", 70 | ) 71 | 72 | roialignrotated_out = roialignrotated_pooler(features, rois_rotated) 73 | 74 | self.assertTrue(torch.allclose(roialignv2_out, roialignrotated_out, atol=1e-4)) 75 | 76 | def test_roialignv2_roialignrotated_match_cpu(self): 77 | self._test_roialignv2_roialignrotated_match(device="cpu") 78 | 79 | @unittest.skipIf(not torch.cuda.is_available(), "CUDA not available") 80 | def test_roialignv2_roialignrotated_match_cuda(self): 81 | self._test_roialignv2_roialignrotated_match(device="cuda") 82 | 83 | 84 | if __name__ == "__main__": 85 | unittest.main() 86 | -------------------------------------------------------------------------------- /tests/structures/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/megvii-research/Iter-E2EDET/c649a0b29be48e7253048a2c8efd6b28a2e3197b/tests/structures/__init__.py -------------------------------------------------------------------------------- /tests/structures/test_imagelist.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved 2 | 3 | import unittest 4 | from typing import Sequence 5 | import torch 6 | 7 | from detectron2.structures import ImageList 8 | 9 | 10 | class TestImageList(unittest.TestCase): 11 | def test_imagelist_padding_shape(self): 12 | class TensorToImageList(torch.nn.Module): 13 | def forward(self, tensors: Sequence[torch.Tensor]): 14 | return ImageList.from_tensors(tensors, 4).tensor 15 | 16 | func = torch.jit.trace( 17 | TensorToImageList(), ([torch.ones((3, 10, 10), dtype=torch.float32)],) 18 | ) 19 | ret = func([torch.ones((3, 15, 20), dtype=torch.float32)]) 20 | self.assertEqual(list(ret.shape), [1, 3, 16, 20], str(ret.shape)) 21 | 22 | func = torch.jit.trace( 23 | TensorToImageList(), 24 | ( 25 | [ 26 | torch.ones((3, 16, 10), dtype=torch.float32), 27 | torch.ones((3, 13, 11), dtype=torch.float32), 28 | ], 29 | ), 30 | ) 31 | ret = func( 32 | [ 33 | torch.ones((3, 25, 20), dtype=torch.float32), 34 | torch.ones((3, 10, 10), dtype=torch.float32), 35 | ] 36 | ) 37 | # does not support calling with different #images 38 | self.assertEqual(list(ret.shape), [2, 3, 28, 20], str(ret.shape)) 39 | -------------------------------------------------------------------------------- /tests/structures/test_instances.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | import unittest 3 | import torch 4 | 5 | from detectron2.structures import Instances 6 | 7 | 8 | class TestInstancesIndexing(unittest.TestCase): 9 | def test_int_indexing(self): 10 | attr1 = torch.tensor([[0.0, 0.0, 1.0], [0.0, 0.0, 0.5], [0.0, 0.0, 1.0], [0.0, 0.5, 0.5]]) 11 | attr2 = torch.tensor([0.1, 0.2, 0.3, 0.4]) 12 | instances = Instances((100, 100)) 13 | instances.attr1 = attr1 14 | instances.attr2 = attr2 15 | for i in range(-len(instances), len(instances)): 16 | inst = instances[i] 17 | self.assertEqual((inst.attr1 == attr1[i]).all(), True) 18 | self.assertEqual((inst.attr2 == attr2[i]).all(), True) 19 | 20 | self.assertRaises(IndexError, lambda: instances[len(instances)]) 21 | self.assertRaises(IndexError, lambda: instances[-len(instances) - 1]) 22 | 23 | 24 | if __name__ == "__main__": 25 | unittest.main() 26 | -------------------------------------------------------------------------------- /tests/test_checkpoint.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved 2 | import unittest 3 | from collections import OrderedDict 4 | import torch 5 | from torch import nn 6 | 7 | from detectron2.checkpoint.c2_model_loading import align_and_update_state_dicts 8 | from detectron2.utils.logger import setup_logger 9 | 10 | 11 | class TestCheckpointer(unittest.TestCase): 12 | def setUp(self): 13 | setup_logger() 14 | 15 | def create_complex_model(self): 16 | m = nn.Module() 17 | m.block1 = nn.Module() 18 | m.block1.layer1 = nn.Linear(2, 3) 19 | m.layer2 = nn.Linear(3, 2) 20 | m.res = nn.Module() 21 | m.res.layer2 = nn.Linear(3, 2) 22 | 23 | state_dict = OrderedDict() 24 | state_dict["layer1.weight"] = torch.rand(3, 2) 25 | state_dict["layer1.bias"] = torch.rand(3) 26 | state_dict["layer2.weight"] = torch.rand(2, 3) 27 | state_dict["layer2.bias"] = torch.rand(2) 28 | state_dict["res.layer2.weight"] = torch.rand(2, 3) 29 | state_dict["res.layer2.bias"] = torch.rand(2) 30 | return m, state_dict 31 | 32 | def test_complex_model_loaded(self): 33 | for add_data_parallel in [False, True]: 34 | model, state_dict = self.create_complex_model() 35 | if add_data_parallel: 36 | model = nn.DataParallel(model) 37 | model_sd = model.state_dict() 38 | 39 | align_and_update_state_dicts(model_sd, state_dict) 40 | for loaded, stored in zip(model_sd.values(), state_dict.values()): 41 | # different tensor references 42 | self.assertFalse(id(loaded) == id(stored)) 43 | # same content 44 | self.assertTrue(loaded.equal(stored)) 45 | 46 | 47 | if __name__ == "__main__": 48 | unittest.main() 49 | -------------------------------------------------------------------------------- /tests/test_export_caffe2.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved 2 | # -*- coding: utf-8 -*- 3 | 4 | import copy 5 | import numpy as np 6 | import os 7 | import tempfile 8 | import unittest 9 | import cv2 10 | import torch 11 | from fvcore.common.file_io import PathManager 12 | 13 | from detectron2 import model_zoo 14 | from detectron2.checkpoint import DetectionCheckpointer 15 | from detectron2.config import get_cfg 16 | from detectron2.data import DatasetCatalog 17 | from detectron2.modeling import build_model 18 | from detectron2.utils.logger import setup_logger 19 | 20 | 21 | @unittest.skipIf(os.environ.get("CIRCLECI"), "Require COCO data and model zoo.") 22 | class TestCaffe2Export(unittest.TestCase): 23 | def setUp(self): 24 | setup_logger() 25 | 26 | def _test_model(self, config_path, device="cpu"): 27 | # requires extra dependencies 28 | from detectron2.export import Caffe2Model, add_export_config, export_caffe2_model 29 | 30 | cfg = get_cfg() 31 | cfg.merge_from_file(model_zoo.get_config_file(config_path)) 32 | cfg = add_export_config(cfg) 33 | cfg.MODEL.DEVICE = device 34 | 35 | model = build_model(cfg) 36 | DetectionCheckpointer(model).load(model_zoo.get_checkpoint_url(config_path)) 37 | 38 | inputs = [{"image": self._get_test_image()}] 39 | c2_model = export_caffe2_model(cfg, model, copy.deepcopy(inputs)) 40 | 41 | with tempfile.TemporaryDirectory(prefix="detectron2_unittest") as d: 42 | c2_model.save_protobuf(d) 43 | c2_model.save_graph(os.path.join(d, "test.svg"), inputs=copy.deepcopy(inputs)) 44 | c2_model = Caffe2Model.load_protobuf(d) 45 | c2_model(inputs)[0]["instances"] 46 | 47 | def _get_test_image(self): 48 | try: 49 | file_name = DatasetCatalog.get("coco_2017_train")[0]["file_name"] 50 | assert PathManager.exists(file_name) 51 | except Exception: 52 | self.skipTest("COCO dataset not available.") 53 | 54 | with PathManager.open(file_name, "rb") as f: 55 | buf = f.read() 56 | img = cv2.imdecode(np.frombuffer(buf, dtype=np.uint8), cv2.IMREAD_COLOR) 57 | assert img is not None, file_name 58 | return torch.from_numpy(img.transpose(2, 0, 1)) 59 | 60 | def testMaskRCNN(self): 61 | self._test_model("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml") 62 | 63 | @unittest.skipIf(not torch.cuda.is_available(), "CUDA not available") 64 | def testMaskRCNNGPU(self): 65 | self._test_model("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml", device="cuda") 66 | 67 | def testRetinaNet(self): 68 | self._test_model("COCO-Detection/retinanet_R_50_FPN_3x.yaml") 69 | 70 | def testPanopticFPN(self): 71 | self._test_model("COCO-PanopticSegmentation/panoptic_fpn_R_50_3x.yaml") 72 | -------------------------------------------------------------------------------- /tests/test_model_analysis.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
2 | 3 | 4 | import unittest 5 | import torch 6 | 7 | import detectron2.model_zoo as model_zoo 8 | from detectron2.config import get_cfg 9 | from detectron2.modeling import build_model 10 | from detectron2.utils.analysis import flop_count_operators, parameter_count 11 | 12 | 13 | def get_model_zoo(config_path): 14 | """ 15 | Like model_zoo.get, but do not load any weights (even pretrained) 16 | """ 17 | cfg_file = model_zoo.get_config_file(config_path) 18 | cfg = get_cfg() 19 | cfg.merge_from_file(cfg_file) 20 | if not torch.cuda.is_available(): 21 | cfg.MODEL.DEVICE = "cpu" 22 | return build_model(cfg) 23 | 24 | 25 | class RetinaNetTest(unittest.TestCase): 26 | def setUp(self): 27 | self.model = get_model_zoo("COCO-Detection/retinanet_R_50_FPN_1x.yaml") 28 | 29 | def test_flop(self): 30 | # RetinaNet supports flop-counting with random inputs 31 | inputs = [{"image": torch.rand(3, 800, 800)}] 32 | res = flop_count_operators(self.model, inputs) 33 | self.assertTrue(int(res["conv"]), 146) # 146B flops 34 | 35 | def test_param_count(self): 36 | res = parameter_count(self.model) 37 | self.assertTrue(res[""], 37915572) 38 | self.assertTrue(res["backbone"], 31452352) 39 | 40 | 41 | class FasterRCNNTest(unittest.TestCase): 42 | def setUp(self): 43 | self.model = get_model_zoo("COCO-Detection/faster_rcnn_R_50_FPN_1x.yaml") 44 | 45 | def test_flop(self): 46 | # Faster R-CNN supports flop-counting with random inputs 47 | inputs = [{"image": torch.rand(3, 800, 800)}] 48 | res = flop_count_operators(self.model, inputs) 49 | 50 | # This only checks flops for backbone & proposal generator 51 | # Flops for box head is not conv, and depends on #proposals, which is 52 | # almost 0 for random inputs. 53 | self.assertTrue(int(res["conv"]), 117) 54 | 55 | def test_param_count(self): 56 | res = parameter_count(self.model) 57 | self.assertTrue(res[""], 41699936) 58 | self.assertTrue(res["backbone"], 26799296) 59 | -------------------------------------------------------------------------------- /tests/test_model_zoo.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | import logging 3 | import unittest 4 | 5 | from detectron2 import model_zoo 6 | from detectron2.modeling import FPN, GeneralizedRCNN 7 | 8 | logger = logging.getLogger(__name__) 9 | 10 | 11 | class TestModelZoo(unittest.TestCase): 12 | def test_get_returns_model(self): 13 | model = model_zoo.get("Misc/scratch_mask_rcnn_R_50_FPN_3x_gn.yaml", trained=False) 14 | self.assertIsInstance(model, GeneralizedRCNN) 15 | self.assertIsInstance(model.backbone, FPN) 16 | 17 | def test_get_invalid_model(self): 18 | self.assertRaises(RuntimeError, model_zoo.get, "Invalid/config.yaml") 19 | 20 | def test_get_url(self): 21 | url = model_zoo.get_checkpoint_url("Misc/scratch_mask_rcnn_R_50_FPN_3x_gn.yaml") 22 | self.assertEqual( 23 | url, 24 | "https://dl.fbaipublicfiles.com/detectron2/Misc/scratch_mask_rcnn_R_50_FPN_3x_gn/138602908/model_final_01ca85.pkl", # noqa 25 | ) 26 | 27 | 28 | if __name__ == "__main__": 29 | unittest.main() 30 | -------------------------------------------------------------------------------- /tools/README.md: -------------------------------------------------------------------------------- 1 | 2 | This directory contains a few scripts that use detectron2. 3 | 4 | 5 | * `train_net.py` 6 | 7 | An example training script that's made to train builtin models of detectron2. 8 | 9 | For usage, see [GETTING_STARTED.md](../GETTING_STARTED.md). 
10 |
11 | * `plain_train_net.py`
12 |
13 | Similar to `train_net.py`, but implements a training loop instead of using `Trainer`.
14 | This script includes fewer features, but it may be more friendly to hackers.
15 |
16 | * `benchmark.py`
17 |
18 | Benchmark the training speed, inference speed, or data loading speed of a given config.
19 |
20 | Usage:
21 | ```
22 | python benchmark.py --config-file config.yaml --task train/eval/data [optional DDP flags]
23 | ```
24 |
25 | * `visualize_json_results.py`
26 |
27 | Visualize the json instance detection/segmentation results dumped by `COCOEvaluator` or `LVISEvaluator`.
28 |
29 | Usage:
30 | ```
31 | python visualize_json_results.py --input x.json --output dir/ --dataset coco_2017_val
32 | ```
33 | If you are not using a builtin dataset, you'll need to write your own script or modify this one.
34 |
35 | * `visualize_data.py`
36 |
37 | Visualize ground truth raw annotations or training data (after preprocessing/augmentations).
38 |
39 | Usage:
40 | ```
41 | python visualize_data.py --config-file config.yaml --source annotation/dataloader --output-dir dir/ [--show]
42 | ```
43 |
44 | NOTE: the script does not stop by itself when using `--source dataloader` because a training
45 | dataloader is usually infinite.
-------------------------------------------------------------------------------- /tools/convert-torchvision-to-d2.py: --------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
3 |
4 | import pickle as pkl
5 | import sys
6 | import torch
7 |
8 | """
9 | Usage:
10 |   # download one of the ResNet{18,34,50,101,152} models from torchvision:
11 |   wget https://download.pytorch.org/models/resnet50-19c8e357.pth -O r50.pth
12 |   # run the conversion
13 |   ./convert-torchvision-to-d2.py r50.pth r50.pkl
14 |
15 |   # Then, use r50.pkl with the following changes in config:
16 |
17 | MODEL:
18 |   WEIGHTS: "/path/to/r50.pkl"
19 |   PIXEL_MEAN: [123.675, 116.280, 103.530]
20 |   PIXEL_STD: [58.395, 57.120, 57.375]
21 |   RESNETS:
22 |     DEPTH: 50
23 |     STRIDE_IN_1X1: False
24 | INPUT:
25 |   FORMAT: "RGB"
26 |
27 | These models typically produce slightly worse results than the
28 | pre-trained ResNets we use in official configs, which are the
29 | original ResNet models released by MSRA.
30 | """
31 |
32 | if __name__ == "__main__":
33 |     input = sys.argv[1]
34 |
35 |     obj = torch.load(input, map_location="cpu")
36 |
37 |     newmodel = {}
38 |     for k in list(obj.keys()):
39 |         old_k = k
40 |         if "layer" not in k:
41 |             k = "stem." + k
42 |         for t in [1, 2, 3, 4]:
43 |             k = k.replace("layer{}".format(t), "res{}".format(t + 1))
44 |         for t in [1, 2, 3]:
45 |             k = k.replace("bn{}".format(t), "conv{}.norm".format(t))
46 |         k = k.replace("downsample.0", "shortcut")
47 |         k = k.replace("downsample.1", "shortcut.norm")
48 |         print(old_k, "->", k)
49 |         newmodel[k] = obj.pop(old_k).detach().numpy()
50 |
51 |     res = {"model": newmodel, "__author__": "torchvision", "matching_heuristics": True}
52 |
53 |     with open(sys.argv[2], "wb") as f:
54 |         pkl.dump(res, f)
55 |     if obj:
56 |         print("Unconverted keys:", obj.keys())
-------------------------------------------------------------------------------- /tools/deploy/CMakeLists.txt: --------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
2 | # See https://pytorch.org/tutorials/advanced/cpp_frontend.html
3 | cmake_minimum_required(VERSION 3.0 FATAL_ERROR)
4 | project(caffe2_mask_rcnn)
5 |
6 | find_package(Torch REQUIRED)
7 | find_package(gflags REQUIRED)
8 | find_package(OpenCV REQUIRED)
9 |
10 | add_executable(caffe2_mask_rcnn caffe2_mask_rcnn.cpp)
11 | target_link_libraries(
12 |   caffe2_mask_rcnn
13 |   "${TORCH_LIBRARIES}" gflags glog ${OpenCV_LIBS})
14 | set_property(TARGET caffe2_mask_rcnn PROPERTY CXX_STANDARD 14)
15 |
16 |
17 | add_executable(torchscript_traced_mask_rcnn torchscript_traced_mask_rcnn.cpp)
18 | target_link_libraries(
19 |   torchscript_traced_mask_rcnn
20 |   "${TORCH_LIBRARIES}" ${OpenCV_LIBS})
21 | set_property(TARGET torchscript_traced_mask_rcnn PROPERTY CXX_STANDARD 14)
-------------------------------------------------------------------------------- /tools/deploy/README.md: --------------------------------------------------------------------------------
1 |
2 | This directory contains:
3 |
4 | 1. A script that converts a detectron2 model to caffe2 format.
5 |
6 | 2. An example that loads a Mask R-CNN model in caffe2 format and runs inference.
7 |
8 | See [tutorial](https://detectron2.readthedocs.io/tutorials/deployment.html)
9 | for their usage.
-------------------------------------------------------------------------------- /tools/deploy/torchscript_traced_mask_rcnn.cpp: --------------------------------------------------------------------------------
1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
2 |
3 | #include <c10/cuda/CUDAStream.h>
4 | #include <torch/csrc/autograd/grad_mode.h>
5 | #include <torch/script.h>
6 |
7 | #include <chrono>
8 | #include <iostream>
9 | #include <opencv2/opencv.hpp>
10 |
11 | using namespace std;
12 |
13 | // experimental. don't use
14 | int main(int argc, const char* argv[]) {
15 |   if (argc != 3) {
16 |     return 1;
17 |   }
18 |   std::string image_file = argv[2];
19 |
20 |   torch::autograd::AutoGradMode guard(false);
21 |   auto module = torch::jit::load(argv[1]);
22 |
23 |   assert(module.buffers().size() > 0);
24 |   // Assume that the entire model is on the same device.
25 |   // We just put input to this device.
--------------------------------------------------------------------------------
/tools/deploy/torchscript_traced_mask_rcnn.cpp:
--------------------------------------------------------------------------------
// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.

#include <c10/cuda/CUDAStream.h>
#include <torch/csrc/autograd/grad_mode.h>
#include <torch/script.h>

#include <cassert>
#include <chrono>
#include <iostream>
#include <opencv2/opencv.hpp>
#include <string>

using namespace std;

// experimental. don't use
int main(int argc, const char* argv[]) {
  if (argc != 3) {
    return 1;
  }
  std::string image_file = argv[2];

  torch::autograd::AutoGradMode guard(false);
  auto module = torch::jit::load(argv[1]);

  assert(module.buffers().size() > 0);
  // Assume that the entire model is on the same device.
  // We just put input to this device.
  auto device = (*begin(module.buffers())).device();

  cv::Mat input_img = cv::imread(image_file, cv::IMREAD_COLOR);
  const int height = input_img.rows;
  const int width = input_img.cols;
  // FPN models require divisibility of 32
  assert(height % 32 == 0 && width % 32 == 0);
  const int channels = 3;

  auto input = torch::from_blob(
      input_img.data, {1, height, width, channels}, torch::kUInt8);
  // NHWC to NCHW
  input = input.to(device, torch::kFloat).permute({0, 3, 1, 2}).contiguous();

  std::array<float, 3> im_info_data{height * 1.0f, width * 1.0f, 1.0f};
  auto im_info = torch::from_blob(im_info_data.data(), {1, 3}).to(device);

  // run the network
  auto output = module.forward({std::make_tuple(input, im_info)});
  if (device.is_cuda())
    c10::cuda::getCurrentCUDAStream().synchronize();

  // run 3 more times to benchmark
  int N_benchmark = 3;
  auto start_time = chrono::high_resolution_clock::now();
  for (int i = 0; i < N_benchmark; ++i) {
    output = module.forward({std::make_tuple(input, im_info)});
    if (device.is_cuda())
      c10::cuda::getCurrentCUDAStream().synchronize();
  }
  auto end_time = chrono::high_resolution_clock::now();
  auto us = chrono::duration_cast<chrono::microseconds>(end_time - start_time)
                .count();
  cout << "Latency (should vary with different inputs): "
       << us * 1.0 / 1e6 / N_benchmark << " seconds" << endl;

  auto outputs = output.toTuple()->elements();
  // parse Mask R-CNN outputs
  auto bbox = outputs[0].toTensor(), scores = outputs[1].toTensor(),
       labels = outputs[2].toTensor(), mask_probs = outputs[3].toTensor();

  cout << "bbox: " << bbox.toString() << " " << bbox.sizes() << endl;
  cout << "scores: " << scores.toString() << " " << scores.sizes() << endl;
  cout << "labels: " << labels.toString() << " " << labels.sizes() << endl;
  cout << "mask_probs: " << mask_probs.toString() << " " << mask_probs.sizes()
       << endl;

  int num_instances = bbox.sizes()[0];
  cout << bbox << endl;
  return 0;
}
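The input protocol used above (an NHWC uint8 image tensor, permuted to NCHW float, plus a 1x3 `im_info` of height, width, and scale, passed together as one tuple) can be smoke-tested from Python before reaching for the C++ harness. A minimal sketch, assuming a traced module saved as `model.ts` and an image whose sides are divisible by 32 (both names are illustrative, not files from this repo):

```
import cv2
import torch

img = cv2.imread("input.jpg", cv2.IMREAD_COLOR)     # BGR, HWC, uint8
h, w = img.shape[:2]
assert h % 32 == 0 and w % 32 == 0, "FPN models require divisibility of 32"

inp = torch.from_numpy(img)[None]                   # NHWC
inp = inp.float().permute(0, 3, 1, 2).contiguous()  # NCHW, mirroring the C++ code
im_info = torch.tensor([[h, w, 1.0]])

module = torch.jit.load("model.ts")
bbox, scores, labels, mask_probs = module((inp, im_info))  # single tuple argument
print(bbox.shape, scores.shape)
```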
9 | """ 10 | import os 11 | import time 12 | import argparse 13 | import torch 14 | from main import get_args_parser as get_main_args_parser 15 | # from models import build_model 16 | from config import config 17 | from detr import build_model 18 | from datasets import build_dataset 19 | from misc import nested_tensor_from_tensor_list 20 | import pdb 21 | 22 | def get_benckmark_arg_parser(): 23 | parser = argparse.ArgumentParser('Benchmark inference speed of Deformable DETR.') 24 | parser.add_argument('--num_iters', type=int, default=300, help='total iters to benchmark speed') 25 | parser.add_argument('--warm_iters', type=int, default=5, help='ignore first several iters that are very slow') 26 | parser.add_argument('--batch_size', type=int, default=1, help='batch size in inference') 27 | parser.add_argument('--resume', type=str, help='load the pre-trained checkpoint') 28 | return parser 29 | 30 | 31 | @torch.no_grad() 32 | def measure_average_inference_time(model, inputs, num_iters=100, warm_iters=5): 33 | ts = [] 34 | for iter_ in range(num_iters): 35 | torch.cuda.synchronize() 36 | t_ = time.perf_counter() 37 | model(inputs) 38 | torch.cuda.synchronize() 39 | t = time.perf_counter() - t_ 40 | if iter_ >= warm_iters: 41 | ts.append(t) 42 | print(ts) 43 | return sum(ts) / len(ts) 44 | 45 | 46 | def benchmark(): 47 | args, _ = get_benckmark_arg_parser().parse_known_args() 48 | main_args = get_main_args_parser().parse_args(_) 49 | assert args.warm_iters < args.num_iters and args.num_iters > 0 and args.warm_iters >= 0 50 | assert args.batch_size > 0 51 | assert args.resume is None or os.path.exists(args.resume) 52 | dataset = build_dataset('val', main_args) 53 | model, _, _ = build_model(main_args) 54 | model.cuda() 55 | model.eval() 56 | if args.resume is not None: 57 | ckpt = torch.load(args.resume, map_location=lambda storage, loc: storage) 58 | model.load_state_dict(ckpt['model']) 59 | inputs = nested_tensor_from_tensor_list([dataset.__getitem__(0)[0].cuda() for _ in range(args.batch_size)]) 60 | t = measure_average_inference_time(model, inputs, args.num_iters, args.warm_iters) 61 | return 1.0 / t * args.batch_size 62 | 63 | 64 | if __name__ == '__main__': 65 | fps = benchmark() 66 | print(f'Inference Speed: {fps:.1f} FPS') 67 | 68 | -------------------------------------------------------------------------------- /utils/detToolkits/.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | *.swp 3 | __pycache__ 4 | tmp 5 | -------------------------------------------------------------------------------- /utils/detToolkits/detools/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # ------------------------------------------------------------------------ 3 | # Copyright (c) 2021 megvii-model. All Rights Reserved. 
--------------------------------------------------------------------------------
/utils/detToolkits/.gitignore:
--------------------------------------------------------------------------------
*.pyc
*.swp
__pycache__
tmp
--------------------------------------------------------------------------------
/utils/detToolkits/detools/__init__.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python3
# ------------------------------------------------------------------------
# Copyright (c) 2021 megvii-model. All Rights Reserved.
# ------------------------------------------------------------------------
__author__ = 'jyn'
__email__ = 'jyn@megvii.com'

from .box import *
from .image import *
from .database import *
from .evaluator import *
--------------------------------------------------------------------------------
/utils/nms/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/megvii-research/Iter-E2EDET/c649a0b29be48e7253048a2c8efd6b28a2e3197b/utils/nms/__init__.py
--------------------------------------------------------------------------------
/utils/nms/gpu_nms.hpp:
--------------------------------------------------------------------------------
void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num,
          int boxes_dim, float nms_overlap_thresh, int device_id);
--------------------------------------------------------------------------------
/utils/nms/gpu_nms.pyx:
--------------------------------------------------------------------------------
# --------------------------------------------------------
# Faster R-CNN
# Copyright (c) 2015 Microsoft
# Licensed under The MIT License [see LICENSE for details]
# Written by Ross Girshick
# --------------------------------------------------------

import numpy as np
cimport numpy as np

assert sizeof(int) == sizeof(np.int32_t)

cdef extern from "gpu_nms.hpp":
    void _nms(np.int32_t*, int*, np.float32_t*, int, int, float, int)

def gpu_nms(np.ndarray[np.float32_t, ndim=2] dets, float thresh,
            np.int32_t device_id=0):
    cdef int boxes_num = dets.shape[0]
    cdef int boxes_dim = dets.shape[1]
    cdef int num_out
    cdef np.ndarray[np.int32_t, ndim=1] \
        keep = np.zeros(boxes_num, dtype=np.int32)
    cdef np.ndarray[np.float32_t, ndim=1] \
        scores = dets[:, 4]
    cdef np.ndarray[np.int_t, ndim=1] \
        order = scores.argsort()[::-1]
    cdef np.ndarray[np.float32_t, ndim=2] \
        sorted_dets = dets[order, :]
    _nms(&keep[0], &num_out, &sorted_dets[0, 0], boxes_num, boxes_dim, thresh, device_id)
    keep = keep[:num_out]
    return list(order[keep])
--------------------------------------------------------------------------------
/utils/nms/py_cpu_nms.py:
--------------------------------------------------------------------------------
# --------------------------------------------------------
# Fast R-CNN
# Copyright (c) 2015 Microsoft
# Licensed under The MIT License [see LICENSE for details]
# Written by Ross Girshick
# --------------------------------------------------------

import numpy as np


def py_cpu_nms(dets, thresh):
    """Pure Python NMS baseline."""
    x1 = dets[:, 0]
    y1 = dets[:, 1]
    x2 = dets[:, 2]
    y2 = dets[:, 3]
    scores = dets[:, 4]

    # legacy Pascal-VOC convention: pixel coordinates are inclusive, hence the +1
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = scores.argsort()[::-1]

    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        xx1 = np.maximum(x1[i], x1[order[1:]])
        yy1 = np.maximum(y1[i], y1[order[1:]])
        xx2 = np.minimum(x2[i], x2[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])

        w = np.maximum(0.0, xx2 - xx1 + 1)
        h = np.maximum(0.0, yy2 - yy1 + 1)
        inter = w * h
        ovr = inter / (areas[i] + areas[order[1:]] - inter)

        inds = np.where(ovr <= thresh)[0]
        order = order[inds + 1]

    return keep
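A quick illustration of how this baseline is called (toy boxes, illustrative only; the import path is assumed and should be adjusted to your layout): each row of `dets` is `[x1, y1, x2, y2, score]`, and the returned indices reference rows of the input array.

```
import numpy as np
from py_cpu_nms import py_cpu_nms  # path assumed; adjust to your import layout

dets = np.array([
    [10, 10, 100, 100, 0.9],    # kept: highest score
    [12, 12, 102, 102, 0.8],    # suppressed: IoU with the first box is ~0.92 > 0.5
    [200, 200, 300, 300, 0.7],  # kept: no overlap with the first box
], dtype=np.float32)

print(py_cpu_nms(dets, thresh=0.5))  # -> [0, 2]
```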
--------------------------------------------------------------------------------
/utils/nms_wrapper.py:
--------------------------------------------------------------------------------
import numpy as np

from nms.gpu_nms import gpu_nms
from nms.cpu_nms import cpu_nms, cpu_soft_nms


def soft_nms(dets, sigma=0.5, Nt=0.3, threshold=0.001, method=1):
    keep = cpu_soft_nms(np.ascontiguousarray(dets, dtype=np.float32),
                        np.float32(sigma), np.float32(Nt),
                        np.float32(threshold),
                        np.uint8(method))
    return keep


# Original NMS implementation
def nms(dets, thresh, force_cpu=True):
    """Dispatch to either CPU or GPU NMS implementations."""
    if dets.shape[0] == 0:
        return []
    if not force_cpu:
        return gpu_nms(dets, thresh, device_id=0)
    else:
        return cpu_nms(dets, thresh)
--------------------------------------------------------------------------------
/utils/py_cpu_nms.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python3
# ------------------------------------------------------------------------
# Copyright (c) 2021 megvii-model. All Rights Reserved.
# --------------------------------------------------------
# Modified from Fast R-CNN
# Copyright (c) 2015 Microsoft
# Licensed under The MIT License [see LICENSE for details]
# Written by Ross Girshick
# --------------------------------------------------------

import numpy as np


def py_cpu_nms(dets, thresh):
    """Pure Python NMS baseline."""
    x1 = dets[:, 0]
    y1 = dets[:, 1]
    x2 = dets[:, 2]
    y2 = dets[:, 3]
    scores = dets[:, 4]

    # unlike the legacy version in utils/nms/, this variant uses exclusive
    # coordinates (no +1) and guards the IoU division with a small epsilon
    areas = (x2 - x1) * (y2 - y1)
    order = scores.argsort()[::-1]

    keep = []
    eps = 1e-8
    while order.size > 0:
        i = order[0]
        keep.append(i)
        xx1 = np.maximum(x1[i], x1[order[1:]])
        yy1 = np.maximum(y1[i], y1[order[1:]])
        xx2 = np.minimum(x2[i], x2[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])

        w = np.maximum(0.0, xx2 - xx1)
        h = np.maximum(0.0, yy2 - yy1)
        inter = w * h
        ovr = inter / (areas[i] + areas[order[1:]] - inter + eps)

        inds = np.where(ovr <= thresh)[0]
        order = order[inds + 1]

    return keep


def _test():
    box1 = np.array([33, 45, 145, 230, 0.7])[None, :]
    box2 = np.array([44, 54, 123, 348, 0.8])[None, :]
    box3 = np.array([88, 12, 340, 342, 0.65])[None, :]
    boxes = np.concatenate([box1, box2, box3], axis=0)
    nms_thresh = 0.5
    keep = py_cpu_nms(boxes, nms_thresh)
    alive_boxes = boxes[keep]
    print(alive_boxes)


if __name__ == '__main__':
    _test()
--------------------------------------------------------------------------------
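The `cpu_soft_nms` routine wrapped by `/utils/nms_wrapper.py` above is a compiled Cython extension that is not included in this dump. For orientation, a pure-Python sketch of the linear variant (`method=1`) following the standard Soft-NMS formulation (Bodla et al., 2017); this is a reference illustration of the technique, not the repo's implementation:

```
import numpy as np

def soft_nms_linear(dets, Nt=0.3, threshold=0.001):
    """Linear Soft-NMS: decay the scores of overlapping boxes instead of
    removing them; drop a box only when its score falls below `threshold`."""
    dets = dets.copy()
    keep = []
    while dets.shape[0] > 0:
        i = dets[:, 4].argmax()  # box with the highest remaining score
        keep.append(dets[i])
        best, dets = dets[i], np.delete(dets, i, axis=0)
        # IoU of the best box against all remaining boxes
        xx1 = np.maximum(best[0], dets[:, 0])
        yy1 = np.maximum(best[1], dets[:, 1])
        xx2 = np.minimum(best[2], dets[:, 2])
        yy2 = np.minimum(best[3], dets[:, 3])
        inter = np.maximum(0.0, xx2 - xx1) * np.maximum(0.0, yy2 - yy1)
        area = lambda b: (b[..., 2] - b[..., 0]) * (b[..., 3] - b[..., 1])
        iou = inter / (area(best) + area(dets) - inter + 1e-8)
        # linear decay: score *= (1 - iou) for boxes overlapping more than Nt
        dets[:, 4] *= np.where(iou > Nt, 1.0 - iou, 1.0)
        dets = dets[dets[:, 4] >= threshold]
    return np.stack(keep) if keep else np.zeros((0, 5))
```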