├── .clang-format ├── .flake8 ├── .gitignore ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── GETTING_STARTED.md ├── INSTALL.md ├── MODEL_ZOO.md ├── README.md ├── configs ├── Base-RCNN-C4.yaml ├── Base-RCNN-DilatedC5.yaml ├── Base-RCNN-FPN.yaml ├── Base-RetinaNet.yaml ├── COCO-Detection │ ├── fast_rcnn_R_50_FPN_1x.yaml │ ├── faster_rcnn_R_101_C4_3x.yaml │ ├── faster_rcnn_R_101_DC5_3x.yaml │ ├── faster_rcnn_R_101_FPN_3x.yaml │ ├── faster_rcnn_R_50_C4_1x.yaml │ ├── faster_rcnn_R_50_C4_3x.yaml │ ├── faster_rcnn_R_50_DC5_1x.yaml │ ├── faster_rcnn_R_50_DC5_3x.yaml │ ├── faster_rcnn_R_50_FPN_1x.yaml │ ├── faster_rcnn_R_50_FPN_3x.yaml │ ├── faster_rcnn_X_101_32x8d_FPN_3x.yaml │ ├── retinanet_R_101_FPN_3x.yaml │ ├── retinanet_R_50_FPN_1x.yaml │ ├── retinanet_R_50_FPN_3x.yaml │ ├── rpn_R_50_C4_1x.yaml │ └── rpn_R_50_FPN_1x.yaml ├── COCO-InstanceSegmentation │ ├── mask_rcnn_R_101_C4_3x.yaml │ ├── mask_rcnn_R_101_DC5_3x.yaml │ ├── mask_rcnn_R_101_FPN_3x.yaml │ ├── mask_rcnn_R_50_C4_1x.yaml │ ├── mask_rcnn_R_50_C4_3x.yaml │ ├── mask_rcnn_R_50_DC5_1x.yaml │ ├── mask_rcnn_R_50_DC5_3x.yaml │ ├── mask_rcnn_R_50_FPN_1x.yaml │ ├── mask_rcnn_R_50_FPN_3x.yaml │ └── mask_rcnn_X_101_32x8d_FPN_3x.yaml ├── COCO-Keypoints │ ├── Base-Keypoint-RCNN-FPN.yaml │ ├── keypoint_rcnn_R_101_FPN_3x.yaml │ ├── keypoint_rcnn_R_50_FPN_1x.yaml │ ├── keypoint_rcnn_R_50_FPN_3x.yaml │ └── keypoint_rcnn_X_101_32x8d_FPN_3x.yaml ├── COCO-PanopticSegmentation │ ├── Base-Panoptic-FPN.yaml │ ├── panoptic_fpn_R_101_3x.yaml │ ├── panoptic_fpn_R_50_1x.yaml │ └── panoptic_fpn_R_50_3x.yaml ├── Cityscapes │ └── mask_rcnn_R_50_FPN.yaml ├── Detectron1-Comparisons │ ├── README.md │ ├── faster_rcnn_R_50_FPN_noaug_1x.yaml │ ├── keypoint_rcnn_R_50_FPN_1x.yaml │ └── mask_rcnn_R_50_FPN_noaug_1x.yaml ├── LVIS-InstanceSegmentation │ ├── mask_rcnn_R_101_FPN_1x.yaml │ ├── mask_rcnn_R_50_FPN_1x.yaml │ └── mask_rcnn_X_101_32x8d_FPN_1x.yaml ├── Misc │ ├── cascade_mask_rcnn_R_50_FPN_1x.yaml │ ├── cascade_mask_rcnn_X_152_32x8d_FPN_IN5k_gn_dconv.yaml │ ├── mask_rcnn_R_50_FPN_1x_cls_agnostic.yaml │ ├── mask_rcnn_R_50_FPN_1x_dconv_c3-c5.yaml │ ├── mask_rcnn_R_50_FPN_3x_gn.yaml │ ├── panoptic_fpn_R_101_dconv_cascade_gn_3x.yaml │ ├── scratch_mask_rcnn_R_50_FPN_3x_gn.yaml │ └── semantic_R_50_FPN_1x.yaml ├── PascalVOC-Detection │ └── faster_rcnn_R_50_C4.yaml ├── RefSeg_deeplab_Dilate_1x_graph.yaml ├── WeaklyGrounding-RN101-C4.yaml └── quick_schedules │ ├── README.md │ ├── fast_rcnn_R_50_FPN_inference_acc_test.yaml │ ├── fast_rcnn_R_50_FPN_instant_test.yaml │ ├── keypoint_rcnn_R_50_FPN_inference_acc_test.yaml │ ├── keypoint_rcnn_R_50_FPN_instant_test.yaml │ ├── keypoint_rcnn_R_50_FPN_normalized_training_acc_test.yaml │ ├── keypoint_rcnn_R_50_FPN_training_acc_test.yaml │ ├── mask_rcnn_R_50_C4_inference_acc_test.yaml │ ├── mask_rcnn_R_50_C4_instant_test.yaml │ ├── mask_rcnn_R_50_C4_training_acc_test.yaml │ ├── mask_rcnn_R_50_DC5_inference_acc_test.yaml │ ├── mask_rcnn_R_50_FPN_inference_acc_test.yaml │ ├── mask_rcnn_R_50_FPN_instant_test.yaml │ ├── mask_rcnn_R_50_FPN_training_acc_test.yaml │ ├── panoptic_fpn_R_50_inference_acc_test.yaml │ ├── panoptic_fpn_R_50_instant_test.yaml │ ├── panoptic_fpn_R_50_training_acc_test.yaml │ ├── retinanet_R_50_FPN_inference_acc_test.yaml │ ├── retinanet_R_50_FPN_instant_test.yaml │ ├── rpn_R_50_FPN_inference_acc_test.yaml │ ├── rpn_R_50_FPN_instant_test.yaml │ ├── semantic_R_50_FPN_inference_acc_test.yaml │ ├── semantic_R_50_FPN_instant_test.yaml │ └── semantic_R_50_FPN_training_acc_test.yaml ├── demo ├── README.md ├── demo.py └── 
predictor.py ├── detectron2 ├── __init__.py ├── checkpoint │ ├── __init__.py │ ├── c2_model_loading.py │ ├── detection_checkpoint.py │ └── model_zoo.py ├── config │ ├── __init__.py │ ├── compat.py │ ├── config.py │ └── defaults.py ├── data │ ├── __init__.py │ ├── build.py │ ├── catalog.py │ ├── common.py │ ├── dataset_mapper.py │ ├── datasets │ │ ├── README.md │ │ ├── __init__.py │ │ ├── builtin.py │ │ ├── builtin_meta.py │ │ ├── cityscapes.py │ │ ├── coco.py │ │ ├── flickr30k.py │ │ ├── lvis.py │ │ ├── lvis_v0_5_categories.py │ │ ├── pascal_voc.py │ │ ├── refcoco.py │ │ └── register_coco.py │ ├── detection_utils.py │ ├── samplers │ │ ├── __init__.py │ │ ├── distributed_sampler.py │ │ └── grouped_batch_sampler.py │ └── transforms │ │ ├── __init__.py │ │ ├── transform.py │ │ └── transform_gen.py ├── engine │ ├── __init__.py │ ├── defaults.py │ ├── hooks.py │ ├── launch.py │ └── train_loop.py ├── evaluation │ ├── __init__.py │ ├── cityscapes_evaluation.py │ ├── coco_evaluation.py │ ├── evaluator.py │ ├── flickr30k_evaluation_grounding.py │ ├── flickr30k_evaluation_grounding_lite.py │ ├── flickr30k_evaluation_grounding_reg.py │ ├── flickr30k_evaluation_grounding_reg_ml.py │ ├── flickr30k_evaluation_grounding_v1.py │ ├── flickr30k_evaluation_kac.py │ ├── lvis_evaluation.py │ ├── panoptic_evaluation.py │ ├── pascal_voc_evaluation.py │ ├── recoco_evaluation_grounding.py │ ├── sem_seg_evaluation.py │ └── testing.py ├── layers │ ├── __init__.py │ ├── batch_norm.py │ ├── csrc │ │ ├── README.md │ │ ├── ROIAlign │ │ │ ├── ROIAlign.h │ │ │ ├── ROIAlign_cpu.cpp │ │ │ └── ROIAlign_cuda.cu │ │ ├── ROIAlignRotated │ │ │ ├── ROIAlignRotated.h │ │ │ ├── ROIAlignRotated_cpu.cpp │ │ │ └── ROIAlignRotated_cuda.cu │ │ ├── box_iou_rotated │ │ │ ├── box_iou_rotated.h │ │ │ ├── box_iou_rotated_cpu.cpp │ │ │ ├── box_iou_rotated_cuda.cu │ │ │ └── box_iou_rotated_utils.h │ │ ├── deformable │ │ │ ├── deform_conv.h │ │ │ ├── deform_conv_cuda.cu │ │ │ └── deform_conv_cuda_kernel.cu │ │ ├── nms_rotated │ │ │ ├── nms_rotated.h │ │ │ ├── nms_rotated_cpu.cpp │ │ │ └── nms_rotated_cuda.cu │ │ └── vision.cpp │ ├── deform_conv.py │ ├── generate_union_region.py │ ├── gpu_hangon.py │ ├── mask_ops.py │ ├── move2cpu.py │ ├── nms.py │ ├── numerical_stability_softmax.py │ ├── ops.py │ ├── prefetcher.py │ ├── roi_align.py │ ├── roi_align_rotated.py │ ├── rotated_boxes.py │ ├── shape_spec.py │ ├── spatial_coordinate.py │ ├── weighted_smooth_l1_loss.py │ └── wrappers.py ├── modeling │ ├── __init__.py │ ├── anchor_generator.py │ ├── backbone │ │ ├── __init__.py │ │ ├── backbone.py │ │ ├── build.py │ │ ├── deeplab.py │ │ ├── deeplabv2.py │ │ ├── fpn.py │ │ ├── resnet.py │ │ └── resnet101.py │ ├── box_regression.py │ ├── matcher.py │ ├── meta_arch │ │ ├── __init__.py │ │ ├── build.py │ │ ├── panoptic_fpn.py │ │ ├── rcnn.py │ │ ├── retinanet.py │ │ └── semantic_seg.py │ ├── poolers.py │ ├── postprocessing.py │ ├── proposal_generator │ │ ├── __init__.py │ │ ├── build.py │ │ ├── proposal_utils.py │ │ ├── rpn.py │ │ ├── rpn_outputs.py │ │ ├── rrpn.py │ │ └── rrpn_outputs.py │ ├── roi_heads │ │ ├── __init__.py │ │ ├── box_head.py │ │ ├── cascade_rcnn.py │ │ ├── fast_rcnn.py │ │ ├── keypoint_head.py │ │ ├── mask_head.py │ │ └── roi_heads.py │ ├── sampling.py │ ├── test_time_augmentation.py │ └── weaklygrounding │ │ ├── kac_net.py │ │ ├── loss.py │ │ ├── loss_kac.py │ │ ├── phr_emebdding_kac.py │ │ ├── phrase_embedding_weakly.py │ │ ├── phrase_embedding_weakly_v1.py │ │ ├── vg_detection_weakly.py │ │ ├── vg_detection_weakly_v1.py │ │ ├── 
vg_detection_weakly_v3.py │ │ ├── weakly_visual_grounding_reg_rel.py │ │ └── weakly_visual_grounding_regression.py ├── solver │ ├── __init__.py │ ├── build.py │ └── lr_scheduler.py ├── structures │ ├── __init__.py │ ├── boxes.py │ ├── image_list.py │ ├── instances.py │ ├── keypoints.py │ ├── masks.py │ └── rotated_boxes.py └── utils │ ├── README.md │ ├── __init__.py │ ├── collect_env.py │ ├── colormap.py │ ├── comm.py │ ├── env.py │ ├── events.py │ ├── logger.py │ ├── registry.py │ ├── serialize.py │ ├── video_visualizer.py │ └── visualizer.py ├── dev ├── README.md ├── linter.sh ├── parse_results.sh ├── run_inference_tests.sh └── run_instant_tests.sh ├── docs ├── .gitignore ├── Makefile ├── README.md ├── conf.py ├── index.rst ├── modules │ ├── checkpoint.rst │ ├── config.rst │ ├── data.datasets.rst │ ├── data.rst │ ├── data.samplers.rst │ ├── data.transforms.rst │ ├── engine.rst │ ├── evaluation.rst │ ├── index.rst │ ├── layers.rst │ ├── modeling.rst │ ├── solver.rst │ ├── structures.rst │ └── utils.rst ├── notes │ ├── benchmarks.md │ ├── changelog.md │ ├── compatibility.md │ └── index.rst └── tutorials │ ├── configs.md │ ├── data_loading.md │ ├── datasets.md │ ├── extend.md │ ├── getting_started.md │ ├── index.rst │ ├── install.md │ ├── models.md │ └── training.md ├── killpy.sh ├── outputs └── .DS_Store ├── projects ├── DensePose │ ├── README.md │ ├── apply_net.py │ ├── configs │ │ ├── Base-DensePose-RCNN.yaml │ │ ├── densepose_R_50_FPN_s1x.yaml │ │ └── quick_schedules │ │ │ ├── densepose_R_50_FPN_inference_acc_test.yaml │ │ │ ├── densepose_R_50_FPN_instant_test.yaml │ │ │ └── densepose_R_50_FPN_training_acc_test.yaml │ ├── densepose │ │ ├── __init__.py │ │ ├── config.py │ │ ├── dataset.py │ │ ├── dataset_mapper.py │ │ ├── densepose_coco_evaluation.py │ │ ├── densepose_head.py │ │ ├── evaluator.py │ │ ├── roi_head.py │ │ ├── structures.py │ │ ├── utils │ │ │ ├── dbhelper.py │ │ │ └── logger.py │ │ └── vis │ │ │ ├── base.py │ │ │ ├── bounding_box.py │ │ │ ├── densepose.py │ │ │ └── extractor.py │ ├── doc │ │ ├── TOOL_APPLY_NET.md │ │ └── TOOL_QUERY_DB.md │ ├── query_db.py │ └── train_net.py ├── README.md └── TridentNet │ ├── README.md │ ├── configs │ ├── Base-TridentNet-Fast-C4.yaml │ ├── tridentnet_fast_R_101_C4_3x.yaml │ └── tridentnet_fast_R_50_C4_1x.yaml │ ├── train_net.py │ └── tridentnet │ ├── __init__.py │ ├── config.py │ ├── trident_backbone.py │ ├── trident_conv.py │ ├── trident_rcnn.py │ └── trident_rpn.py ├── scripts ├── demo.sh ├── test.sh ├── test_kac.sh ├── train.sh ├── train_baseline.sh └── train_kac.sh ├── setup.cfg ├── setup.py ├── tests ├── __init__.py ├── test_anchor_generator.py ├── test_box2box_transform.py ├── test_boxes.py ├── test_checkpoint.py ├── test_config.py ├── test_data_transform.py ├── test_fast_rcnn.py ├── test_mask_ops.py ├── test_nms_rotated.py ├── test_roi_align.py ├── test_roi_align_rotated.py ├── test_roi_heads.py ├── test_roi_pooler.py ├── test_rotated_boxes.py ├── test_rpn.py └── test_visualizer.py ├── tools ├── benchmark.py ├── train_kac.py ├── train_net.py ├── train_weakly_grounding.py ├── visualize_data.py └── visualize_json_results.py └── unit_test.py /.clang-format: -------------------------------------------------------------------------------- 1 | AccessModifierOffset: -1 2 | AlignAfterOpenBracket: AlwaysBreak 3 | AlignConsecutiveAssignments: false 4 | AlignConsecutiveDeclarations: false 5 | AlignEscapedNewlinesLeft: true 6 | AlignOperands: false 7 | AlignTrailingComments: false 8 | AllowAllParametersOfDeclarationOnNextLine: false 9 | 
AllowShortBlocksOnASingleLine: false 10 | AllowShortCaseLabelsOnASingleLine: false 11 | AllowShortFunctionsOnASingleLine: Empty 12 | AllowShortIfStatementsOnASingleLine: false 13 | AllowShortLoopsOnASingleLine: false 14 | AlwaysBreakAfterReturnType: None 15 | AlwaysBreakBeforeMultilineStrings: true 16 | AlwaysBreakTemplateDeclarations: true 17 | BinPackArguments: false 18 | BinPackParameters: false 19 | BraceWrapping: 20 | AfterClass: false 21 | AfterControlStatement: false 22 | AfterEnum: false 23 | AfterFunction: false 24 | AfterNamespace: false 25 | AfterObjCDeclaration: false 26 | AfterStruct: false 27 | AfterUnion: false 28 | BeforeCatch: false 29 | BeforeElse: false 30 | IndentBraces: false 31 | BreakBeforeBinaryOperators: None 32 | BreakBeforeBraces: Attach 33 | BreakBeforeTernaryOperators: true 34 | BreakConstructorInitializersBeforeComma: false 35 | BreakAfterJavaFieldAnnotations: false 36 | BreakStringLiterals: false 37 | ColumnLimit: 80 38 | CommentPragmas: '^ IWYU pragma:' 39 | ConstructorInitializerAllOnOneLineOrOnePerLine: true 40 | ConstructorInitializerIndentWidth: 4 41 | ContinuationIndentWidth: 4 42 | Cpp11BracedListStyle: true 43 | DerivePointerAlignment: false 44 | DisableFormat: false 45 | ForEachMacros: [ FOR_EACH, FOR_EACH_ENUMERATE, FOR_EACH_KV, FOR_EACH_R, FOR_EACH_RANGE, ] 46 | IncludeCategories: 47 | - Regex: '^<.*\.h(pp)?>' 48 | Priority: 1 49 | - Regex: '^<.*' 50 | Priority: 2 51 | - Regex: '.*' 52 | Priority: 3 53 | IndentCaseLabels: true 54 | IndentWidth: 2 55 | IndentWrappedFunctionNames: false 56 | KeepEmptyLinesAtTheStartOfBlocks: false 57 | MacroBlockBegin: '' 58 | MacroBlockEnd: '' 59 | MaxEmptyLinesToKeep: 1 60 | NamespaceIndentation: None 61 | ObjCBlockIndentWidth: 2 62 | ObjCSpaceAfterProperty: false 63 | ObjCSpaceBeforeProtocolList: false 64 | PenaltyBreakBeforeFirstCallParameter: 1 65 | PenaltyBreakComment: 300 66 | PenaltyBreakFirstLessLess: 120 67 | PenaltyBreakString: 1000 68 | PenaltyExcessCharacter: 1000000 69 | PenaltyReturnTypeOnItsOwnLine: 200 70 | PointerAlignment: Left 71 | ReflowComments: true 72 | SortIncludes: true 73 | SpaceAfterCStyleCast: false 74 | SpaceBeforeAssignmentOperators: true 75 | SpaceBeforeParens: ControlStatements 76 | SpaceInEmptyParentheses: false 77 | SpacesBeforeTrailingComments: 1 78 | SpacesInAngles: false 79 | SpacesInContainerLiterals: true 80 | SpacesInCStyleCastParentheses: false 81 | SpacesInParentheses: false 82 | SpacesInSquareBrackets: false 83 | Standard: Cpp11 84 | TabWidth: 8 85 | UseTab: Never 86 | -------------------------------------------------------------------------------- /.flake8: -------------------------------------------------------------------------------- 1 | # This is an example .flake8 config, used when developing *Black* itself. 2 | # Keep in sync with setup.cfg which is used for source packages. 
3 | 4 | [flake8] 5 | ignore = W503, E203, E221, C901 6 | max-line-length = 100 7 | max-complexity = 18 8 | select = B,C,E,F,W,T4,B9 9 | exclude = build,__init__.py 10 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # output dir 2 | output 3 | instant_test_output 4 | inference_test_output 5 | 6 | .vscode/* 7 | flickr30k_datasets/* 8 | 9 | *.jpg 10 | *.png 11 | *.txt 12 | 13 | # compilation and distribution 14 | __pycache__ 15 | _ext 16 | *.pyc 17 | *.so 18 | detectron2.egg-info/ 19 | build/ 20 | dist/ 21 | 22 | # pytorch/python/numpy formats 23 | *.pth 24 | *.pkl 25 | *.npy 26 | 27 | # ipython/jupyter notebooks 28 | *.ipynb 29 | **/.ipynb_checkpoints/ 30 | 31 | # Editor temporaries 32 | *.swn 33 | *.swo 34 | *.swp 35 | *~ 36 | 37 | # Pycharm editor settings 38 | .idea 39 | 40 | # project dirs 41 | /datasets 42 | /models 43 | /RefSegDatasets 44 | .github/ 45 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Code of Conduct 2 | 3 | Facebook has adopted a Code of Conduct that we expect project participants to adhere to. 4 | Please read the [full text](https://code.fb.com/codeofconduct/) 5 | so that you can understand what actions will and will not be tolerated. 6 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to detectron2 2 | We want to make contributing to this project as easy and transparent as 3 | possible. 4 | 5 | ## Pull Requests 6 | We actively welcome your pull requests. 7 | 8 | 1. Fork the repo and create your branch from `master`. 9 | 2. If you've added code that should be tested, add tests. 10 | 3. If you've changed APIs, update the documentation. 11 | 4. Ensure the test suite passes. 12 | 5. Make sure your code lints. 13 | 6. If you haven't already, complete the Contributor License Agreement ("CLA"). 14 | 15 | ## Contributor License Agreement ("CLA") 16 | In order to accept your pull request, we need you to submit a CLA. You only need 17 | to do this once to work on any of Facebook's open source projects. 18 | 19 | Complete your CLA here: <https://code.facebook.com/cla> 20 | 21 | ## Issues 22 | We use GitHub issues to track public bugs. Please ensure your description is 23 | clear and has sufficient instructions to be able to reproduce the issue. 24 | 25 | Facebook has a [bounty program](https://www.facebook.com/whitehat/) for the safe 26 | disclosure of security bugs. In those cases, please go through the process 27 | outlined on that page and do not file a public issue. 28 | 29 | ## License 30 | By contributing to detectron2, you agree that your contributions will be licensed 31 | under the LICENSE file in the root directory of this source tree. 32 | -------------------------------------------------------------------------------- /GETTING_STARTED.md: -------------------------------------------------------------------------------- 1 | ## Relation-aware instance refinement for weakly supervised visual grounding 2 | 3 | ### 1. Build detectron2 by following the official instructions 4 | 5 | ### 2. Training on Flickr30k Entities 6 | ``` 7 | sh scripts/train.sh ## change MODEL.VG.NETWORK from 'RegRel' to 'Baseline', 'Baseline_s2', or 'Reg' to reproduce the ablation studies 8 | ``` 9 | 
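10 | For reference, a minimal sketch of the direct call this script presumably wraps; the `--config-file` flag and trailing `KEY VALUE` config overrides follow the usual detectron2 convention and are an assumption here, not something verified against `scripts/train.sh`: 11 | ``` 12 | # hypothetical direct invocation; prefer scripts/train.sh, which sets any remaining flags 13 | python tools/train_weakly_grounding.py --config-file configs/WeaklyGrounding-RN101-C4.yaml MODEL.VG.NETWORK 'Baseline' 14 | ``` 15 | 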
16 | ### 3. Training for KAC models 17 | 18 | ``` 19 | sh scripts/train_kac.sh ## reproduce the final results 20 | 21 | ``` 22 | 23 | ### 4. Data preparation 24 | 25 | We will release the processed dataset later. -------------------------------------------------------------------------------- /INSTALL.md: -------------------------------------------------------------------------------- 1 | ## Installation 2 | 3 | Our [Colab Notebook](https://colab.research.google.com/drive/16jcaJoc6bCFAQ96jDe2HwtXj7BMD_-m5) also has step-by-step instructions that install detectron2. 4 | 5 | ### Requirements 6 | - Python >= 3.6 7 | - PyTorch 1.3 8 | - [torchvision](https://github.com/pytorch/vision/) that matches the PyTorch installation. 9 | You can install them together at [pytorch.org](http://pytorch.org) to make sure of this. 10 | - OpenCV, needed by demo and visualization 11 | - [fvcore](https://github.com/facebookresearch/fvcore/): `pip install 'git+https://github.com/facebookresearch/fvcore'` 12 | - pycocotools: `pip install cython; pip install 'git+https://github.com/cocodataset/cocoapi.git#subdirectory=PythonAPI'` 13 | - GCC >= 4.9 14 | - apex, built from source: 15 | `git clone https://www.github.com/nvidia/apex` 16 | `cd apex` 17 | `python setup.py install` 18 | - datasets 19 | 20 | 21 | 22 | ### Build detectron2 23 | 24 | After having the above dependencies, run: 25 | ``` 26 | git clone git@github.com:facebookresearch/detectron2.git 27 | cd detectron2 28 | export TORCH_CUDA_ARCH_LIST="Kepler;Kepler+Tesla;Maxwell;Maxwell+Tegra;Pascal;Volta;Turing" ## compile for every platform 29 | python setup.py build develop 30 | 31 | # or if you are on macOS 32 | # MACOSX_DEPLOYMENT_TARGET=10.9 CC=clang CXX=clang++ python setup.py build develop 33 | 34 | # or, as an alternative to `setup.py`, do 35 | # pip install . 36 | ``` 37 | Note: you may need to rebuild detectron2 after reinstalling a different build of PyTorch. 38 | 39 | ### Common Installation Issues 40 | 41 | + Undefined torch/aten symbols, or segmentation fault immediately when running the library. 42 | This may mean one of two things: 43 | 44 | * detectron2 or torchvision is not compiled with the version of PyTorch you're running. 45 | 46 | If you use a pre-built torchvision, uninstall torchvision & pytorch, and reinstall them 47 | following [pytorch.org](http://pytorch.org). 48 | If you manually build detectron2 or torchvision, remove the files you built (`build/`, `**/*.so`) 49 | and rebuild them. 50 | 51 | * detectron2 or torchvision is not compiled using gcc >= 4.9. 52 | 53 | You'll see a warning message during compilation in this case. Please remove the files you built, 54 | and rebuild them. 55 | Technically, you need the identical compiler that's used to build pytorch to guarantee 56 | compatibility. But in practice, gcc >= 4.9 should work OK. 57 | 58 | + Undefined cuda symbols. The version of NVCC you use to build detectron2 or torchvision does 59 | not match the version of cuda you are running with. 60 | This happens sometimes when using anaconda. 61 | 62 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## Relation-aware instance refinement for weakly supervised visual grounding 2 | 3 | ### 1. Build detectron2 by following the official instructions 4 | 5 | ### 2. Training on Flickr30k Entities 6 | ``` 7 | sh scripts/train.sh ## change MODEL.VG.NETWORK from 'RegRel' to 'Baseline', 'Baseline_s2', or 'Reg' to reproduce the ablation studies 8 | ``` 9 | 
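10 | A trained model can then presumably be evaluated with the bundled scripts; each script hard-codes the checkpoint and config it loads, so treat the line below as a sketch rather than a verified command: 11 | ``` 12 | sh scripts/test.sh ## evaluation for the grounding model; scripts/test_kac.sh is presumably the KAC counterpart 13 | ``` 14 | 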
15 | ### 3. Training for KAC models 16 | 17 | ``` 18 | sh scripts/train_kac.sh ## reproduce the final results 19 | 20 | ``` 21 | 22 | ### 4. Data preparation 23 | 24 | We will release the processed dataset later. -------------------------------------------------------------------------------- /configs/Base-RCNN-C4.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | RPN: 4 | PRE_NMS_TOPK_TEST: 6000 5 | POST_NMS_TOPK_TEST: 1000 6 | ROI_HEADS: 7 | NAME: "Res5ROIHeads" 8 | DATASETS: 9 | TRAIN: ("coco_2017_train",) 10 | TEST: ("coco_2017_val",) 11 | SOLVER: 12 | IMS_PER_BATCH: 16 13 | BASE_LR: 0.02 14 | STEPS: (60000, 80000) 15 | MAX_ITER: 90000 16 | INPUT: 17 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 18 | 19 | -------------------------------------------------------------------------------- /configs/Base-RCNN-DilatedC5.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | RESNETS: 4 | OUT_FEATURES: ["res5"] 5 | RES5_DILATION: 2 6 | RPN: 7 | IN_FEATURES: ["res5"] 8 | PRE_NMS_TOPK_TEST: 6000 9 | POST_NMS_TOPK_TEST: 1000 10 | ROI_HEADS: 11 | NAME: "StandardROIHeads" 12 | IN_FEATURES: ["res5"] 13 | ROI_BOX_HEAD: 14 | NAME: "FastRCNNConvFCHead" 15 | NUM_FC: 2 16 | POOLER_RESOLUTION: 7 17 | ROI_MASK_HEAD: 18 | NAME: "MaskRCNNConvUpsampleHead" 19 | NUM_CONV: 4 20 | POOLER_RESOLUTION: 14 21 | DATASETS: 22 | TRAIN: ("coco_2017_train",) 23 | TEST: ("coco_2017_val",) 24 | SOLVER: 25 | IMS_PER_BATCH: 16 26 | BASE_LR: 0.02 27 | STEPS: (60000, 80000) 28 | MAX_ITER: 90000 29 | INPUT: 30 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 31 | -------------------------------------------------------------------------------- /configs/Base-RCNN-FPN.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | BACKBONE: 4 | NAME: "build_resnet_fpn_backbone" 5 | RESNETS: 6 | OUT_FEATURES: ["res2", "res3", "res4", "res5"] 7 | FPN: 8 | IN_FEATURES: ["res2", "res3", "res4", "res5"] 9 | ANCHOR_GENERATOR: 10 | SIZES: [[32], [64], [128], [256], [512]] # One size for each in feature map 11 | ASPECT_RATIOS: [[0.5, 1.0, 2.0]] # Three aspect ratios (same for all in feature maps) 12 | RPN: 13 | IN_FEATURES: ["p2", "p3", "p4", "p5", "p6"] 14 | PRE_NMS_TOPK_TRAIN: 2000 # Per FPN level 15 | PRE_NMS_TOPK_TEST: 1000 # Per FPN level 16 | # Detectron1 uses 2000 proposals per-batch, 17 | # (See "modeling/rpn/rpn_outputs.py" for details of this legacy issue) 18 | # which is approximately 1000 proposals per-image since the default batch size for FPN is 2. 
19 | POST_NMS_TOPK_TRAIN: 1000 20 | POST_NMS_TOPK_TEST: 1000 21 | ROI_HEADS: 22 | NAME: "StandardROIHeads" 23 | IN_FEATURES: ["p2", "p3", "p4", "p5"] 24 | ROI_BOX_HEAD: 25 | NAME: "FastRCNNConvFCHead" 26 | NUM_FC: 2 27 | POOLER_RESOLUTION: 7 28 | ROI_MASK_HEAD: 29 | NAME: "MaskRCNNConvUpsampleHead" 30 | NUM_CONV: 4 31 | POOLER_RESOLUTION: 14 32 | DATASETS: 33 | TRAIN: ("coco_2017_train",) 34 | TEST: ("coco_2017_val",) 35 | SOLVER: 36 | IMS_PER_BATCH: 16 37 | BASE_LR: 0.02 38 | STEPS: (60000, 80000) 39 | MAX_ITER: 90000 40 | INPUT: 41 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 42 | -------------------------------------------------------------------------------- /configs/Base-RetinaNet.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "RetinaNet" 3 | BACKBONE: 4 | NAME: "build_retinanet_resnet_fpn_backbone" 5 | RESNETS: 6 | OUT_FEATURES: ["res3", "res4", "res5"] 7 | ANCHOR_GENERATOR: 8 | SIZES: !!python/object/apply:eval ["[[x, x * 2**(1.0/3), x * 2**(2.0/3) ] for x in [32, 64, 128, 256, 512 ]]"] 9 | FPN: 10 | IN_FEATURES: ["res3", "res4", "res5"] 11 | RETINANET: 12 | IOU_THRESHOLDS: [0.4, 0.5] 13 | IOU_LABELS: [0, -1, 1] 14 | DATASETS: 15 | TRAIN: ("coco_2017_train",) 16 | TEST: ("coco_2017_val",) 17 | SOLVER: 18 | IMS_PER_BATCH: 16 19 | BASE_LR: 0.01 # Note that RetinaNet uses a different default learning rate 20 | STEPS: (60000, 80000) 21 | MAX_ITER: 90000 22 | INPUT: 23 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 24 | -------------------------------------------------------------------------------- /configs/COCO-Detection/fast_rcnn_R_50_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: False 5 | LOAD_PROPOSALS: True 6 | RESNETS: 7 | DEPTH: 50 8 | PROPOSAL_GENERATOR: 9 | NAME: "PrecomputedProposals" 10 | DATASETS: 11 | TRAIN: ("coco_2017_train",) 12 | PROPOSAL_FILES_TRAIN: ("detectron2://COCO-Detection/rpn_R_50_FPN_1x/137258492/coco_2017_train_box_proposals_21bc3a.pkl", ) 13 | TEST: ("coco_2017_val",) 14 | PROPOSAL_FILES_TEST: ("detectron2://COCO-Detection/rpn_R_50_FPN_1x/137258492/coco_2017_val_box_proposals_ee0dad.pkl", ) 15 | DATALOADER: 16 | # proposals are part of the dataset_dicts, and take a lot of RAM 17 | NUM_WORKERS: 2 18 | -------------------------------------------------------------------------------- /configs/COCO-Detection/faster_rcnn_R_101_C4_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-C4.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" 4 | MASK_ON: False 5 | RESNETS: 6 | DEPTH: 101 7 | SOLVER: 8 | STEPS: (210000, 250000) 9 | MAX_ITER: 270000 10 | -------------------------------------------------------------------------------- /configs/COCO-Detection/faster_rcnn_R_101_DC5_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-DilatedC5.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" 4 | MASK_ON: False 5 | RESNETS: 6 | DEPTH: 101 7 | SOLVER: 8 | STEPS: (210000, 250000) 9 | MAX_ITER: 270000 10 | -------------------------------------------------------------------------------- /configs/COCO-Detection/faster_rcnn_R_101_FPN_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 
2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" 4 | MASK_ON: False 5 | RESNETS: 6 | DEPTH: 101 7 | SOLVER: 8 | STEPS: (210000, 250000) 9 | MAX_ITER: 270000 10 | -------------------------------------------------------------------------------- /configs/COCO-Detection/faster_rcnn_R_50_C4_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-C4.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: False 5 | RESNETS: 6 | DEPTH: 50 7 | DATASETS: 8 | TRAIN: ("coco_2014_train",) 9 | TEST: ("coco_2014_val",) 10 | -------------------------------------------------------------------------------- /configs/COCO-Detection/faster_rcnn_R_50_C4_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-C4.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: False 5 | RESNETS: 6 | DEPTH: 50 7 | SOLVER: 8 | STEPS: (210000, 250000) 9 | MAX_ITER: 270000 10 | -------------------------------------------------------------------------------- /configs/COCO-Detection/faster_rcnn_R_50_DC5_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-DilatedC5.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: False 5 | RESNETS: 6 | DEPTH: 50 7 | -------------------------------------------------------------------------------- /configs/COCO-Detection/faster_rcnn_R_50_DC5_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-DilatedC5.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: False 5 | RESNETS: 6 | DEPTH: 50 7 | SOLVER: 8 | STEPS: (210000, 250000) 9 | MAX_ITER: 270000 10 | -------------------------------------------------------------------------------- /configs/COCO-Detection/faster_rcnn_R_50_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: False 5 | RESNETS: 6 | DEPTH: 50 7 | -------------------------------------------------------------------------------- /configs/COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: False 5 | RESNETS: 6 | DEPTH: 50 7 | SOLVER: 8 | STEPS: (210000, 250000) 9 | MAX_ITER: 270000 10 | -------------------------------------------------------------------------------- /configs/COCO-Detection/faster_rcnn_X_101_32x8d_FPN_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | MASK_ON: False 4 | WEIGHTS: "detectron2://ImageNetPretrained/FAIR/X-101-32x8d.pkl" 5 | PIXEL_STD: [57.375, 57.120, 58.395] 6 | RESNETS: 7 | STRIDE_IN_1X1: False # this is a C2 model 8 | NUM_GROUPS: 32 9 | WIDTH_PER_GROUP: 8 10 | DEPTH: 101 11 | SOLVER: 12 | STEPS: (210000, 250000) 13 | MAX_ITER: 270000 14 | -------------------------------------------------------------------------------- /configs/COCO-Detection/retinanet_R_101_FPN_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RetinaNet.yaml" 2 | MODEL: 3 | 
WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" 4 | RESNETS: 5 | DEPTH: 101 6 | SOLVER: 7 | STEPS: (210000, 250000) 8 | MAX_ITER: 270000 9 | -------------------------------------------------------------------------------- /configs/COCO-Detection/retinanet_R_50_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RetinaNet.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | RESNETS: 5 | DEPTH: 50 6 | -------------------------------------------------------------------------------- /configs/COCO-Detection/retinanet_R_50_FPN_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RetinaNet.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | RESNETS: 5 | DEPTH: 50 6 | SOLVER: 7 | STEPS: (210000, 250000) 8 | MAX_ITER: 270000 9 | -------------------------------------------------------------------------------- /configs/COCO-Detection/rpn_R_50_C4_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-C4.yaml" 2 | MODEL: 3 | META_ARCHITECTURE: "ProposalNetwork" 4 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 5 | MASK_ON: False 6 | RESNETS: 7 | DEPTH: 50 8 | RPN: 9 | PRE_NMS_TOPK_TEST: 12000 10 | POST_NMS_TOPK_TEST: 2000 11 | -------------------------------------------------------------------------------- /configs/COCO-Detection/rpn_R_50_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | META_ARCHITECTURE: "ProposalNetwork" 4 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 5 | MASK_ON: False 6 | RESNETS: 7 | DEPTH: 50 8 | RPN: 9 | POST_NMS_TOPK_TEST: 2000 10 | -------------------------------------------------------------------------------- /configs/COCO-InstanceSegmentation/mask_rcnn_R_101_C4_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-C4.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" 4 | MASK_ON: True 5 | RESNETS: 6 | DEPTH: 101 7 | SOLVER: 8 | STEPS: (210000, 250000) 9 | MAX_ITER: 270000 10 | -------------------------------------------------------------------------------- /configs/COCO-InstanceSegmentation/mask_rcnn_R_101_DC5_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-DilatedC5.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" 4 | MASK_ON: True 5 | RESNETS: 6 | DEPTH: 101 7 | SOLVER: 8 | STEPS: (210000, 250000) 9 | MAX_ITER: 270000 10 | -------------------------------------------------------------------------------- /configs/COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" 4 | MASK_ON: True 5 | RESNETS: 6 | DEPTH: 101 7 | SOLVER: 8 | STEPS: (210000, 250000) 9 | MAX_ITER: 270000 10 | -------------------------------------------------------------------------------- /configs/COCO-InstanceSegmentation/mask_rcnn_R_50_C4_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-C4.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: True 5 | RESNETS: 6 | 
DEPTH: 50 7 | -------------------------------------------------------------------------------- /configs/COCO-InstanceSegmentation/mask_rcnn_R_50_C4_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-C4.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: True 5 | RESNETS: 6 | DEPTH: 50 7 | SOLVER: 8 | STEPS: (210000, 250000) 9 | MAX_ITER: 270000 10 | -------------------------------------------------------------------------------- /configs/COCO-InstanceSegmentation/mask_rcnn_R_50_DC5_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-DilatedC5.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: True 5 | RESNETS: 6 | DEPTH: 50 7 | -------------------------------------------------------------------------------- /configs/COCO-InstanceSegmentation/mask_rcnn_R_50_DC5_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-DilatedC5.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: True 5 | RESNETS: 6 | DEPTH: 50 7 | SOLVER: 8 | STEPS: (210000, 250000) 9 | MAX_ITER: 270000 10 | -------------------------------------------------------------------------------- /configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: True 5 | RESNETS: 6 | DEPTH: 50 7 | -------------------------------------------------------------------------------- /configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: True 5 | RESNETS: 6 | DEPTH: 50 7 | SOLVER: 8 | STEPS: (210000, 250000) 9 | MAX_ITER: 270000 10 | -------------------------------------------------------------------------------- /configs/COCO-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | MASK_ON: True 4 | WEIGHTS: "detectron2://ImageNetPretrained/FAIR/X-101-32x8d.pkl" 5 | PIXEL_STD: [57.375, 57.120, 58.395] 6 | RESNETS: 7 | STRIDE_IN_1X1: False # this is a C2 model 8 | NUM_GROUPS: 32 9 | WIDTH_PER_GROUP: 8 10 | DEPTH: 101 11 | SOLVER: 12 | STEPS: (210000, 250000) 13 | MAX_ITER: 270000 14 | -------------------------------------------------------------------------------- /configs/COCO-Keypoints/Base-Keypoint-RCNN-FPN.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | KEYPOINT_ON: True 4 | ROI_HEADS: 5 | NUM_CLASSES: 1 6 | ROI_BOX_HEAD: 7 | SMOOTH_L1_BETA: 0.5 # Keypoint AP degrades (though box AP improves) when using plain L1 loss 8 | RPN: 9 | # Detectron1 uses 2000 proposals per-batch, but this option is per-image in detectron2. 10 | # 1000 proposals per-image is found to hurt box AP. 11 | # Therefore we increase it to 1500 per-image. 
12 | POST_NMS_TOPK_TRAIN: 1500 13 | DATASETS: 14 | TRAIN: ("keypoints_coco_2017_train",) 15 | TEST: ("keypoints_coco_2017_val",) 16 | -------------------------------------------------------------------------------- /configs/COCO-Keypoints/keypoint_rcnn_R_101_FPN_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-Keypoint-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" 4 | RESNETS: 5 | DEPTH: 101 6 | SOLVER: 7 | STEPS: (210000, 250000) 8 | MAX_ITER: 270000 9 | -------------------------------------------------------------------------------- /configs/COCO-Keypoints/keypoint_rcnn_R_50_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-Keypoint-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | RESNETS: 5 | DEPTH: 50 6 | -------------------------------------------------------------------------------- /configs/COCO-Keypoints/keypoint_rcnn_R_50_FPN_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-Keypoint-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | RESNETS: 5 | DEPTH: 50 6 | SOLVER: 7 | STEPS: (210000, 250000) 8 | MAX_ITER: 270000 9 | -------------------------------------------------------------------------------- /configs/COCO-Keypoints/keypoint_rcnn_X_101_32x8d_FPN_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-Keypoint-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/FAIR/X-101-32x8d.pkl" 4 | PIXEL_STD: [57.375, 57.120, 58.395] 5 | RESNETS: 6 | STRIDE_IN_1X1: False # this is a C2 model 7 | NUM_GROUPS: 32 8 | WIDTH_PER_GROUP: 8 9 | DEPTH: 101 10 | SOLVER: 11 | STEPS: (210000, 250000) 12 | MAX_ITER: 270000 13 | -------------------------------------------------------------------------------- /configs/COCO-PanopticSegmentation/Base-Panoptic-FPN.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | META_ARCHITECTURE: "PanopticFPN" 4 | MASK_ON: True 5 | SEM_SEG_HEAD: 6 | LOSS_WEIGHT: 0.5 7 | DATASETS: 8 | TRAIN: ("coco_2017_train_panoptic_separated",) 9 | TEST: ("coco_2017_val_panoptic_separated",) 10 | -------------------------------------------------------------------------------- /configs/COCO-PanopticSegmentation/panoptic_fpn_R_101_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-Panoptic-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" 4 | RESNETS: 5 | DEPTH: 101 6 | SOLVER: 7 | STEPS: (210000, 250000) 8 | MAX_ITER: 270000 9 | -------------------------------------------------------------------------------- /configs/COCO-PanopticSegmentation/panoptic_fpn_R_50_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-Panoptic-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | RESNETS: 5 | DEPTH: 50 6 | -------------------------------------------------------------------------------- /configs/COCO-PanopticSegmentation/panoptic_fpn_R_50_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-Panoptic-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | RESNETS: 5 | DEPTH: 50 6 | 
SOLVER: 7 | STEPS: (210000, 250000) 8 | MAX_ITER: 270000 9 | -------------------------------------------------------------------------------- /configs/Cityscapes/mask_rcnn_R_50_FPN.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | # WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | # For better, more stable performance initialize from COCO 5 | WEIGHTS: "detectron2://COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x/137849600/model_final_f10217.pkl" 6 | MASK_ON: True 7 | ROI_HEADS: 8 | NUM_CLASSES: 8 9 | # This is the setting used in Mask R-CNN paper, Appendix A 10 | INPUT: 11 | MIN_SIZE_TRAIN: (800, 832, 864, 896, 928, 960, 992, 1024) 12 | MIN_SIZE_TRAIN_SAMPLING: "choice" 13 | MIN_SIZE_TEST: 1024 14 | MAX_SIZE_TRAIN: 2048 15 | MAX_SIZE_TEST: 2048 16 | DATASETS: 17 | TRAIN: ("cityscapes_fine_instance_seg_train",) 18 | TEST: ("cityscapes_fine_instance_seg_val",) 19 | SOLVER: 20 | BASE_LR: 0.01 21 | STEPS: (18000,) 22 | MAX_ITER: 24000 23 | IMS_PER_BATCH: 8 24 | TEST: 25 | EVAL_PERIOD: 8000 26 | -------------------------------------------------------------------------------- /configs/Detectron1-Comparisons/faster_rcnn_R_50_FPN_noaug_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: False 5 | RESNETS: 6 | DEPTH: 50 7 | # Detectron1 uses smooth L1 loss with some magic beta values. 8 | # The defaults are changed to L1 loss in Detectron2. 9 | RPN: 10 | SMOOTH_L1_BETA: 0.1111 11 | ROI_BOX_HEAD: 12 | SMOOTH_L1_BETA: 1.0 13 | POOLER_SAMPLING_RATIO: 2 14 | POOLER_TYPE: "ROIAlign" 15 | INPUT: 16 | # no scale augmentation 17 | MIN_SIZE_TRAIN: (800, ) 18 | -------------------------------------------------------------------------------- /configs/Detectron1-Comparisons/keypoint_rcnn_R_50_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | KEYPOINT_ON: True 5 | RESNETS: 6 | DEPTH: 50 7 | ROI_HEADS: 8 | NUM_CLASSES: 1 9 | ROI_KEYPOINT_HEAD: 10 | POOLER_RESOLUTION: 14 11 | POOLER_SAMPLING_RATIO: 2 12 | POOLER_TYPE: "ROIAlign" 13 | # Detectron1 uses smooth L1 loss with some magic beta values. 14 | # The defaults are changed to L1 loss in Detectron2. 15 | ROI_BOX_HEAD: 16 | SMOOTH_L1_BETA: 1.0 17 | POOLER_SAMPLING_RATIO: 2 18 | POOLER_TYPE: "ROIAlign" 19 | RPN: 20 | SMOOTH_L1_BETA: 0.1111 21 | # Detectron1 uses 2000 proposals per-batch, but this option is per-image in detectron2 22 | # 1000 proposals per-image is found to hurt box AP. 23 | # Therefore we increase it to 1500 per-image. 24 | POST_NMS_TOPK_TRAIN: 1500 25 | DATASETS: 26 | TRAIN: ("keypoints_coco_2017_train",) 27 | TEST: ("keypoints_coco_2017_val",) 28 | -------------------------------------------------------------------------------- /configs/Detectron1-Comparisons/mask_rcnn_R_50_FPN_noaug_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: True 5 | RESNETS: 6 | DEPTH: 50 7 | # Detectron1 uses smooth L1 loss with some magic beta values. 8 | # The defaults are changed to L1 loss in Detectron2. 
9 | RPN: 10 | SMOOTH_L1_BETA: 0.1111 11 | ROI_BOX_HEAD: 12 | SMOOTH_L1_BETA: 1.0 13 | POOLER_SAMPLING_RATIO: 2 14 | POOLER_TYPE: "ROIAlign" 15 | ROI_MASK_HEAD: 16 | POOLER_SAMPLING_RATIO: 2 17 | POOLER_TYPE: "ROIAlign" 18 | INPUT: 19 | # no scale augmentation 20 | MIN_SIZE_TRAIN: (800, ) 21 | -------------------------------------------------------------------------------- /configs/LVIS-InstanceSegmentation/mask_rcnn_R_101_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" 4 | MASK_ON: True 5 | RESNETS: 6 | DEPTH: 101 7 | ROI_HEADS: 8 | NUM_CLASSES: 1230 9 | SCORE_THRESH_TEST: 0.0001 10 | INPUT: 11 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 12 | DATASETS: 13 | TRAIN: ("lvis_v0.5_train",) 14 | TEST: ("lvis_v0.5_val",) 15 | TEST: 16 | DETECTIONS_PER_IMAGE: 300 # LVIS allows up to 300 17 | DATALOADER: 18 | SAMPLER_TRAIN: "RepeatFactorTrainingSampler" 19 | REPEAT_THRESHOLD: 0.001 20 | -------------------------------------------------------------------------------- /configs/LVIS-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: True 5 | RESNETS: 6 | DEPTH: 50 7 | ROI_HEADS: 8 | NUM_CLASSES: 1230 9 | SCORE_THRESH_TEST: 0.0001 10 | INPUT: 11 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 12 | DATASETS: 13 | TRAIN: ("lvis_v0.5_train",) 14 | TEST: ("lvis_v0.5_val",) 15 | TEST: 16 | DETECTIONS_PER_IMAGE: 300 # LVIS allows up to 300 17 | DATALOADER: 18 | SAMPLER_TRAIN: "RepeatFactorTrainingSampler" 19 | REPEAT_THRESHOLD: 0.001 20 | -------------------------------------------------------------------------------- /configs/LVIS-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/FAIR/X-101-32x8d.pkl" 4 | PIXEL_STD: [57.375, 57.120, 58.395] 5 | MASK_ON: True 6 | RESNETS: 7 | STRIDE_IN_1X1: False # this is a C2 model 8 | NUM_GROUPS: 32 9 | WIDTH_PER_GROUP: 8 10 | DEPTH: 101 11 | ROI_HEADS: 12 | NUM_CLASSES: 1230 13 | SCORE_THRESH_TEST: 0.0001 14 | INPUT: 15 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 16 | DATASETS: 17 | TRAIN: ("lvis_v0.5_train",) 18 | TEST: ("lvis_v0.5_val",) 19 | TEST: 20 | DETECTIONS_PER_IMAGE: 300 # LVIS allows up to 300 21 | DATALOADER: 22 | SAMPLER_TRAIN: "RepeatFactorTrainingSampler" 23 | REPEAT_THRESHOLD: 0.001 24 | -------------------------------------------------------------------------------- /configs/Misc/cascade_mask_rcnn_R_50_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: True 5 | RESNETS: 6 | DEPTH: 50 7 | ROI_HEADS: 8 | NAME: CascadeROIHeads 9 | ROI_BOX_HEAD: 10 | CLS_AGNOSTIC_BBOX_REG: True 11 | RPN: 12 | POST_NMS_TOPK_TRAIN: 2000 13 | -------------------------------------------------------------------------------- /configs/Misc/cascade_mask_rcnn_X_152_32x8d_FPN_IN5k_gn_dconv.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | MASK_ON: True 4 | WEIGHTS: 
"catalog://ImageNetPretrained/FAIR/X-152-32x8d-IN5k" 5 | RESNETS: 6 | STRIDE_IN_1X1: False # this is a C2 model 7 | NUM_GROUPS: 32 8 | WIDTH_PER_GROUP: 8 9 | DEPTH: 152 10 | DEFORM_ON_PER_STAGE: [False, True, True, True] 11 | ROI_HEADS: 12 | NAME: "CascadeROIHeads" 13 | ROI_BOX_HEAD: 14 | NAME: "FastRCNNConvFCHead" 15 | NUM_CONV: 4 16 | NUM_FC: 1 17 | NORM: "GN" 18 | CLS_AGNOSTIC_BBOX_REG: True 19 | ROI_MASK_HEAD: 20 | NUM_CONV: 8 21 | NORM: "GN" 22 | RPN: 23 | POST_NMS_TOPK_TRAIN: 2000 24 | SOLVER: 25 | IMS_PER_BATCH: 128 26 | STEPS: (35000, 45000) 27 | MAX_ITER: 50000 28 | BASE_LR: 0.16 29 | INPUT: 30 | MIN_SIZE_TRAIN: (640, 864) 31 | MIN_SIZE_TRAIN_SAMPLING: "range" 32 | MAX_SIZE_TRAIN: 1440 33 | CROP: 34 | ENABLED: True 35 | TEST: 36 | EVAL_PERIOD: 2500 37 | -------------------------------------------------------------------------------- /configs/Misc/mask_rcnn_R_50_FPN_1x_cls_agnostic.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: True 5 | RESNETS: 6 | DEPTH: 50 7 | ROI_BOX_HEAD: 8 | CLS_AGNOSTIC_BBOX_REG: True 9 | ROI_MASK_HEAD: 10 | CLS_AGNOSTIC_MASK: True 11 | -------------------------------------------------------------------------------- /configs/Misc/mask_rcnn_R_50_FPN_1x_dconv_c3-c5.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: True 5 | RESNETS: 6 | DEPTH: 50 7 | DEFORM_ON_PER_STAGE: [False, True, True, True] # on Res3,Res4,Res5 8 | DEFORM_MODULATED: False 9 | -------------------------------------------------------------------------------- /configs/Misc/mask_rcnn_R_50_FPN_3x_gn.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "catalog://ImageNetPretrained/FAIR/R-50-GN" 4 | MASK_ON: True 5 | RESNETS: 6 | DEPTH: 50 7 | NORM: "GN" 8 | STRIDE_IN_1X1: False 9 | FPN: 10 | NORM: "GN" 11 | ROI_BOX_HEAD: 12 | NAME: "FastRCNNConvFCHead" 13 | NUM_CONV: 4 14 | NUM_FC: 1 15 | NORM: "GN" 16 | ROI_MASK_HEAD: 17 | NORM: "GN" 18 | SOLVER: 19 | # 3x schedule 20 | STEPS: (210000, 250000) 21 | MAX_ITER: 270000 22 | -------------------------------------------------------------------------------- /configs/Misc/panoptic_fpn_R_101_dconv_cascade_gn_3x.yaml: -------------------------------------------------------------------------------- 1 | # A large PanopticFPN for demo purposes. 2 | # Use GN on backbone to support semantic seg. 3 | # Use Cascade + Deform Conv to improve localization. 
4 | _BASE_: "../COCO-PanopticSegmentation/Base-Panoptic-FPN.yaml" 5 | MODEL: 6 | WEIGHTS: "catalog://ImageNetPretrained/FAIR/R-101-GN" 7 | RESNETS: 8 | DEPTH: 101 9 | NORM: "GN" 10 | DEFORM_ON_PER_STAGE: [False, True, True, True] 11 | STRIDE_IN_1X1: False 12 | FPN: 13 | NORM: "GN" 14 | ROI_HEADS: 15 | NAME: CascadeROIHeads 16 | ROI_BOX_HEAD: 17 | CLS_AGNOSTIC_BBOX_REG: True 18 | ROI_MASK_HEAD: 19 | NORM: "GN" 20 | RPN: 21 | POST_NMS_TOPK_TRAIN: 2000 22 | SOLVER: 23 | STEPS: (105000, 125000) 24 | MAX_ITER: 135000 25 | IMS_PER_BATCH: 32 26 | BASE_LR: 0.04 27 | -------------------------------------------------------------------------------- /configs/Misc/scratch_mask_rcnn_R_50_FPN_3x_gn.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "mask_rcnn_R_50_FPN_3x_gn.yaml" 2 | # INPUT: 3 | # It makes sense to divide by STD when training from scratch, 4 | # but it seems to make no difference on the results and C2's models didn't do this. 5 | # So we keep things consistent with C2. 6 | # PIXEL_STD: [57.375, 57.12, 58.395] 7 | MODEL: 8 | WEIGHTS: "" 9 | MASK_ON: True 10 | BACKBONE: 11 | FREEZE_AT: 0 12 | -------------------------------------------------------------------------------- /configs/Misc/semantic_R_50_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | META_ARCHITECTURE: "SemanticSegmentor" 4 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 5 | RESNETS: 6 | DEPTH: 50 7 | DATASETS: 8 | TRAIN: ("coco_2017_train_panoptic_stuffonly",) 9 | TEST: ("coco_2017_val_panoptic_stuffonly",) 10 | INPUT: 11 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 12 | -------------------------------------------------------------------------------- /configs/PascalVOC-Detection/faster_rcnn_R_50_C4.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-C4.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: False 5 | RESNETS: 6 | DEPTH: 50 7 | ROI_HEADS: 8 | NUM_CLASSES: 20 9 | INPUT: 10 | MIN_SIZE_TRAIN: (480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800) 11 | MIN_SIZE_TEST: 800 12 | DATASETS: 13 | TRAIN: ('voc_2007_trainval', 'voc_2012_trainval') 14 | TEST: ('voc_2007_test',) 15 | SOLVER: 16 | STEPS: (12000, 16000) 17 | MAX_ITER: 18000 # 17.4 epochs 18 | WARMUP_ITERS: 100 19 | -------------------------------------------------------------------------------- /configs/RefSeg_deeplab_Dilate_1x_graph.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | RPN: 4 | PRE_NMS_TOPK_TEST: 6000 5 | POST_NMS_TOPK_TEST: 1000 6 | ROI_HEADS: 7 | NAME: "StandardPooler" 8 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 9 | MASK_ON: False 10 | RESNETS: 11 | DEPTH: 50 12 | PRINT_WINDOWSIZE: 13 | VIS_TRAIN: 100 14 | VIS_TEST: 100 15 | VG: 16 | SPATIAL_FEAT: True 17 | PHRASE_SELECT_TYPE: "Sum" 18 | MAX_PHRASE_LEN: 5 19 | VOCAB_FILE: "./RefSegDatasets/refseg_anno_new/refcoco+/vocab.json" 20 | FEAT_SOURCE: 'bbox' 21 | USE_EDGE_STRUCT: True 22 | USE_REL_FEAT: True 23 | ROI_BOX_HEAD: 24 | POOLER_RESOLUTION: 7 25 | POOLER_SAMPLING_RATIO: 2 26 | 27 | BACKBONE: 28 | NAME: "build_deeplabv2_backbone" 29 | PRETRAIN_PATH: './RefSegDatasets/refseg_anno_new/backbone_weight/deeplabv2.pth' 30 | 31 | 32 | 33 | DATASETS: 34 | TRAIN: ("refcoco+_train",) 35 | TEST: ("refcoco+_val",) 36 | 37 | SOLVER: 38 |
MASK_SOFTMAX: True 39 | LR_SCHEDULER_NAME: "WarmupMultiStepLR" ## "WarmupPolyLR" 40 | IMS_PER_BATCH: 8 41 | BASE_LR: 0.02 42 | STEPS: (20000, 40000) 43 | MAX_ITER: 1000 44 | WEIGHT_DECAY: 0.0005 45 | WEIGHT_DECAY_BIAS: 0.0005 46 | BIAS_LR_FACTOR: 1.0 47 | WEIGHT_DECAY_NORM: 0.0005 48 | WARMUP_ITERS: 1000 49 | WARMUP_FACTOR: 0.001 50 | POLY_POWER: 0.9 51 | POLY_END_LR: 0.00001 52 | CHECKPOINT_PERIOD: 4000 53 | BACKBONE_LR_FACTOR: 0.1 54 | FIX_BACKBONE: False 55 | FIX_BACKBONE_BN: False 56 | USING_APEX: False 57 | OPTIMIZER: "Adam" # Sgd 58 | INIT_PARA: True 59 | 60 | INPUT: 61 | MIN_SIZE_TRAIN: (321,) 62 | MIN_SIZE_TEST: 321 63 | 64 | TEST: 65 | EVAL_PERIOD: 4000 66 | 67 | 68 | 69 | 70 | 71 | -------------------------------------------------------------------------------- /configs/WeaklyGrounding-RN101-C4.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | RPN: 4 | PRE_NMS_TOPK_TEST: 6000 5 | POST_NMS_TOPK_TEST: 1000 6 | ROI_HEADS: 7 | NAME: "StandardPooler" 8 | WEIGHTS: "" 9 | MASK_ON: False 10 | RESNETS: 11 | DEPTH: 101 12 | PRINT_WINDOWSIZE: 13 | VIS_TRAIN: 100 14 | VIS_TEST: 100 15 | VG: 16 | SPATIAL_FEAT: True 17 | PHRASE_SELECT_TYPE: "Sum" 18 | VOCAB_PHR_FILE: "./flickr30k_datasets/skip-thoughts/vocab_phr.json" 19 | VOCAB_FILE: "./flickr30k_datasets/skip-thoughts/vocab.json" 20 | GLOVE_DICT_FILE: './flickr30k_datasets/flickr30k_anno/skip_thoughts_dict.pkl' 21 | VOCAB_EMBEDDING_FILE: './flickr30k_datasets/flickr30k_anno/skip_vocab_embed.pkl' 22 | VOCAB_REL_FILE: "./flickr30k_datasets/skip-thoughts/vocab_rel.json" 23 | ATTR_DICT_PATH: '' 24 | ATTR_VOCAB_PATH: '' 25 | MAX_PHRASE_LEN: 5 26 | EVAL_THRESH: 0.5 27 | PRECOMP_TOPK: 30 28 | USING_ELMO: False 29 | EMBEDDING_SOURCE: 'Sent' 30 | LSTM_BIDIRECTION: False 31 | USING_DET_KNOWLEDGE: False 32 | USING_DET_SCORE: False 33 | NUM_CST_PHR: 1 34 | USING_CST_RCONST: False 35 | S2_TOPK: 10 36 | S3_TOPK: 6 37 | REGRESSION: False 38 | ML_REG: False 39 | REG_LOSS_FACTOR: 0.1 40 | REG_IOU: 0.7 41 | NON_MAX_REG_LOSS: False 42 | REG_GAP_SCORE: 0.3 43 | NETWORK: 'Baseline' 44 | REG_THRESH: 0.5 45 | LOSS_STAGE_FACTOR: 0.5 46 | ATTR_LOSS_FACTOR: 1.0 47 | REL_CLS_LOSS_FACTOR: 1.0 48 | SEMANTIC_NOUNS_TOPK: 600 49 | SEMANTIC_NOUNS_PATH: './flickr30k_datasets/flickr30k_anno/noun_cates.json' 50 | SEMANTIC_ATTR_TOPK: 79 51 | SEMANTIC_ATTR_PATH: './flickr30k_datasets/flickr30k_anno/attr_cates.json' 52 | SEM_NOUNS_LOSS_FACTOR: 1.0 53 | MESSAGE_PARAM: False 54 | 55 | RELATION: 56 | IS_ON: False 57 | INTRA_LAN: False 58 | VIG_ON: False 59 | JOINT_TRANS: False 60 | REL_CATE_PATH: './flickr30k_datasets/flickr30k_anno/rel_cates.json' 61 | REL_LOSS_FACTOR: 1.0 62 | 63 | 64 | ROI_BOX_HEAD: 65 | POOLER_RESOLUTION: 7 66 | POOLER_SAMPLING_RATIO: 2 67 | 68 | BACKBONE: 69 | NAME: "build_resnet101_backbone" 70 | PRETRAIN_PATH: './flickr30k_datasets/bottom-up-pretrained/bottomup_pretrained_10_100.pth' 71 | PRETRAIN_CLSW_PATH: './flickr30k_datasets/bottom-up-pretrained/bottomup_pretrained_clsw.pth' 72 | LAST_LAYER_STRIDE: 1 73 | 74 | DATALOADER: 75 | ASPECT_RATIO_GROUPING: True 76 | 77 | DATASETS: 78 | TRAIN: ("flickr30k_train",) 79 | TEST: ("flickr30k_val",) 80 | NAME: 'flickr30k' 81 | 82 | SOLVER: 83 | LR_SCHEDULER_NAME: "WarmupMultiStepLR" ## "WarmupPolyLR" 84 | IMS_PER_BATCH: 8 85 | BASE_LR: 0.02 86 | STEPS: (20000, 40000) 87 | MAX_ITER: 1000 88 | REG_START_ITER: 7500 89 | WEIGHT_DECAY: 0.0005 90 | WEIGHT_DECAY_BIAS: 0.0005 91 | BIAS_LR_FACTOR: 1.0 92 | WEIGHT_DECAY_NORM: 0.0005 93 
| WARMUP_ITERS: 500 94 | WARMUP_FACTOR: 0.001 95 | POLY_POWER: 0.9 96 | POLY_END_LR: 0.00001 97 | CHECKPOINT_PERIOD: 4000 98 | BACKBONE_LR_FACTOR: 0.1 99 | FIX_BACKBONE: True 100 | USING_APEX: False 101 | OPTIMIZER: "Adam" # Sgd 102 | INIT_PARA: False 103 | ATTR_CLS_LOSS: True 104 | 105 | 106 | INPUT: 107 | MIN_SIZE_TRAIN: (321,) 108 | MIN_SIZE_TEST: 321 109 | 110 | SEED: 10 111 | 112 | TEST: 113 | EVAL_PERIOD: 4000 114 | 115 | 116 | 117 | 118 | 119 | -------------------------------------------------------------------------------- /configs/quick_schedules/README.md: -------------------------------------------------------------------------------- 1 | These are quick configs for performance or accuracy regression tracking purposes. 2 | -------------------------------------------------------------------------------- /configs/quick_schedules/fast_rcnn_R_50_FPN_inference_acc_test.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../COCO-Detection/fast_rcnn_R_50_FPN_1x.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://COCO-Detection/fast_rcnn_R_50_FPN_1x/137635226/model_final_e5f7ce.pkl" 4 | DATASETS: 5 | TEST: ("coco_2017_val_100",) 6 | TEST: 7 | EXPECTED_RESULTS: [["bbox", "AP", 45.70, 0.02]] 8 | -------------------------------------------------------------------------------- /configs/quick_schedules/fast_rcnn_R_50_FPN_instant_test.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../COCO-Detection/fast_rcnn_R_50_FPN_1x.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | DATASETS: 5 | TRAIN: ("coco_2017_val_100",) 6 | PROPOSAL_FILES_TRAIN: ("detectron2://COCO-Detection/rpn_R_50_FPN_1x/137258492/coco_2017_val_box_proposals_ee0dad.pkl", ) 7 | TEST: ("coco_2017_val_100",) 8 | PROPOSAL_FILES_TEST: ("detectron2://COCO-Detection/rpn_R_50_FPN_1x/137258492/coco_2017_val_box_proposals_ee0dad.pkl", ) 9 | SOLVER: 10 | BASE_LR: 0.005 11 | STEPS: (30,) 12 | MAX_ITER: 40 13 | IMS_PER_BATCH: 4 14 | DATALOADER: 15 | NUM_WORKERS: 2 16 | -------------------------------------------------------------------------------- /configs/quick_schedules/keypoint_rcnn_R_50_FPN_inference_acc_test.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../COCO-Keypoints/keypoint_rcnn_R_50_FPN_3x.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://COCO-Keypoints/keypoint_rcnn_R_50_FPN_3x/137849621/model_final_a6e10b.pkl" 4 | DATASETS: 5 | TEST: ("keypoints_coco_2017_val_100",) 6 | TEST: 7 | EXPECTED_RESULTS: [["bbox", "AP", 52.47, 0.02], ["keypoints", "AP", 67.36, 0.02]] 8 | -------------------------------------------------------------------------------- /configs/quick_schedules/keypoint_rcnn_R_50_FPN_instant_test.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | KEYPOINT_ON: True 5 | DATASETS: 6 | TRAIN: ("keypoints_coco_2017_val_100",) 7 | TEST: ("keypoints_coco_2017_val_100",) 8 | SOLVER: 9 | BASE_LR: 0.005 10 | STEPS: (30,) 11 | MAX_ITER: 40 12 | IMS_PER_BATCH: 4 13 | DATALOADER: 14 | NUM_WORKERS: 2 15 | -------------------------------------------------------------------------------- /configs/quick_schedules/keypoint_rcnn_R_50_FPN_normalized_training_acc_test.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: 
"detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | KEYPOINT_ON: True 5 | RESNETS: 6 | DEPTH: 50 7 | ROI_HEADS: 8 | BATCH_SIZE_PER_IMAGE: 256 9 | NUM_CLASSES: 1 10 | ROI_KEYPOINT_HEAD: 11 | POOLER_RESOLUTION: 14 12 | POOLER_SAMPLING_RATIO: 2 13 | NORMALIZE_LOSS_BY_VISIBLE_KEYPOINTS: False 14 | LOSS_WEIGHT: 4.0 15 | ROI_BOX_HEAD: 16 | SMOOTH_L1_BETA: 1.0 # Keypoint AP degrades when using plain L1 loss 17 | RPN: 18 | SMOOTH_L1_BETA: 0.2 # Keypoint AP degrades when using plain L1 loss 19 | DATASETS: 20 | TRAIN: ("keypoints_coco_2017_val",) 21 | TEST: ("keypoints_coco_2017_val",) 22 | INPUT: 23 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 24 | SOLVER: 25 | WARMUP_FACTOR: 0.33333333 26 | WARMUP_ITERS: 100 27 | STEPS: (5500, 5800) 28 | MAX_ITER: 6000 29 | TEST: 30 | EXPECTED_RESULTS: [["bbox", "AP", 55.35, 1.0], ["keypoints", "AP", 76.91, 1.0]] 31 | -------------------------------------------------------------------------------- /configs/quick_schedules/keypoint_rcnn_R_50_FPN_training_acc_test.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | KEYPOINT_ON: True 5 | RESNETS: 6 | DEPTH: 50 7 | ROI_HEADS: 8 | BATCH_SIZE_PER_IMAGE: 256 9 | NUM_CLASSES: 1 10 | ROI_KEYPOINT_HEAD: 11 | POOLER_RESOLUTION: 14 12 | POOLER_SAMPLING_RATIO: 2 13 | ROI_BOX_HEAD: 14 | SMOOTH_L1_BETA: 1.0 # Keypoint AP degrades when using plain L1 loss 15 | RPN: 16 | SMOOTH_L1_BETA: 0.2 # Keypoint AP degrades when using plain L1 loss 17 | DATASETS: 18 | TRAIN: ("keypoints_coco_2017_val",) 19 | TEST: ("keypoints_coco_2017_val",) 20 | INPUT: 21 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 22 | SOLVER: 23 | WARMUP_FACTOR: 0.33333333 24 | WARMUP_ITERS: 100 25 | STEPS: (5500, 5800) 26 | MAX_ITER: 6000 27 | TEST: 28 | EXPECTED_RESULTS: [["bbox", "AP", 53.5, 1.0], ["keypoints", "AP", 72.4, 1.0]] 29 | -------------------------------------------------------------------------------- /configs/quick_schedules/mask_rcnn_R_50_C4_inference_acc_test.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../COCO-InstanceSegmentation/mask_rcnn_R_50_C4_3x.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://COCO-InstanceSegmentation/mask_rcnn_R_50_C4_3x/137849525/model_final_4ce675.pkl" 4 | DATASETS: 5 | TEST: ("coco_2017_val_100",) 6 | TEST: 7 | EXPECTED_RESULTS: [["bbox", "AP", 47.37, 0.02], ["segm", "AP", 40.99, 0.02]] 8 | -------------------------------------------------------------------------------- /configs/quick_schedules/mask_rcnn_R_50_C4_instant_test.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-C4.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: True 5 | DATASETS: 6 | TRAIN: ("coco_2017_val_100",) 7 | TEST: ("coco_2017_val_100",) 8 | SOLVER: 9 | BASE_LR: 0.001 10 | STEPS: (30,) 11 | MAX_ITER: 40 12 | IMS_PER_BATCH: 4 13 | DATALOADER: 14 | NUM_WORKERS: 2 15 | -------------------------------------------------------------------------------- /configs/quick_schedules/mask_rcnn_R_50_C4_training_acc_test.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-C4.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | ROI_HEADS: 5 | BATCH_SIZE_PER_IMAGE: 256 6 | MASK_ON: True 7 | DATASETS: 8 | TRAIN: ("coco_2017_val",) 9 | TEST: ("coco_2017_val",) 10 | INPUT: 11 | 
MIN_SIZE_TRAIN: (600,) 12 | MAX_SIZE_TRAIN: 1000 13 | MIN_SIZE_TEST: 800 14 | MAX_SIZE_TEST: 1000 15 | SOLVER: 16 | IMS_PER_BATCH: 8 # base uses 16 17 | WARMUP_FACTOR: 0.33333 18 | WARMUP_ITERS: 100 19 | STEPS: (11000, 11600) 20 | MAX_ITER: 12000 21 | TEST: 22 | EXPECTED_RESULTS: [["bbox", "AP", 41.88, 0.7], ["segm", "AP", 33.79, 0.5]] 23 | -------------------------------------------------------------------------------- /configs/quick_schedules/mask_rcnn_R_50_DC5_inference_acc_test.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../COCO-InstanceSegmentation/mask_rcnn_R_50_DC5_3x.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://COCO-InstanceSegmentation/mask_rcnn_R_50_DC5_3x/137849551/model_final_84107b.pkl" 4 | DATASETS: 5 | TEST: ("coco_2017_val_100",) 6 | TEST: 7 | EXPECTED_RESULTS: [["bbox", "AP", 47.44, 0.02], ["segm", "AP", 42.94, 0.02]] 8 | -------------------------------------------------------------------------------- /configs/quick_schedules/mask_rcnn_R_50_FPN_inference_acc_test.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x/137849600/model_final_f10217.pkl" 4 | DATASETS: 5 | TEST: ("coco_2017_val_100",) 6 | TEST: 7 | EXPECTED_RESULTS: [["bbox", "AP", 47.34, 0.02], ["segm", "AP", 42.67, 0.02]] 8 | # expected results do not use test-time augmentation. TTA results are not verified. 9 | AUG: 10 | ENABLED: True 11 | MIN_SIZES: (400, 500) # to save some time 12 | -------------------------------------------------------------------------------- /configs/quick_schedules/mask_rcnn_R_50_FPN_instant_test.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: True 5 | DATASETS: 6 | TRAIN: ("coco_2017_val_100",) 7 | TEST: ("coco_2017_val_100",) 8 | SOLVER: 9 | BASE_LR: 0.005 10 | STEPS: (30,) 11 | MAX_ITER: 40 12 | IMS_PER_BATCH: 4 13 | DATALOADER: 14 | NUM_WORKERS: 2 15 | -------------------------------------------------------------------------------- /configs/quick_schedules/mask_rcnn_R_50_FPN_training_acc_test.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | ROI_HEADS: 5 | BATCH_SIZE_PER_IMAGE: 256 6 | MASK_ON: True 7 | DATASETS: 8 | TRAIN: ("coco_2017_val",) 9 | TEST: ("coco_2017_val",) 10 | INPUT: 11 | MIN_SIZE_TRAIN: (600,) 12 | MAX_SIZE_TRAIN: 1000 13 | MIN_SIZE_TEST: 800 14 | MAX_SIZE_TEST: 1000 15 | SOLVER: 16 | WARMUP_FACTOR: 0.3333333 17 | WARMUP_ITERS: 100 18 | STEPS: (5500, 5800) 19 | MAX_ITER: 6000 20 | TEST: 21 | EXPECTED_RESULTS: [["bbox", "AP", 42.8, 0.8], ["segm", "AP", 35.7, 0.8]] 22 | -------------------------------------------------------------------------------- /configs/quick_schedules/panoptic_fpn_R_50_inference_acc_test.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../COCO-PanopticSegmentation/panoptic_fpn_R_50_3x.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://COCO-PanopticSegmentation/panoptic_fpn_R_50_3x/139514569/model_final_c10459.pkl" 4 | DATASETS: 5 | TEST: ("coco_2017_val_100_panoptic_separated",) 6 | TEST: 7 | EXPECTED_RESULTS: [["bbox", "AP", 46.47, 0.02], ["segm", "AP", 
43.39, 0.02], ["sem_seg", "mIoU", 42.55, 0.02], ["panoptic_seg", "PQ", 38.99, 0.02]] 8 | -------------------------------------------------------------------------------- /configs/quick_schedules/panoptic_fpn_R_50_instant_test.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | META_ARCHITECTURE: "PanopticFPN" 4 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 5 | MASK_ON: True 6 | RESNETS: 7 | DEPTH: 50 8 | SEM_SEG_HEAD: 9 | LOSS_WEIGHT: 0.5 10 | DATASETS: 11 | TRAIN: ("coco_2017_val_100_panoptic_separated",) 12 | TEST: ("coco_2017_val_100_panoptic_separated",) 13 | SOLVER: 14 | BASE_LR: 0.005 15 | STEPS: (30,) 16 | MAX_ITER: 40 17 | IMS_PER_BATCH: 4 18 | DATALOADER: 19 | NUM_WORKERS: 2 20 | -------------------------------------------------------------------------------- /configs/quick_schedules/panoptic_fpn_R_50_training_acc_test.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | META_ARCHITECTURE: "PanopticFPN" 4 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 5 | MASK_ON: True 6 | RESNETS: 7 | DEPTH: 50 8 | SEM_SEG_HEAD: 9 | LOSS_WEIGHT: 0.5 10 | DATASETS: 11 | TRAIN: ("coco_2017_val_panoptic_separated",) 12 | TEST: ("coco_2017_val_panoptic_separated",) 13 | SOLVER: 14 | BASE_LR: 0.01 15 | WARMUP_FACTOR: 0.001 16 | WARMUP_ITERS: 500 17 | STEPS: (5500,) 18 | MAX_ITER: 7000 19 | TEST: 20 | EXPECTED_RESULTS: [["bbox", "AP", 46.80, 1.1], ["segm", "AP", 38.93, 0.7], ["sem_seg", "mIoU", 63.99, 0.9], ["panoptic_seg", "PQ", 48.23, 0.8]] 21 | -------------------------------------------------------------------------------- /configs/quick_schedules/retinanet_R_50_FPN_inference_acc_test.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../COCO-Detection/retinanet_R_50_FPN_3x.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://COCO-Detection/retinanet_R_50_FPN_3x/137849486/model_final_4cafe0.pkl" 4 | DATASETS: 5 | TEST: ("coco_2017_val_100",) 6 | TEST: 7 | EXPECTED_RESULTS: [["bbox", "AP", 44.36, 0.02]] 8 | -------------------------------------------------------------------------------- /configs/quick_schedules/retinanet_R_50_FPN_instant_test.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../COCO-Detection/retinanet_R_50_FPN_1x.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | DATASETS: 5 | TRAIN: ("coco_2017_val_100",) 6 | TEST: ("coco_2017_val_100",) 7 | SOLVER: 8 | BASE_LR: 0.005 9 | STEPS: (30,) 10 | MAX_ITER: 40 11 | IMS_PER_BATCH: 4 12 | DATALOADER: 13 | NUM_WORKERS: 2 14 | -------------------------------------------------------------------------------- /configs/quick_schedules/rpn_R_50_FPN_inference_acc_test.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../COCO-Detection/rpn_R_50_FPN_1x.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://COCO-Detection/rpn_R_50_FPN_1x/137258492/model_final_02ce48.pkl" 4 | DATASETS: 5 | TEST: ("coco_2017_val_100",) 6 | TEST: 7 | EXPECTED_RESULTS: [["box_proposals", "AR@1000", 58.16, 0.02]] 8 | -------------------------------------------------------------------------------- /configs/quick_schedules/rpn_R_50_FPN_instant_test.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../COCO-Detection/rpn_R_50_FPN_1x.yaml" 2 | MODEL: 3 | WEIGHTS: 
"detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | DATASETS: 5 | TRAIN: ("coco_2017_val_100",) 6 | TEST: ("coco_2017_val_100",) 7 | SOLVER: 8 | STEPS: (30,) 9 | MAX_ITER: 40 10 | BASE_LR: 0.005 11 | IMS_PER_BATCH: 4 12 | DATALOADER: 13 | NUM_WORKERS: 2 14 | -------------------------------------------------------------------------------- /configs/quick_schedules/semantic_R_50_FPN_inference_acc_test.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | META_ARCHITECTURE: "SemanticSegmentor" 4 | WEIGHTS: "detectron2://semantic_R_50_FPN_1x/111802073/model_final_c18079783c55a94968edc28b7101c5f0.pkl" 5 | RESNETS: 6 | DEPTH: 50 7 | DATASETS: 8 | TEST: ("coco_2017_val_100_panoptic_stuffonly",) 9 | TEST: 10 | EXPECTED_RESULTS: [["sem_seg", "mIoU", 39.53, 0.02], ["sem_seg", "mACC", 51.50, 0.02]] 11 | -------------------------------------------------------------------------------- /configs/quick_schedules/semantic_R_50_FPN_instant_test.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | META_ARCHITECTURE: "SemanticSegmentor" 4 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 5 | RESNETS: 6 | DEPTH: 50 7 | DATASETS: 8 | TRAIN: ("coco_2017_val_100_panoptic_stuffonly",) 9 | TEST: ("coco_2017_val_100_panoptic_stuffonly",) 10 | INPUT: 11 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 12 | SOLVER: 13 | BASE_LR: 0.005 14 | STEPS: (30,) 15 | MAX_ITER: 40 16 | IMS_PER_BATCH: 4 17 | DATALOADER: 18 | NUM_WORKERS: 2 19 | -------------------------------------------------------------------------------- /configs/quick_schedules/semantic_R_50_FPN_training_acc_test.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | META_ARCHITECTURE: "SemanticSegmentor" 4 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 5 | RESNETS: 6 | DEPTH: 50 7 | DATASETS: 8 | TRAIN: ("coco_2017_val_panoptic_stuffonly",) 9 | TEST: ("coco_2017_val_panoptic_stuffonly",) 10 | SOLVER: 11 | BASE_LR: 0.01 12 | WARMUP_FACTOR: 0.001 13 | WARMUP_ITERS: 300 14 | STEPS: (5500,) 15 | MAX_ITER: 7000 16 | TEST: 17 | EXPECTED_RESULTS: [["sem_seg", "mIoU", 76.51, 1.0], ["sem_seg", "mACC", 83.25, 1.0]] 18 | INPUT: 19 | # no scale augmentation 20 | MIN_SIZE_TRAIN: (800, ) 21 | -------------------------------------------------------------------------------- /demo/README.md: -------------------------------------------------------------------------------- 1 | ## Webcam and Jupyter notebook demo 2 | 3 | This folder contains a simple webcam demo that illustrates how you can use `detectron2` for inference. 
4 | 5 | You can start it by running it from this folder, using one of the following commands: 6 | ```bash 7 | # by default, it runs on the GPU 8 | # for best results, use min-image-size 800 9 | python webcam.py --min-image-size 800 10 | # can also run it on the CPU 11 | python webcam.py --min-image-size 300 MODEL.DEVICE cpu 12 | # or change the model that you want to use 13 | python webcam.py --config-file ../configs/quick_schedules/mask_rcnn_R_50_FPN_inference_acc_test.yaml --min-image-size 300 MODEL.DEVICE cpu 14 | # in order to see the probability heatmaps, pass --show-mask-heatmaps 15 | python webcam.py --min-image-size 300 --show-mask-heatmaps MODEL.DEVICE cpu 16 | ``` 17 | -------------------------------------------------------------------------------- /detectron2/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | 3 | from .utils.env import setup_environment 4 | 5 | setup_environment() 6 | 7 | 8 | __version__ = "0.1" 9 | -------------------------------------------------------------------------------- /detectron2/checkpoint/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 3 | # File: 4 | 5 | 6 | from . import model_zoo as _UNUSED # register the handler 7 | from .detection_checkpoint import DetectionCheckpointer 8 | from fvcore.common.checkpoint import Checkpointer, PeriodicCheckpointer 9 | 10 | __all__ = ["Checkpointer", "PeriodicCheckpointer", "DetectionCheckpointer"] 11 | -------------------------------------------------------------------------------- /detectron2/checkpoint/detection_checkpoint.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | import pickle 3 | from fvcore.common.checkpoint import Checkpointer 4 | from fvcore.common.file_io import PathManager 5 | 6 | import detectron2.utils.comm as comm 7 | 8 | from .c2_model_loading import align_and_update_state_dicts 9 | 10 | 11 | class DetectionCheckpointer(Checkpointer): 12 | """ 13 | Same as :class:`Checkpointer`, but is able to handle models in detectron & detectron2 14 | model zoo, and apply conversions for legacy models. 
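Example (a minimal usage sketch for illustration; it assumes `model` is a built detectron2 model and `cfg` a loaded config, neither of which is defined here):

        checkpointer = DetectionCheckpointer(model, save_dir=cfg.OUTPUT_DIR)
        checkpointer.load(cfg.MODEL.WEIGHTS)  # handles zoo .pkl files as well as native .pth checkpoints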
15 | """ 16 | 17 | def __init__(self, model, save_dir="", *, save_to_disk=None, **checkpointables): 18 | is_main_process = comm.is_main_process() 19 | super().__init__( 20 | model, 21 | save_dir, 22 | save_to_disk=is_main_process if save_to_disk is None else save_to_disk, 23 | **checkpointables, 24 | ) 25 | 26 | def _load_file(self, filename): 27 | if filename.endswith(".pkl"): 28 | with PathManager.open(filename, "rb") as f: 29 | data = pickle.load(f, encoding="latin1") 30 | if "model" in data and "__author__" in data: 31 | # file is in Detectron2 model zoo format 32 | self.logger.info("Reading a file from '{}'".format(data["__author__"])) 33 | return data 34 | else: 35 | # assume file is from Caffe2 / Detectron1 model zoo 36 | if "blobs" in data: 37 | # Detection models have "blobs", but ImageNet models don't 38 | data = data["blobs"] 39 | data = {k: v for k, v in data.items() if not k.endswith("_momentum")} 40 | return {"model": data, "__author__": "Caffe2", "matching_heuristics": True} 41 | 42 | loaded = super()._load_file(filename) # load native pth checkpoint 43 | if "model" not in loaded: 44 | loaded = {"model": loaded} 45 | return loaded 46 | 47 | def _load_model(self, checkpoint): 48 | if checkpoint.get("matching_heuristics", False): 49 | self._convert_ndarray_to_tensor(checkpoint["model"]) 50 | # convert weights by name-matching heuristics 51 | model_state_dict = self.model.state_dict() 52 | align_and_update_state_dicts( 53 | model_state_dict, 54 | checkpoint["model"], 55 | c2_conversion=checkpoint.get("__author__", None) == "Caffe2", 56 | ) 57 | checkpoint["model"] = model_state_dict 58 | # for non-caffe2 models, use standard ways to load it 59 | super()._load_model(checkpoint) 60 | -------------------------------------------------------------------------------- /detectron2/config/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | from .compat import downgrade_config, upgrade_config 3 | from .config import CfgNode, get_cfg, global_cfg, set_global_cfg 4 | 5 | 6 | __all__ = [ 7 | "CfgNode", 8 | "get_cfg", 9 | "global_cfg", 10 | "set_global_cfg", 11 | "downgrade_config", 12 | "upgrade_config", 13 | ] 14 | -------------------------------------------------------------------------------- /detectron2/data/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | from . import transforms # isort:skip 3 | 4 | from .build import ( 5 | build_detection_test_loader, 6 | build_detection_train_loader, 7 | get_detection_dataset_dicts, 8 | load_proposals_into_dataset, 9 | print_instances_class_histogram, 10 | ) 11 | from .catalog import DatasetCatalog, MetadataCatalog 12 | from .common import DatasetFromList, MapDataset 13 | from .dataset_mapper import DatasetMapper 14 | 15 | # ensure the builtin datasets are registered 16 | from . import datasets, samplers # isort:skip 17 | 18 | __all__ = [k for k in globals().keys() if not k.startswith("_")] 19 | -------------------------------------------------------------------------------- /detectron2/data/datasets/README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | ### Common Datasets 4 | 5 | The dataset implemented here do not need to load the data into the final format. 
6 | It should provide the minimal data structure needed to use the dataset, so it can be very efficient. 7 | 8 | For example, for an image dataset, just provide the file names and labels, but don't read the images. 9 | Let the downstream decide how to read. 10 | -------------------------------------------------------------------------------- /detectron2/data/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | from .cityscapes import load_cityscapes_instances 3 | from .coco import load_coco_json, load_sem_seg 4 | from .lvis import load_lvis_json, register_lvis_instances, get_lvis_instances_meta 5 | from .register_coco import register_coco_instances, register_coco_panoptic_separated 6 | from . import builtin # ensure the builtin datasets are registered 7 | from .refcoco import register_refcoco, load_refcoco_instances 8 | 9 | 10 | __all__ = [k for k in globals().keys() if "builtin" not in k and not k.startswith("_")] 11 | -------------------------------------------------------------------------------- /detectron2/data/datasets/pascal_voc.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 3 | 4 | from fvcore.common.file_io import PathManager 5 | import os 6 | import numpy as np 7 | import xml.etree.ElementTree as ET 8 | 9 | from detectron2.structures import BoxMode 10 | from detectron2.data import DatasetCatalog, MetadataCatalog 11 | 12 | 13 | __all__ = ["register_pascal_voc"] 14 | 15 | 16 | # fmt: off 17 | CLASS_NAMES = [ 18 | "aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", 19 | "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", 20 | "pottedplant", "sheep", "sofa", "train", "tvmonitor", 21 | ] 22 | # fmt: on 23 | 24 | 25 | def load_voc_instances(dirname: str, split: str): 26 | """ 27 | Load Pascal VOC detection annotations to Detectron2 format. 28 | 29 | Args: 30 | dirname: Contains "Annotations", "ImageSets", "JPEGImages" 31 | split (str): one of "train", "test", "val", "trainval" 32 | """ 33 | with PathManager.open(os.path.join(dirname, "ImageSets", "Main", split + ".txt")) as f: 34 | fileids = np.loadtxt(f, dtype=np.str) 35 | 36 | dicts = [] 37 | for fileid in fileids: 38 | anno_file = os.path.join(dirname, "Annotations", fileid + ".xml") 39 | jpeg_file = os.path.join(dirname, "JPEGImages", fileid + ".jpg") 40 | 41 | tree = ET.parse(anno_file) 42 | 43 | r = { 44 | "file_name": jpeg_file, 45 | "image_id": fileid, 46 | "height": int(tree.findall("./size/height")[0].text), 47 | "width": int(tree.findall("./size/width")[0].text), 48 | } 49 | instances = [] 50 | 51 | for obj in tree.findall("object"): 52 | cls = obj.find("name").text 53 | # We include "difficult" samples in training. 54 | # Based on limited experiments, they don't hurt accuracy. 55 | # difficult = int(obj.find("difficult").text) 56 | # if difficult == 1: 57 | # continue 58 | bbox = obj.find("bndbox") 59 | bbox = [float(bbox.find(x).text) for x in ["xmin", "ymin", "xmax", "ymax"]] 60 | # Original annotations are integers in the range [1, W or H] 61 | # Assuming they mean 1-based pixel indices (inclusive), 62 | # a box with annotation (xmin=1, xmax=W) covers the whole image.
63 | # In coordinate space this is represented by (xmin=0, xmax=W) 64 | bbox[0] -= 1.0 65 | bbox[1] -= 1.0 66 | instances.append( 67 | {"category_id": CLASS_NAMES.index(cls), "bbox": bbox, "bbox_mode": BoxMode.XYXY_ABS} 68 | ) 69 | r["annotations"] = instances 70 | dicts.append(r) 71 | return dicts 72 | 73 | 74 | def register_pascal_voc(name, dirname, split, year): 75 | DatasetCatalog.register(name, lambda: load_voc_instances(dirname, split)) 76 | MetadataCatalog.get(name).set( 77 | thing_classes=CLASS_NAMES, dirname=dirname, year=year, split=split 78 | ) 79 | -------------------------------------------------------------------------------- /detectron2/data/samplers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | from .distributed_sampler import InferenceSampler, RepeatFactorTrainingSampler, TrainingSampler, DistributedSampler, IterationBasedBatchSampler 3 | from .grouped_batch_sampler import GroupedBatchSampler 4 | 5 | __all__ = [ 6 | "GroupedBatchSampler", 7 | "TrainingSampler", 8 | "InferenceSampler", 9 | "RepeatFactorTrainingSampler", 10 | "DistributedSampler", 11 | "IterationBasedBatchSampler" 12 | ] 13 | -------------------------------------------------------------------------------- /detectron2/data/samplers/grouped_batch_sampler.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | import torch 3 | from torch.utils.data.sampler import BatchSampler, Sampler 4 | 5 | 6 | class GroupedBatchSampler(BatchSampler): 7 | """ 8 | Wraps another sampler to yield a mini-batch of indices. 9 | It enforces that each batch only contains elements from the same group. 10 | It also tries to provide mini-batches that follow an ordering as close 11 | as possible to the ordering from the original sampler. 12 | 13 | Arguments: 14 | sampler (Sampler): Base sampler. 15 | group_ids (list[int]): If the sampler produces indices in range [0, N), 16 | `group_ids` must be a list of `N` ints which contains the group id of each sample. 17 | The group ids must be a continuous set of integers starting from 18 | 0, i.e. they must be in the range [0, num_groups). 19 | batch_size (int): Size of mini-batch.
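Example (an illustrative sketch; any base sampler works, a `SequentialSampler` is used here):

        sampler = torch.utils.data.SequentialSampler(range(6))
        batches = list(GroupedBatchSampler(sampler, group_ids=[0, 1, 0, 1, 0, 1], batch_size=2))
        # batches == [[0, 2], [1, 3]]; indices 4 and 5 stay buffered, since neither
        # group fills another complete batch before the base sampler is exhausted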
20 | """ 21 | 22 | def __init__(self, sampler, group_ids, batch_size): 23 | if not isinstance(sampler, Sampler): 24 | raise ValueError( 25 | "sampler should be an instance of " 26 | "torch.utils.data.Sampler, but got sampler={}".format(sampler) 27 | ) 28 | self.sampler = sampler 29 | self.group_ids = torch.as_tensor(group_ids) 30 | assert self.group_ids.dim() == 1 31 | self.batch_size = batch_size 32 | self.groups = torch.unique(self.group_ids).sort(0)[0] 33 | # group ids must range in [0, #group) 34 | assert self.groups[0].item() == 0 and self.groups[-1].item() == len(self.groups) - 1 35 | 36 | # buffer the indices of each group until batch size is reached 37 | self.buffer_per_group = [[] for k in self.groups] 38 | 39 | def __iter__(self): 40 | for idx in self.sampler: 41 | group_id = self.group_ids[idx] 42 | group_buffer = self.buffer_per_group[group_id] 43 | group_buffer.append(idx) 44 | if len(group_buffer) == self.batch_size: 45 | yield group_buffer[:] # yield a copy of the list 46 | del group_buffer[:] 47 | -------------------------------------------------------------------------------- /detectron2/data/transforms/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | from .transform import * 3 | from fvcore.transforms.transform import * 4 | from .transform_gen import * 5 | 6 | __all__ = [k for k in globals().keys() if not k.startswith("_")] 7 | -------------------------------------------------------------------------------- /detectron2/engine/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | 3 | from .launch import * 4 | from .train_loop import * 5 | 6 | __all__ = [k for k in globals().keys() if not k.startswith("_")] 7 | 8 | 9 | # prefer to let hooks and defaults live in separate namespaces (therefore not in __all__) 10 | # but still make them available here 11 | from .hooks import * 12 | from .defaults import * 13 | -------------------------------------------------------------------------------- /detectron2/evaluation/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved 2 | from .cityscapes_evaluation import CityscapesEvaluator 3 | from .coco_evaluation import COCOEvaluator 4 | from .evaluator import DatasetEvaluator, DatasetEvaluators, inference_context, inference_on_dataset 5 | from .lvis_evaluation import LVISEvaluator 6 | from .panoptic_evaluation import COCOPanopticEvaluator 7 | from .pascal_voc_evaluation import PascalVOCDetectionEvaluator 8 | from .sem_seg_evaluation import SemSegEvaluator 9 | from .testing import print_csv_format, verify_results 10 | from .recoco_evaluation_grounding import RECOCOEvaluator 11 | from .flickr30k_evaluation_grounding import FLICKR30KEvaluator 12 | from .flickr30k_evaluation_grounding_lite import FLICKR30KEvaluator as FLICKR30KEvaluatorLite 13 | from .flickr30k_evaluation_grounding_v1 import FLICKR30KEvaluator as FLICKR30KEvaluatorV1 14 | from .flickr30k_evaluation_grounding_reg import FLICKR30KEvaluator as FLICKR30KEvaluatorReg 15 | from .flickr30k_evaluation_kac import FLICKR30KEvaluator as FLICKR30KEvaluatorKAC 16 | from .flickr30k_evaluation_grounding_reg_ml import FLICKR30KEvaluator as FLICKR30KEvaluatorREGML 17 | 18 | 19 | __all__ = [k for k in globals().keys() if not k.startswith("_")] 20 | -------------------------------------------------------------------------------- /detectron2/evaluation/testing.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | import logging 3 | import numpy as np 4 | import pprint 5 | import sys 6 | from collections import Mapping, OrderedDict 7 | from ..utils.events import training_tags, val_tags 8 | 9 | 10 | def print_csv_format(results): 11 | """ 12 | Print main metrics in a format similar to Detectron, 13 | so that they are easy to copypaste into a spreadsheet. 14 | 15 | Args: 16 | results (OrderedDict[dict]): task_name -> {metric -> score} 17 | """ 18 | assert isinstance(results, OrderedDict), results # unordered results cannot be properly printed 19 | logger = logging.getLogger(__name__) 20 | for task, res in results.items(): 21 | # Don't print "AP-category" metrics since they are usually not tracked. 22 | important_res = [(k, v) for k, v in res.items() if "-" not in k] 23 | logger.info("copypaste: Task: {}".format(task)) 24 | logger.info("copypaste: " + ",".join([k[0] for k in important_res])) 25 | logger.info("copypaste: " + ",".join(["{0:.4f}".format(k[1]) for k in important_res])) 26 | 27 | 28 | def verify_results(cfg, results): 29 | """ 30 | Args: 31 | results (OrderedDict[dict]): task_name -> {metric -> score} 32 | 33 | Returns: 34 | bool: whether the verification succeeds or not 35 | """ 36 | expected_results = cfg.TEST.EXPECTED_RESULTS 37 | if not len(expected_results): 38 | return True 39 | 40 | ok = True 41 | for task, metric, expected, tolerance in expected_results: 42 | actual = results[task][metric] 43 | if not np.isfinite(actual): 44 | ok = False 45 | diff = abs(actual - expected) 46 | if diff > tolerance: 47 | ok = False 48 | 49 | logger = logging.getLogger(__name__) 50 | if not ok: 51 | logger.error("Result verification failed!") 52 | logger.error("Expected Results: " + str(expected_results)) 53 | logger.error("Actual Results: " + pprint.pformat(results)) 54 | 55 | sys.exit(1) 56 | else: 57 | logger.info("Results verification passed.") 58 | return ok 59 | 60 | 61 | def flatten_results_dict(results): 62 | """ 63 | Expand a hierarchical dict of scalars into a flat dict of scalars. 
64 | If results[k1][k2][k3] = v, the returned dict will have the entry 65 | {"k1/k2/k3": v}. 66 | 67 | Args: 68 | results (dict): 69 | """ 70 | r = {} 71 | for k, v in results.items(): 72 | if isinstance(v, Mapping): 73 | v = flatten_results_dict(v) 74 | for kk, vv in v.items(): 75 | r[k + "/" + kk] = vv 76 | else: 77 | r[k] = v 78 | return r 79 | -------------------------------------------------------------------------------- /detectron2/layers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | from .batch_norm import FrozenBatchNorm2d, get_norm, NaiveSyncBatchNorm 3 | from .deform_conv import DeformConv, ModulatedDeformConv 4 | from .mask_ops import paste_masks_in_image 5 | from .nms import batched_nms, batched_nms_rotated, nms, nms_rotated 6 | from .roi_align import ROIAlign, roi_align 7 | from .roi_align_rotated import ROIAlignRotated, roi_align_rotated 8 | from .shape_spec import ShapeSpec 9 | from .wrappers import BatchNorm2d, Conv2d, ConvTranspose2d, cat, interpolate 10 | 11 | __all__ = [k for k in globals().keys() if not k.startswith("_")] 12 | -------------------------------------------------------------------------------- /detectron2/layers/csrc/README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | To add a new Op: 4 | 5 | 1. Create a new directory 6 | 2. Implement new ops there 7 | 3. Declare its Python interface in `vision.cpp`. 8 | -------------------------------------------------------------------------------- /detectron2/layers/csrc/ROIAlignRotated/ROIAlignRotated.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates.
All Rights Reserved 2 | #pragma once 3 | #include <ATen/ATen.h> 4 | 5 | namespace detectron2 { 6 | 7 | at::Tensor ROIAlignRotated_forward_cpu( 8 | const at::Tensor& input, 9 | const at::Tensor& rois, 10 | const float spatial_scale, 11 | const int pooled_height, 12 | const int pooled_width, 13 | const int sampling_ratio); 14 | 15 | at::Tensor ROIAlignRotated_backward_cpu( 16 | const at::Tensor& grad, 17 | const at::Tensor& rois, 18 | const float spatial_scale, 19 | const int pooled_height, 20 | const int pooled_width, 21 | const int batch_size, 22 | const int channels, 23 | const int height, 24 | const int width, 25 | const int sampling_ratio); 26 | 27 | #ifdef WITH_CUDA 28 | at::Tensor ROIAlignRotated_forward_cuda( 29 | const at::Tensor& input, 30 | const at::Tensor& rois, 31 | const float spatial_scale, 32 | const int pooled_height, 33 | const int pooled_width, 34 | const int sampling_ratio); 35 | 36 | at::Tensor ROIAlignRotated_backward_cuda( 37 | const at::Tensor& grad, 38 | const at::Tensor& rois, 39 | const float spatial_scale, 40 | const int pooled_height, 41 | const int pooled_width, 42 | const int batch_size, 43 | const int channels, 44 | const int height, 45 | const int width, 46 | const int sampling_ratio); 47 | #endif 48 | 49 | // Interface for Python 50 | inline at::Tensor ROIAlignRotated_forward( 51 | const at::Tensor& input, 52 | const at::Tensor& rois, 53 | const float spatial_scale, 54 | const int pooled_height, 55 | const int pooled_width, 56 | const int sampling_ratio) { 57 | if (input.type().is_cuda()) { 58 | #ifdef WITH_CUDA 59 | return ROIAlignRotated_forward_cuda( 60 | input, 61 | rois, 62 | spatial_scale, 63 | pooled_height, 64 | pooled_width, 65 | sampling_ratio); 66 | #else 67 | AT_ERROR("Not compiled with GPU support"); 68 | #endif 69 | } 70 | return ROIAlignRotated_forward_cpu( 71 | input, rois, spatial_scale, pooled_height, pooled_width, sampling_ratio); 72 | } 73 | 74 | inline at::Tensor ROIAlignRotated_backward( 75 | const at::Tensor& grad, 76 | const at::Tensor& rois, 77 | const float spatial_scale, 78 | const int pooled_height, 79 | const int pooled_width, 80 | const int batch_size, 81 | const int channels, 82 | const int height, 83 | const int width, 84 | const int sampling_ratio) { 85 | if (grad.type().is_cuda()) { 86 | #ifdef WITH_CUDA 87 | return ROIAlignRotated_backward_cuda( 88 | grad, 89 | rois, 90 | spatial_scale, 91 | pooled_height, 92 | pooled_width, 93 | batch_size, 94 | channels, 95 | height, 96 | width, 97 | sampling_ratio); 98 | #else 99 | AT_ERROR("Not compiled with GPU support"); 100 | #endif 101 | } 102 | return ROIAlignRotated_backward_cpu( 103 | grad, 104 | rois, 105 | spatial_scale, 106 | pooled_height, 107 | pooled_width, 108 | batch_size, 109 | channels, 110 | height, 111 | width, 112 | sampling_ratio); 113 | } 114 | 115 | } // namespace detectron2 116 | -------------------------------------------------------------------------------- /detectron2/layers/csrc/box_iou_rotated/box_iou_rotated.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates.
All Rights Reserved 2 | #pragma once 3 | #include <ATen/ATen.h> 4 | 5 | namespace detectron2 { 6 | 7 | at::Tensor box_iou_rotated_cpu( 8 | const at::Tensor& boxes1, 9 | const at::Tensor& boxes2); 10 | 11 | #ifdef WITH_CUDA 12 | at::Tensor box_iou_rotated_cuda( 13 | const at::Tensor& boxes1, 14 | const at::Tensor& boxes2); 15 | #endif 16 | 17 | // Interface for Python 18 | // inline is needed to prevent multiple function definitions when this header is 19 | // included by different cpps 20 | inline at::Tensor box_iou_rotated( 21 | const at::Tensor& boxes1, 22 | const at::Tensor& boxes2) { 23 | assert(boxes1.device().is_cuda() == boxes2.device().is_cuda()); 24 | if (boxes1.device().is_cuda()) { 25 | #ifdef WITH_CUDA 26 | return box_iou_rotated_cuda(boxes1, boxes2); 27 | #else 28 | AT_ERROR("Not compiled with GPU support"); 29 | #endif 30 | } 31 | 32 | return box_iou_rotated_cpu(boxes1, boxes2); 33 | } 34 | 35 | } // namespace detectron2 36 | -------------------------------------------------------------------------------- /detectron2/layers/csrc/box_iou_rotated/box_iou_rotated_cpu.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | #include "box_iou_rotated.h" 3 | #include "box_iou_rotated_utils.h" 4 | 5 | namespace detectron2 { 6 | 7 | template <typename T> 8 | void box_iou_rotated_cpu_kernel( 9 | const at::Tensor& boxes1, 10 | const at::Tensor& boxes2, 11 | at::Tensor& ious) { 12 | auto widths1 = boxes1.select(1, 2).contiguous(); 13 | auto heights1 = boxes1.select(1, 3).contiguous(); 14 | auto widths2 = boxes2.select(1, 2).contiguous(); 15 | auto heights2 = boxes2.select(1, 3).contiguous(); 16 | 17 | at::Tensor areas1 = widths1 * heights1; 18 | at::Tensor areas2 = widths2 * heights2; 19 | 20 | auto num_boxes1 = boxes1.size(0); 21 | auto num_boxes2 = boxes2.size(0); 22 | 23 | for (int i = 0; i < num_boxes1; i++) { 24 | for (int j = 0; j < num_boxes2; j++) { 25 | ious[i * num_boxes2 + j] = single_box_iou_rotated<T>( 26 | boxes1[i].data_ptr<T>(), boxes2[j].data_ptr<T>()); 27 | } 28 | } 29 | } 30 | 31 | at::Tensor box_iou_rotated_cpu( 32 | const at::Tensor& boxes1, 33 | const at::Tensor& boxes2) { 34 | auto num_boxes1 = boxes1.size(0); 35 | auto num_boxes2 = boxes2.size(0); 36 | at::Tensor ious = 37 | at::empty({num_boxes1 * num_boxes2}, boxes1.options().dtype(at::kFloat)); 38 | 39 | box_iou_rotated_cpu_kernel<float>(boxes1, boxes2, ious); 40 | 41 | // reshape from 1d array to 2d array 42 | auto shape = std::vector<int64_t>{num_boxes1, num_boxes2}; 43 | return ious.reshape(shape); 44 | } 45 | 46 | } // namespace detectron2 47 | -------------------------------------------------------------------------------- /detectron2/layers/csrc/nms_rotated/nms_rotated.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates.
All Rights Reserved 2 | #pragma once 3 | #include <ATen/ATen.h> 4 | 5 | namespace detectron2 { 6 | 7 | at::Tensor nms_rotated_cpu( 8 | const at::Tensor& dets, 9 | const at::Tensor& scores, 10 | const float iou_threshold); 11 | 12 | #ifdef WITH_CUDA 13 | at::Tensor nms_rotated_cuda( 14 | const at::Tensor& dets, 15 | const at::Tensor& scores, 16 | const float iou_threshold); 17 | #endif 18 | 19 | // Interface for Python 20 | // inline is needed to prevent multiple function definitions when this header is 21 | // included by different cpps 22 | inline at::Tensor nms_rotated( 23 | const at::Tensor& dets, 24 | const at::Tensor& scores, 25 | const float iou_threshold) { 26 | assert(dets.device().is_cuda() == scores.device().is_cuda()); 27 | if (dets.device().is_cuda()) { 28 | #ifdef WITH_CUDA 29 | return nms_rotated_cuda(dets, scores, iou_threshold); 30 | #else 31 | AT_ERROR("Not compiled with GPU support"); 32 | #endif 33 | } 34 | 35 | return nms_rotated_cpu(dets, scores, iou_threshold); 36 | } 37 | 38 | } // namespace detectron2 39 | -------------------------------------------------------------------------------- /detectron2/layers/csrc/nms_rotated/nms_rotated_cpu.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | #include "../box_iou_rotated/box_iou_rotated_utils.h" 3 | #include "nms_rotated.h" 4 | 5 | namespace detectron2 { 6 | 7 | template <typename T> 8 | at::Tensor nms_rotated_cpu_kernel( 9 | const at::Tensor& dets, 10 | const at::Tensor& scores, 11 | const float iou_threshold) { 12 | // nms_rotated_cpu_kernel is modified from torchvision's nms_cpu_kernel, 13 | // however, the code in this function is much shorter because 14 | // we delegate the IoU computation for rotated boxes to 15 | // the single_box_iou_rotated function in box_iou_rotated_utils.h 16 | AT_ASSERTM(!dets.type().is_cuda(), "dets must be a CPU tensor"); 17 | AT_ASSERTM(!scores.type().is_cuda(), "scores must be a CPU tensor"); 18 | AT_ASSERTM( 19 | dets.type() == scores.type(), "dets should have the same type as scores"); 20 | 21 | if (dets.numel() == 0) { 22 | return at::empty({0}, dets.options().dtype(at::kLong)); 23 | } 24 | 25 | auto order_t = std::get<1>(scores.sort(0, /* descending=*/true)); 26 | 27 | auto ndets = dets.size(0); 28 | at::Tensor suppressed_t = at::zeros({ndets}, dets.options().dtype(at::kByte)); 29 | at::Tensor keep_t = at::zeros({ndets}, dets.options().dtype(at::kLong)); 30 | 31 | auto suppressed = suppressed_t.data_ptr<uint8_t>(); 32 | auto keep = keep_t.data_ptr<int64_t>(); 33 | auto order = order_t.data_ptr<int64_t>(); 34 | 35 | int64_t num_to_keep = 0; 36 | 37 | for (int64_t _i = 0; _i < ndets; _i++) { 38 | auto i = order[_i]; 39 | if (suppressed[i] == 1) { 40 | continue; 41 | } 42 | 43 | keep[num_to_keep++] = i; 44 | 45 | for (int64_t _j = _i + 1; _j < ndets; _j++) { 46 | auto j = order[_j]; 47 | if (suppressed[j] == 1) { 48 | continue; 49 | } 50 | 51 | auto ovr = single_box_iou_rotated<T>( 52 | dets[i].data_ptr<T>(), dets[j].data_ptr<T>()); 53 | if (ovr >= iou_threshold) { 54 | suppressed[j] = 1; 55 | } 56 | } 57 | } 58 | return keep_t.narrow(/*dim=*/0, /*start=*/0, /*length=*/num_to_keep); 59 | } 60 | 61 | at::Tensor nms_rotated_cpu( 62 | const at::Tensor& dets, 63 | const at::Tensor& scores, 64 | const float iou_threshold) { 65 | auto result = at::empty({0}, dets.options()); 66 | 67 | AT_DISPATCH_FLOATING_TYPES(dets.type(), "nms_rotated", [&] { 68 | result = nms_rotated_cpu_kernel<scalar_t>(dets, scores, iou_threshold); 69 | }); 70 | return result; 71 | } 72 | 73 | } // namespace detectron2 74 | -------------------------------------------------------------------------------- /detectron2/layers/csrc/vision.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | #include "ROIAlign/ROIAlign.h" 3 | #include "ROIAlignRotated/ROIAlignRotated.h" 4 | #include "box_iou_rotated/box_iou_rotated.h" 5 | #include "deformable/deform_conv.h" 6 | #include "nms_rotated/nms_rotated.h" 7 | 8 | namespace detectron2 { 9 | 10 | // similar to 11 | // https://github.com/pytorch/pytorch/blob/master/aten/src/ATen/Version.cpp 12 | std::string get_compiler_version() { 13 | std::ostringstream ss; 14 | #if defined(__GNUC__) 15 | #ifndef __clang__ 16 | { ss << "GCC " << __GNUC__ << "." << __GNUC_MINOR__; } 17 | #endif 18 | #endif 19 | 20 | #if defined(__clang_major__) 21 | { 22 | ss << "clang " << __clang_major__ << "." << __clang_minor__ << "." 23 | << __clang_patchlevel__; 24 | } 25 | #endif 26 | 27 | #if defined(_MSC_VER) 28 | { ss << "MSVC " << _MSC_FULL_VER; } 29 | #endif 30 | return ss.str(); 31 | } 32 | 33 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 34 | m.def("get_compiler_version", &get_compiler_version, "get_compiler_version"); 35 | 36 | m.def("box_iou_rotated", &box_iou_rotated, "IoU for rotated boxes"); 37 | 38 | m.def("deform_conv_forward", &deform_conv_forward, "deform_conv_forward"); 39 | m.def( 40 | "deform_conv_backward_input", 41 | &deform_conv_backward_input, 42 | "deform_conv_backward_input"); 43 | m.def( 44 | "deform_conv_backward_filter", 45 | &deform_conv_backward_filter, 46 | "deform_conv_backward_filter"); 47 | m.def( 48 | "modulated_deform_conv_forward", 49 | &modulated_deform_conv_forward, 50 | "modulated_deform_conv_forward"); 51 | m.def( 52 | "modulated_deform_conv_backward", 53 | &modulated_deform_conv_backward, 54 | "modulated_deform_conv_backward"); 55 | 56 | m.def("nms_rotated", &nms_rotated, "NMS for rotated boxes"); 57 | 58 | m.def("roi_align_forward", &ROIAlign_forward, "ROIAlign_forward"); 59 | m.def("roi_align_backward", &ROIAlign_backward, "ROIAlign_backward"); 60 | 61 | m.def( 62 | "roi_align_rotated_forward", 63 | &ROIAlignRotated_forward, 64 | "Forward pass for Rotated ROI-Align Operator"); 65 | m.def( 66 | "roi_align_rotated_backward", 67 | &ROIAlignRotated_backward, 68 | "Backward pass for Rotated ROI-Align Operator"); 69 | } 70 | 71 | } // namespace detectron2 72 | -------------------------------------------------------------------------------- /detectron2/layers/gpu_hangon.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3.6 2 | # -*- coding: utf-8 -*- 3 | # @Time : 2020/6/25 12:07 4 | 5 | 6 | 7 | from gpustat.core import GPUStatCollection 8 | 9 | def get_gpu_status(): 10 | 11 | gpus_stats = GPUStatCollection.new_query() 12 | info = gpus_stats.jsonify()["gpus"] 13 | gpu_list = [] 14 | 15 | mem_ratio_threshold = 0.1 # a GPU counts as free if <10% of its memory is in use 16 | util_ratio_threshold = 10 # ... and its utilization is below 10% 17 | for idx, each in enumerate(info): 18 | mem_ratio = each["memory.used"] / each["memory.total"] 19 | util_ratio = each["utilization.gpu"] 20 | print(mem_ratio, util_ratio) 21 | if mem_ratio < mem_ratio_threshold and util_ratio < util_ratio_threshold: 22 | gpu_list.append(idx) 23 | print("Scanned GPUs and found {} free GPU(s)".format(len(gpu_list))) 24 | return gpu_list 25 | 26 | 27 | if __name__ == '__main__': 28 | 29 | gpu_list = get_gpu_status() 30 | print(gpu_list) 31 | 32 | 33 | 
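    # Usage sketch: pin this process to the idle GPUs found above. This is an
    # illustration only; it assumes CUDA_VISIBLE_DEVICES is set before torch
    # initializes CUDA, otherwise the restriction has no effect.
    import os
    if gpu_list:
        os.environ["CUDA_VISIBLE_DEVICES"] = ",".join(str(i) for i in gpu_list)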
-------------------------------------------------------------------------------- /detectron2/layers/move2cpu.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3.6 2 | # -*- coding: utf-8 -*- 3 | # @Time : 2020/6/16 19:05 4 | 5 | import torch 6 | 7 | def move2cpu(data_lists): 8 | data_lists = [data.to(torch.device('cpu')) for data in data_lists] 9 | return data_lists -------------------------------------------------------------------------------- /detectron2/layers/numerical_stability_softmax.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3.6 2 | # -*- coding: utf-8 -*- 3 | # @Time : 2019/10/14 22:19 4 | 5 | 6 | 7 | 8 | 9 | import torch 10 | import numpy as np 11 | 12 | 13 | def numerical_stability_softmax(score, dim, epsilon=1e-6): 14 | 15 | max_score, _ = score.max(dim, keepdim=True) 16 | 17 | stable_score = score - max_score 18 | stable_exp = torch.exp(stable_score) 19 | stable_prob = stable_exp / (stable_exp.sum(dim, keepdim=True)+epsilon) 20 | 21 | return stable_prob 22 | 23 | def numerical_stability_masked_softmax(vec, mask, dim=1, epsilon=1e-6): 24 | 25 | masked_vec = vec * mask.float() 26 | max_vec, _ = masked_vec.max(dim, keepdim=True) 27 | stable_vec = vec - max_vec 28 | stable_exps = torch.exp(stable_vec) 29 | masked_exps = stable_exps * mask.float() 30 | masked_sums = masked_exps.sum(dim, keepdim=True) + epsilon 31 | masked_prob = masked_exps / masked_sums 32 | 33 | return masked_prob 34 | 35 | 36 | def numerical_stability_inner_masked_softmax(vec, mask, dim=1, num_phrases=2, topN=10, epsilon=1e-8): 37 | 38 | mask = mask.float() 39 | 40 | if dim==0: 41 | vec = vec.permute(1,0) 42 | mask = mask.permute(1,0) 43 | 44 | masked_inner_vec = vec * mask 45 | masked_inner_vec = masked_inner_vec.contiguous().view(-1, topN) 46 | inner_mask = mask.contiguous().view(-1, topN) 47 | inner_max_vec, _ = masked_inner_vec.max(1, True) 48 | stable_inner_vec = masked_inner_vec - inner_max_vec 49 | stable_inner_exps = torch.exp(stable_inner_vec) 50 | masked_inner_exps = stable_inner_exps * inner_mask.float() 51 | masked_inner_sums = masked_inner_exps.sum(1, keepdim=True) + epsilon 52 | masked_inner_prob = masked_inner_exps / masked_inner_sums ## (np*N*np)*N 53 | 54 | masked_inner_vec_total = masked_inner_vec.sum(1).contiguous().view(num_phrases*topN, num_phrases) 55 | inner_mask_total = inner_mask.sum(1).contiguous().view(num_phrases*topN, num_phrases).ge(1).float() 56 | masked_inner_vec_total = masked_inner_vec_total * inner_mask_total 57 | inner_max_vec_total, _ = masked_inner_vec_total.max(1, True) 58 | stable_inner_exps_total = torch.exp(masked_inner_vec_total-inner_max_vec_total) 59 | masked_inner_exps_total = stable_inner_exps_total * inner_mask_total 60 | masked_inner_sum_total = masked_inner_exps_total.sum(1, keepdim=True) + epsilon ## (np*N)*np 61 | masked_inner_prob_total = masked_inner_exps_total / masked_inner_sum_total 62 | masked_inner_prob_total = masked_inner_prob_total.contiguous().view(-1).unsqueeze(1) ## (np*N*np) *1 63 | 64 | masked_inner_prob = masked_inner_prob * masked_inner_prob_total 65 | masked_inner_prob = masked_inner_prob.contiguous().view(num_phrases*topN, num_phrases*topN) 66 | 67 | if dim == 0: 68 | masked_inner_prob = masked_inner_prob.permute(1,0) 69 | 70 | return masked_inner_prob 71 | 72 | 73 | 74 | 75 | def masked_softmax(vec, mask, dim=1, epsilon=1e-6): 76 | exps = torch.exp(vec) 77 | masked_exps = exps * mask.float() 78 | masked_sums = 
masked_exps.sum(dim, keepdim=True) + epsilon 79 | return (masked_exps/masked_sums) 80 | 81 | if __name__ == '__main__': 82 | 83 | import numpy as np 84 | relation_conn = [[0,1],[0,2]] 85 | topN = 10 86 | conn_map = np.zeros((30, 30)) 87 | 88 | random_matrix = np.random.random((10,10)) 89 | for rel in relation_conn: 90 | conn_map[rel[0]*topN:(rel[0]+1)*topN, rel[1]*topN:(rel[1]+1)*topN] = random_matrix 91 | -------------------------------------------------------------------------------- /detectron2/layers/ops.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3.6 2 | # -*- coding: utf-8 -*- 3 | # @Time : 2019/12/11 14:01 4 | 5 | 6 | 7 | import numpy as np 8 | import torch.nn as nn 9 | 10 | class Linear(nn.Linear): 11 | def __init__(self, *args, **kwargs): 12 | super().__init__(*args, **kwargs) 13 | 14 | # compatible with xavier_initializer in TensorFlow 15 | fan_avg = (self.in_features + self.out_features) / 2. 16 | bound = np.sqrt(3. / fan_avg) 17 | nn.init.uniform_(self.weight, -bound, bound) 18 | if self.bias is not None: 19 | nn.init.constant_(self.bias, 0.) -------------------------------------------------------------------------------- /detectron2/layers/rotated_boxes.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | from __future__ import absolute_import, division, print_function, unicode_literals 3 | 4 | # import torch 5 | from detectron2 import _C 6 | 7 | 8 | def pairwise_iou_rotated(boxes1, boxes2): 9 | """ 10 | Return intersection-over-union (Jaccard index) of boxes. 11 | 12 | Both sets of boxes are expected to be in 13 | (x_center, y_center, width, height, angle) format. 14 | 15 | Arguments: 16 | boxes1 (Tensor[N, 5]) 17 | boxes2 (Tensor[M, 5]) 18 | 19 | Returns: 20 | iou (Tensor[N, M]): the NxM matrix containing the pairwise 21 | IoU values for every element in boxes1 and boxes2 22 | """ 23 | 24 | return _C.box_iou_rotated(boxes1, boxes2) 25 | -------------------------------------------------------------------------------- /detectron2/layers/shape_spec.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 3 | from collections import namedtuple 4 | 5 | 6 | class ShapeSpec(namedtuple("_ShapeSpec", ["channels", "height", "width", "stride"])): 7 | """ 8 | A simple structure that contains basic shape specification about a tensor. 9 | It is often used as the auxiliary inputs/outputs of models, 10 | to complement the lack of shape inference ability among pytorch modules.
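For example (an illustrative sketch), a stride-4 feature map with 256 channels
    whose spatial size is not known in advance can be described as:

        spec = ShapeSpec(channels=256, stride=4)  # height and width default to None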
11 | 12 | Attributes: 13 | channels: 14 | height: 15 | width: 16 | stride: 17 | """ 18 | 19 | def __new__(cls, *, channels=None, height=None, width=None, stride=None): 20 | return super().__new__(cls, channels, height, width, stride) 21 | -------------------------------------------------------------------------------- /detectron2/layers/weighted_smooth_l1_loss.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3.6 2 | # -*- coding: utf-8 -*- 3 | # @Time : 2020/6/5 14:43 4 | 5 | 6 | 7 | 8 | import torch 9 | def smooth_l1_loss(input: torch.Tensor, target: torch.Tensor, beta: float, weight: torch.Tensor, reduction: str = "none") -> torch.Tensor: 10 | """ 11 | Smooth L1 loss defined in the Fast R-CNN paper as: 12 | 13 | | 0.5 * x ** 2 / beta if abs(x) < beta 14 | smoothl1(x) = | 15 | | abs(x) - 0.5 * beta otherwise, 16 | 17 | where x = input - target. 18 | 19 | Smooth L1 loss is related to Huber loss, which is defined as: 20 | 21 | | 0.5 * x ** 2 if abs(x) < beta 22 | huber(x) = | 23 | | beta * (abs(x) - 0.5 * beta) otherwise 24 | 25 | Smooth L1 loss is equal to huber(x) / beta. This leads to the following 26 | differences: 27 | 28 | - As beta -> 0, Smooth L1 loss converges to L1 loss, while Huber loss 29 | converges to a constant 0 loss. 30 | - As beta -> +inf, Smooth L1 converges to a constant 0 loss, while Huber loss 31 | converges to L2 loss. 32 | - For Smooth L1 loss, as beta varies, the L1 segment of the loss has a constant 33 | slope of 1. For Huber loss, the slope of the L1 segment is beta. 34 | 35 | Smooth L1 loss can be seen as exactly L1 loss, but with the abs(x) < beta 36 | portion replaced with a quadratic function such that at abs(x) = beta, its 37 | slope is 1. The quadratic segment smooths the L1 loss near x = 0. 38 | 39 | Args: 40 | input (Tensor): input tensor of any shape 41 | target (Tensor): target value tensor with the same shape as input 42 | beta (float): L1 to L2 change point. 43 | For beta values < 1e-5, L1 loss is computed. weight (Tensor): per-row weights of shape (N,); unsqueezed on dim 1 and multiplied into the loss before reduction. 44 | reduction: 'none' | 'mean' | 'sum' 45 | 'none': No reduction will be applied to the output. 46 | 'mean': The output will be averaged. 47 | 'sum': The output will be summed. 48 | 49 | Returns: 50 | The loss with the reduction option applied. 51 | 52 | Note: 53 | PyTorch's builtin "Smooth L1 loss" implementation does not actually 54 | implement Smooth L1 loss, nor does it implement Huber loss. It implements 55 | the special case of both in which they are equal (beta=1). 56 | See: https://pytorch.org/docs/stable/nn.html#torch.nn.SmoothL1Loss. 57 | """ 58 | if beta < 1e-5: 59 | # if beta == 0, then torch.where will result in nan gradients when 60 | # the chain rule is applied due to pytorch implementation details 61 | # (the False branch "0.5 * n ** 2 / 0" has an incoming gradient of 62 | # zeros, rather than "no gradient"). To avoid this issue, we define 63 | # small values of beta to be exactly l1 loss. 64 | loss = torch.abs(input - target) 65 | else: 66 | n = torch.abs(input - target) 67 | cond = n < beta 68 | loss = torch.where(cond, 0.5 * n ** 2 / beta, n - 0.5 * beta) 69 | 70 | loss = loss * weight.unsqueeze(1) # apply the per-row weights, broadcast over the remaining dimension 71 | 72 | if reduction == "mean": 73 | loss = loss.mean() 74 | elif reduction == "sum": 75 | loss = loss.sum() 76 | return loss -------------------------------------------------------------------------------- /detectron2/modeling/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates.
All Rights Reserved 2 | import torch 3 | 4 | from detectron2.layers import ShapeSpec 5 | 6 | from .anchor_generator import build_anchor_generator, ANCHOR_GENERATOR_REGISTRY 7 | from .backbone import ( 8 | BACKBONE_REGISTRY, 9 | FPN, 10 | Backbone, 11 | ResNet, 12 | ResNetBlockBase, 13 | build_backbone, 14 | build_resnet_backbone, 15 | make_stage, 16 | ) 17 | from .meta_arch import ( 18 | META_ARCH_REGISTRY, 19 | SEM_SEG_HEADS_REGISTRY, 20 | GeneralizedRCNN, 21 | PanopticFPN, 22 | ProposalNetwork, 23 | RetinaNet, 24 | SemanticSegmentor, 25 | build_model, 26 | build_sem_seg_head, 27 | ) 28 | from .postprocessing import detector_postprocess 29 | from .proposal_generator import ( 30 | PROPOSAL_GENERATOR_REGISTRY, 31 | build_proposal_generator, 32 | RPN_HEAD_REGISTRY, 33 | build_rpn_head, 34 | ) 35 | from .roi_heads import ( 36 | ROI_BOX_HEAD_REGISTRY, 37 | ROI_HEADS_REGISTRY, 38 | ROI_KEYPOINT_HEAD_REGISTRY, 39 | ROI_MASK_HEAD_REGISTRY, 40 | ROIHeads, 41 | StandardROIHeads, 42 | build_box_head, 43 | build_keypoint_head, 44 | build_mask_head, 45 | build_roi_heads, 46 | ) 47 | from .test_time_augmentation import DatasetMapperTTA, GeneralizedRCNNWithTTA 48 | 49 | _EXCLUDE = {"torch", "ShapeSpec"} 50 | __all__ = [k for k in globals().keys() if k not in _EXCLUDE and not k.startswith("_")] 51 | 52 | assert ( 53 | torch.Tensor([1]) == torch.Tensor([2]) 54 | ).dtype == torch.bool, "Your Pytorch is too old. Please update to contain https://github.com/pytorch/pytorch/pull/21113" 55 | -------------------------------------------------------------------------------- /detectron2/modeling/backbone/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | from .build import build_backbone, BACKBONE_REGISTRY # noqa F401 isort:skip 3 | 4 | from .backbone import Backbone 5 | from .fpn import FPN 6 | from .resnet import ResNet, ResNetBlockBase, build_resnet_backbone, make_stage 7 | from .deeplab import build_deeplabv2 8 | from .deeplabv2 import build_deeplabv2_v1 9 | from .resnet101 import build_resnet101 10 | 11 | # TODO can expose more resnet blocks after careful consideration 12 | -------------------------------------------------------------------------------- /detectron2/modeling/backbone/backbone.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | from abc import ABCMeta, abstractmethod 3 | import torch.nn as nn 4 | 5 | from detectron2.layers import ShapeSpec 6 | 7 | __all__ = ["Backbone"] 8 | 9 | 10 | class Backbone(nn.Module, metaclass=ABCMeta): 11 | """ 12 | Abstract base class for network backbones. 13 | """ 14 | 15 | def __init__(self): 16 | """ 17 | The `__init__` method of any subclass can specify its own set of arguments. 18 | """ 19 | super().__init__() 20 | 21 | @abstractmethod 22 | def forward(self): 23 | """ 24 | Subclasses must override this method, but adhere to the same return type. 25 | 26 | Returns: 27 | dict[str: Tensor]: mapping from feature name (e.g., "res2") to tensor 28 | """ 29 | pass 30 | 31 | @property 32 | def size_divisibility(self): 33 | """ 34 | Some backbones require the input height and width to be divisible by a 35 | specific integer. This is typically true for encoder / decoder type networks 36 | with lateral connection (e.g., FPN) for which feature maps need to match 37 | dimension in the "bottom up" and "top down" paths. 
Set to 0 if no specific 38 | input size divisibility is required. 39 | """ 40 | return 0 41 | 42 | def output_shape(self): 43 | """ 44 | Returns: 45 | dict[str->ShapeSpec] 46 | """ 47 | # this is a backward-compatible default 48 | return { 49 | name: ShapeSpec( 50 | channels=self._out_feature_channels[name], stride=self._out_feature_strides[name] 51 | ) 52 | for name in self._out_features 53 | } 54 | 55 | # the properties below are not used any more 56 | 57 | @property 58 | def out_features(self): 59 | """deprecated""" 60 | return self._out_features 61 | 62 | @property 63 | def out_feature_strides(self): 64 | """deprecated""" 65 | return {f: self._out_feature_strides[f] for f in self._out_features} 66 | 67 | @property 68 | def out_feature_channels(self): 69 | """deprecated""" 70 | return {f: self._out_feature_channels[f] for f in self._out_features} 71 | -------------------------------------------------------------------------------- /detectron2/modeling/backbone/build.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | from detectron2.layers import ShapeSpec 3 | from detectron2.utils.registry import Registry 4 | 5 | from .backbone import Backbone 6 | 7 | BACKBONE_REGISTRY = Registry("BACKBONE") 8 | """ 9 | Registry for backbones, which extract feature maps from images. 10 | """ 11 | 12 | 13 | def build_backbone(cfg, input_shape=None): 14 | """ 15 | Build a backbone from `cfg.MODEL.BACKBONE.NAME`. 16 | 17 | Returns: 18 | an instance of :class:`Backbone` 19 | """ 20 | if input_shape is None: 21 | input_shape = ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN)) 22 | 23 | backbone_name = cfg.MODEL.BACKBONE.NAME 24 | backbone = BACKBONE_REGISTRY.get(backbone_name)(cfg, input_shape) 25 | assert isinstance(backbone, Backbone) 26 | return backbone 27 | -------------------------------------------------------------------------------- /detectron2/modeling/meta_arch/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 3 | 4 | from .build import META_ARCH_REGISTRY, build_model # isort:skip 5 | 6 | from .panoptic_fpn import PanopticFPN 7 | 8 | # import all the meta_arch, so they will be registered 9 | from .rcnn import GeneralizedRCNN, ProposalNetwork 10 | from .retinanet import RetinaNet 11 | from .semantic_seg import SEM_SEG_HEADS_REGISTRY, SemanticSegmentor, build_sem_seg_head 12 | -------------------------------------------------------------------------------- /detectron2/modeling/meta_arch/build.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | from detectron2.utils.registry import Registry 3 | 4 | META_ARCH_REGISTRY = Registry("META_ARCH") # noqa F401 isort:skip 5 | """ 6 | Registry for meta-architectures, i.e. the whole model. 7 | """ 8 | 9 | 10 | def build_model(cfg): 11 | """ 12 | Build the whole model, as defined by `cfg.MODEL.META_ARCHITECTURE`. 13 | """ 14 | meta_arch = cfg.MODEL.META_ARCHITECTURE 15 | return META_ARCH_REGISTRY.get(meta_arch)(cfg) 16 | -------------------------------------------------------------------------------- /detectron2/modeling/postprocessing.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates.
All Rights Reserved 2 | from torch.nn import functional as F 3 | 4 | from detectron2.layers import paste_masks_in_image 5 | from detectron2.structures import Instances 6 | 7 | 8 | def detector_postprocess(results, output_height, output_width, mask_threshold=0.5): 9 | """ 10 | Resize the output instances. 11 | The input images are often resized when entering an object detector. 12 | As a result, we often need the outputs of the detector in a different 13 | resolution from its inputs. 14 | 15 | This function will resize the raw outputs of an R-CNN detector 16 | to produce outputs according to the desired output resolution. 17 | 18 | Args: 19 | results (Instances): the raw outputs from the detector. 20 | `results.image_size` contains the input image resolution the detector sees. 21 | This object might be modified in-place. 22 | output_height, output_width: the desired output resolution. 23 | 24 | Returns: 25 | Instances: the resized output from the model, based on the output resolution 26 | """ 27 | scale_x, scale_y = (output_width / results.image_size[1], output_height / results.image_size[0]) 28 | results = Instances((output_height, output_width), **results.get_fields()) 29 | 30 | if results.has("pred_boxes"): 31 | output_boxes = results.pred_boxes 32 | elif results.has("proposal_boxes"): 33 | output_boxes = results.proposal_boxes 34 | 35 | output_boxes.tensor[:, 0::2] *= scale_x 36 | output_boxes.tensor[:, 1::2] *= scale_y 37 | output_boxes.clip(results.image_size) 38 | 39 | results = results[output_boxes.nonempty()] 40 | 41 | if results.has("pred_masks"): 42 | results.pred_masks = paste_masks_in_image( 43 | results.pred_masks[:, 0, :, :], # N, 1, M, M 44 | results.pred_boxes, 45 | results.image_size, 46 | threshold=mask_threshold, 47 | ) 48 | 49 | if results.has("pred_keypoints"): 50 | results.pred_keypoints[:, :, 0] *= scale_x 51 | results.pred_keypoints[:, :, 1] *= scale_y 52 | 53 | return results 54 | 55 | 56 | def sem_seg_postprocess(result, img_size, output_height, output_width): 57 | """ 58 | Return semantic segmentation predictions in the original resolution. 59 | 60 | The input images are often resized when entering the semantic segmentor. Moreover, in some 61 | cases, they are also padded inside the segmentor to be divisible by the maximum network stride. 62 | As a result, we often need the predictions of the segmentor in a different 63 | resolution from its inputs. 64 | 65 | Args: 66 | result (Tensor): semantic segmentation prediction logits. A tensor of shape (C, H, W), 67 | where C is the number of classes, and H, W are the height and width of the prediction. 68 | img_size (tuple): image size that the segmentor takes as input. 69 | output_height, output_width: the desired output resolution. 70 | 71 | Returns: 72 | semantic segmentation prediction (Tensor): A tensor of the shape 73 | (C, output_height, output_width) that contains per-pixel soft predictions. 74 | """ 75 | result = result[:, : img_size[0], : img_size[1]].expand(1, -1, -1, -1) 76 | result = F.interpolate( 77 | result, size=(output_height, output_width), mode="bilinear", align_corners=False 78 | )[0] 79 | return result 80 | -------------------------------------------------------------------------------- /detectron2/modeling/proposal_generator/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates.
All Rights Reserved 2 | from .build import PROPOSAL_GENERATOR_REGISTRY, build_proposal_generator 3 | from .rpn import RPN_HEAD_REGISTRY, build_rpn_head 4 | -------------------------------------------------------------------------------- /detectron2/modeling/proposal_generator/build.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | from detectron2.utils.registry import Registry 3 | 4 | PROPOSAL_GENERATOR_REGISTRY = Registry("PROPOSAL_GENERATOR") 5 | """ 6 | Registry for proposal generators, which produce object proposals from feature maps. 7 | """ 8 | 9 | from . import rpn, rrpn # noqa F401 isort:skip 10 | 11 | 12 | def build_proposal_generator(cfg, input_shape): 13 | """ 14 | Build a proposal generator from `cfg.MODEL.PROPOSAL_GENERATOR.NAME`. 15 | The name can be "PrecomputedProposals" to use no proposal generator. 16 | """ 17 | name = cfg.MODEL.PROPOSAL_GENERATOR.NAME 18 | if name == "PrecomputedProposals": 19 | return None 20 | 21 | return PROPOSAL_GENERATOR_REGISTRY.get(name)(cfg, input_shape) 22 | -------------------------------------------------------------------------------- /detectron2/modeling/proposal_generator/proposal_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | import math 3 | import torch 4 | 5 | from detectron2.structures import Instances 6 | 7 | 8 | def add_ground_truth_to_proposals(gt_boxes, proposals): 9 | """ 10 | Call `add_ground_truth_to_proposals_single_image` for all images. 11 | 12 | Args: 13 | gt_boxes(list[Boxes]): list of N elements. Element i is a Boxes 14 | representing the ground-truth for image i. 15 | proposals (list[Instances]): list of N elements. Element i is an Instances 16 | representing the proposals for image i. 17 | 18 | Returns: 19 | list[Instances]: list of N Instances. Each is the proposals for the image, 20 | with fields "proposal_boxes" and "objectness_logits". 21 | """ 22 | assert gt_boxes is not None 23 | 24 | assert len(proposals) == len(gt_boxes) 25 | if len(proposals) == 0: 26 | return proposals 27 | 28 | return [ 29 | add_ground_truth_to_proposals_single_image(gt_boxes_i, proposals_i) 30 | for gt_boxes_i, proposals_i in zip(gt_boxes, proposals) 31 | ] 32 | 33 | 34 | def add_ground_truth_to_proposals_single_image(gt_boxes, proposals): 35 | """ 36 | Augment `proposals` with ground-truth boxes from `gt_boxes`. 37 | 38 | Args: 39 | Same as `add_ground_truth_to_proposals`, but with gt_boxes and proposals 40 | per image. 41 | 42 | Returns: 43 | Same as `add_ground_truth_to_proposals`, but for only one image. 44 | """ 45 | device = proposals.objectness_logits.device 46 | # Concatenating gt_boxes with proposals requires them to have the same fields 47 | # Assign all ground-truth boxes an objectness logit corresponding to P(object) \approx 1.
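# (With p = 1 - 1e-10, log(p / (1 - p)) is the inverse sigmoid of p,
# i.e. roughly 10 * ln(10) ~= 23, so sigmoid(gt_logit_value) ~= 1 - 1e-10.)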
48 | gt_logit_value = math.log((1.0 - 1e-10) / (1 - (1.0 - 1e-10))) 49 | 50 | gt_logits = gt_logit_value * torch.ones(len(gt_boxes), device=device) 51 | gt_proposal = Instances(proposals.image_size) 52 | 53 | gt_proposal.proposal_boxes = gt_boxes 54 | gt_proposal.objectness_logits = gt_logits 55 | new_proposals = Instances.cat([proposals, gt_proposal]) 56 | 57 | return new_proposals 58 | -------------------------------------------------------------------------------- /detectron2/modeling/roi_heads/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | from .box_head import ROI_BOX_HEAD_REGISTRY, build_box_head 3 | from .keypoint_head import ROI_KEYPOINT_HEAD_REGISTRY, build_keypoint_head 4 | from .mask_head import ROI_MASK_HEAD_REGISTRY, build_mask_head 5 | from .roi_heads import ROI_HEADS_REGISTRY, ROIHeads, StandardROIHeads, build_roi_heads 6 | 7 | from . import cascade_rcnn # isort:skip 8 | -------------------------------------------------------------------------------- /detectron2/modeling/roi_heads/box_head.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | import numpy as np 3 | import fvcore.nn.weight_init as weight_init 4 | import torch 5 | from torch import nn 6 | from torch.nn import functional as F 7 | 8 | from detectron2.layers import Conv2d, ShapeSpec, get_norm 9 | from detectron2.utils.registry import Registry 10 | 11 | ROI_BOX_HEAD_REGISTRY = Registry("ROI_BOX_HEAD") 12 | """ 13 | Registry for box heads, which make box predictions from per-region features. 14 | """ 15 | 16 | 17 | @ROI_BOX_HEAD_REGISTRY.register() 18 | class FastRCNNConvFCHead(nn.Module): 19 | """ 20 | A head with several 3x3 conv layers (each followed by norm & relu) and 21 | several fc layers (each followed by relu). 
22 | """ 23 | 24 | def __init__(self, cfg, input_shape: ShapeSpec): 25 | """ 26 | The following attributes are parsed from config: 27 | num_conv, num_fc: the number of conv/fc layers 28 | conv_dim/fc_dim: the dimension of the conv/fc layers 29 | norm: normalization for the conv layers 30 | """ 31 | super().__init__() 32 | 33 | # fmt: off 34 | num_conv = cfg.MODEL.ROI_BOX_HEAD.NUM_CONV 35 | conv_dim = cfg.MODEL.ROI_BOX_HEAD.CONV_DIM 36 | num_fc = cfg.MODEL.ROI_BOX_HEAD.NUM_FC 37 | fc_dim = cfg.MODEL.ROI_BOX_HEAD.FC_DIM 38 | norm = cfg.MODEL.ROI_BOX_HEAD.NORM 39 | # fmt: on 40 | assert num_conv + num_fc > 0 41 | 42 | self._output_size = (input_shape.channels, input_shape.height, input_shape.width) 43 | 44 | self.conv_norm_relus = [] 45 | for k in range(num_conv): 46 | conv = Conv2d( 47 | self._output_size[0], 48 | conv_dim, 49 | kernel_size=3, 50 | padding=1, 51 | bias=not norm, 52 | norm=get_norm(norm, conv_dim), 53 | activation=F.relu, 54 | ) 55 | self.add_module("conv{}".format(k + 1), conv) 56 | self.conv_norm_relus.append(conv) 57 | self._output_size = (conv_dim, self._output_size[1], self._output_size[2]) 58 | 59 | self.fcs = [] 60 | for k in range(num_fc): 61 | fc = nn.Linear(np.prod(self._output_size), fc_dim) 62 | self.add_module("fc{}".format(k + 1), fc) 63 | self.fcs.append(fc) 64 | self._output_size = fc_dim 65 | 66 | for layer in self.conv_norm_relus: 67 | weight_init.c2_msra_fill(layer) 68 | for layer in self.fcs: 69 | weight_init.c2_xavier_fill(layer) 70 | 71 | def forward(self, x): 72 | for layer in self.conv_norm_relus: 73 | x = layer(x) 74 | if len(self.fcs): 75 | if x.dim() > 2: 76 | x = torch.flatten(x, start_dim=1) 77 | for layer in self.fcs: 78 | x = F.relu(layer(x)) 79 | return x 80 | 81 | @property 82 | def output_size(self): 83 | return self._output_size 84 | 85 | 86 | def build_box_head(cfg, input_shape): 87 | """ 88 | Build a box head defined by `cfg.MODEL.ROI_BOX_HEAD.NAME`. 89 | """ 90 | name = cfg.MODEL.ROI_BOX_HEAD.NAME 91 | return ROI_BOX_HEAD_REGISTRY.get(name)(cfg, input_shape) 92 | -------------------------------------------------------------------------------- /detectron2/modeling/sampling.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | import torch 3 | 4 | 5 | def subsample_labels(labels, num_samples, positive_fraction, bg_label): 6 | """ 7 | Return `num_samples` random samples from `labels`, with a fraction of 8 | positives no larger than `positive_fraction`. 9 | 10 | Args: 11 | labels (Tensor): (N, ) label vector with values: 12 | -1: ignore 13 | bg_label: background ("negative") class 14 | otherwise: one or more foreground ("positive") classes 15 | num_samples (int): The total number of labels with value >= 0 to return. 16 | Values that are not sampled will be filled with -1 (ignore). 17 | positive_fraction (float): The number of subsampled labels with values > 0 18 | is `min(num_positives, int(positive_fraction * num_samples))`. The number 19 | of negatives sampled is `min(num_negatives, num_samples - num_positives_sampled)`. 20 | In other words, if there are not enough positives, the sample is filled with 21 | negatives. If there are also not enough negatives, then as many elements are 22 | sampled as is possible. 23 | bg_label (int): label index of background ("negative") class. 24 | 25 | Returns: 26 | pos_idx, neg_idx (Tensor): 1D indices. The total number of indices is `num_samples` 27 | if possible.
The fraction of positive indices is `positive_fraction` if possible. 28 | """ 29 | positive = torch.nonzero((labels != -1) & (labels != bg_label)).squeeze(1) 30 | negative = torch.nonzero(labels == bg_label).squeeze(1) 31 | 32 | num_pos = int(num_samples * positive_fraction) 33 | # protect against not enough positive examples 34 | num_pos = min(positive.numel(), num_pos) 35 | num_neg = num_samples - num_pos 36 | # protect against not enough negative examples 37 | num_neg = min(negative.numel(), num_neg) 38 | 39 | # randomly select positive and negative examples 40 | perm1 = torch.randperm(positive.numel(), device=positive.device)[:num_pos] 41 | perm2 = torch.randperm(negative.numel(), device=negative.device)[:num_neg] 42 | 43 | pos_idx = positive[perm1] 44 | neg_idx = negative[perm2] 45 | return pos_idx, neg_idx 46 | -------------------------------------------------------------------------------- /detectron2/modeling/weaklygrounding/loss_kac.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.nn import functional as F 3 | 4 | from detectron2.layers.numerical_stability_softmax import numerical_stability_softmax 5 | from detectron2.config import global_cfg as cfg 6 | from fvcore.nn import sigmoid_focal_loss_jit, smooth_l1_loss 7 | from detectron2.structures import Boxes 8 | from detectron2.utils.events import get_event_storage 9 | from detectron2.layers.weighted_smooth_l1_loss import smooth_l1_loss as weighted_smooth_l1_loss 10 | 11 | 12 | 13 | class WeaklyVGLossCompute(): 14 | def __init__(self): 15 | self.cross_entropy = torch.nn.CrossEntropyLoss(reduction='mean') 16 | self.device = torch.device('cuda') 17 | self.reg_lambda = cfg.MODEL.VG.REG_LOSS_FACTOR 18 | 19 | def __call__(self, batch_phrase_mask, batch_decode_logits, batch_phrase_dec_ids, batch_pred_delta, batch_gt_delta, batch_pred_similarity): 20 | 21 | noun_reconst_loss = torch.zeros(1).to(self.device) 22 | visual_consistency_loss = torch.zeros(1).to(self.device) 23 | 24 | 25 | for (phr_mask, decode_logits, phrase_dec_ids, pred_delta, det_sim, gt_delta) in zip(batch_phrase_mask, batch_decode_logits, batch_phrase_dec_ids, batch_pred_delta, batch_pred_similarity, batch_gt_delta): 26 | 27 | 28 | ## here we ignore the first word reconstruction 29 | phrase_dec_ids = torch.as_tensor(phrase_dec_ids).long().to(self.device) 30 | vx, vy = (phr_mask > 0).nonzero().transpose(0, 1) 31 | noun_reconst_loss += self.cross_entropy(decode_logits[vx, vy], phrase_dec_ids[vx, vy]) 32 | 33 | """ 34 | pred_delta: np*nb*4 35 | gt_delta: nb*4 36 | det_sim: np*nb 37 | """ 38 | 39 | np, nb = det_sim.shape # np: number of phrases, nb: number of boxes 40 | pred_delta = pred_delta.reshape(-1, 4) 41 | gt_delta = gt_delta.unsqueeze(0).repeat(np, 1, 1).reshape(-1, 4) 42 | gt_delta = gt_delta - 0.5 43 | n = torch.abs(pred_delta - gt_delta) 44 | cond = n < 1 45 | loss = torch.where(cond, 0.5 * n ** 2, n - 0.5).mean(1) # smooth L1 with beta=1, averaged over the 4 box coordinates 46 | vc_loss = det_sim.reshape(-1) * loss 47 | visual_consistency_loss += self.reg_lambda * vc_loss.sum() 48 | 49 | return noun_reconst_loss, visual_consistency_loss 50 | -------------------------------------------------------------------------------- /detectron2/solver/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates.
All Rights Reserved 2 | from .build import build_lr_scheduler, build_optimizer 3 | from .lr_scheduler import WarmupCosineLR, WarmupMultiStepLR 4 | 5 | __all__ = [k for k in globals().keys() if not k.startswith("_")] 6 | -------------------------------------------------------------------------------- /detectron2/structures/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | from .boxes import Boxes, BoxMode, pairwise_iou 3 | from .image_list import ImageList 4 | from .instances import Instances 5 | from .keypoints import Keypoints, heatmaps_to_keypoints 6 | from .masks import BitMasks, PolygonMasks, rasterize_polygons_within_box 7 | from .rotated_boxes import RotatedBoxes 8 | from .rotated_boxes import pairwise_iou as pairwise_iou_rotated 9 | 10 | __all__ = [k for k in globals().keys() if not k.startswith("_")] 11 | -------------------------------------------------------------------------------- /detectron2/utils/README.md: -------------------------------------------------------------------------------- 1 | # Utility functions 2 | 3 | This folder contains utility functions that are not used in the 4 | core library, but are useful for building models or training 5 | code using the config system. 6 | -------------------------------------------------------------------------------- /detectron2/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | -------------------------------------------------------------------------------- /detectron2/utils/collect_env.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates.
All Rights Reserved 2 | import os 3 | import sys 4 | from collections import defaultdict 5 | import PIL 6 | import torch 7 | from tabulate import tabulate 8 | 9 | __all__ = ["collect_env_info"] 10 | 11 | 12 | def collect_torch_env(): 13 | try: 14 | import torch.__config__ 15 | 16 | return torch.__config__.show() 17 | except ImportError: 18 | # compatible with older versions of pytorch 19 | from torch.utils.collect_env import get_pretty_env_info 20 | 21 | return get_pretty_env_info() 22 | 23 | 24 | def get_env_module(): 25 | var_name = "DETECTRON2_ENV_MODULE" 26 | return var_name, os.environ.get(var_name, "") 27 | 28 | 29 | def collect_env_info(): 30 | data = [] 31 | data.append(("Python", sys.version.replace("\n", ""))) 32 | try: 33 | from detectron2 import _C 34 | except ImportError: 35 | pass 36 | else: 37 | data.append(("Detectron2 Compiler", _C.get_compiler_version())) 38 | 39 | data.append(get_env_module()) 40 | data.append(("PyTorch", torch.__version__)) 41 | data.append(("PyTorch Debug Build", torch.version.debug)) 42 | 43 | has_cuda = torch.cuda.is_available() 44 | data.append(("CUDA available", has_cuda)) 45 | if has_cuda: 46 | devices = defaultdict(list) 47 | for k in range(torch.cuda.device_count()): 48 | devices[torch.cuda.get_device_name(k)].append(str(k)) 49 | for name, devids in devices.items(): 50 | data.append(("GPU " + ",".join(devids), name)) 51 | data.append(("Pillow", PIL.__version__)) 52 | 53 | try: 54 | import cv2 55 | 56 | data.append(("cv2", cv2.__version__)) 57 | except ImportError: 58 | pass 59 | env_str = tabulate(data) + "\n" 60 | env_str += collect_torch_env() 61 | return env_str 62 | 63 | 64 | if __name__ == "__main__": 65 | print(collect_env_info()) 66 | -------------------------------------------------------------------------------- /detectron2/utils/registry.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | class Registry(object): 3 | """ 4 | The registry that provides name -> object mapping, to support third-party users' custom modules. 5 | 6 | To create a registry (inside detectron2): 7 | 8 | .. code-block:: python 9 | 10 | BACKBONE_REGISTRY = Registry('BACKBONE') 11 | 12 | To register an object: 13 | 14 | .. code-block:: python 15 | 16 | @BACKBONE_REGISTRY.register() 17 | class MyBackbone(): 18 | ... 19 | 20 | Or: 21 | 22 | BACKBONE_REGISTRY.register(obj=MyBackbone) 23 | """ 24 | 25 | def __init__(self, name): 26 | """ 27 | Args: 28 | name (str): the name of this registry 29 | """ 30 | self._name = name 31 | 32 | self._obj_map = {} 33 | 34 | def _do_register(self, name, obj): 35 | assert ( 36 | name not in self._obj_map 37 | ), "An object named '{}' was already registered in '{}' registry!".format(name, self._name) 38 | self._obj_map[name] = obj 39 | 40 | def register(self, obj=None): 41 | """ 42 | Register the given object under the name `obj.__name__`. 43 | Can be used as either a decorator or not. See docstring of this class for usage.
44 | """ 45 | if obj is None: 46 | # used as a decorator 47 | def deco(func_or_class): 48 | name = func_or_class.__name__ 49 | self._do_register(name, func_or_class) 50 | return func_or_class 51 | 52 | return deco 53 | 54 | # used as a function call 55 | name = obj.__name__ 56 | self._do_register(name, obj) 57 | 58 | def get(self, name): 59 | ret = self._obj_map.get(name) 60 | if ret is None: 61 | raise KeyError("No object named '{}' found in '{}' registry!".format(name, self._name)) 62 | return ret 63 | -------------------------------------------------------------------------------- /detectron2/utils/serialize.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | import cloudpickle 3 | 4 | 5 | class PicklableWrapper(object): 6 | """ 7 | Wrap an object to make it more picklable. Note that it uses 8 | heavyweight serialization libraries that are slower than pickle. 9 | It's best to use it only on closures (which are usually not picklable). 10 | 11 | This is a simplified version of 12 | https://github.com/joblib/joblib/blob/master/joblib/externals/loky/cloudpickle_wrapper.py 13 | """ 14 | 15 | def __init__(self, obj): 16 | self._obj = obj 17 | 18 | def __reduce__(self): 19 | s = cloudpickle.dumps(self._obj) 20 | return cloudpickle.loads, (s,) 21 | 22 | def __call__(self, *args, **kwargs): 23 | return self._obj(*args, **kwargs) 24 | 25 | def __getattr__(self, attr): 26 | # Ensure that the wrapped object can be used seamlessly as the previous object. 27 | if attr not in ["_obj"]: 28 | return getattr(self._obj, attr) 29 | return getattr(self, attr) 30 | -------------------------------------------------------------------------------- /dev/README.md: -------------------------------------------------------------------------------- 1 | 2 | ## Some scripts for developers to use, including: 3 | 4 | - `linter.sh`: lint the codebase before commit 5 | - `run_{inference,instant}_tests.sh`: run inference/training for a few iterations. 6 | - `parse_results.sh`: parse results from a log file. 7 | -------------------------------------------------------------------------------- /dev/linter.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -ev 2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 3 | 4 | # Run this script at project root by "./dev/linter.sh" before you commit 5 | 6 | echo "Running isort ..." 7 | isort -y --multi-line 3 --trailing-comma -sp . --skip datasets --skip docs --skip-glob '*/__init__.py' --atomic 8 | 9 | echo "Running black ..." 10 | black -l 100 . 11 | 12 | echo "Running flake8 ..." 13 | if [ -x "$(command -v flake8-3)" ]; then 14 | flake8-3 . 15 | else 16 | python3 -m flake8 . 17 | fi 18 | 19 | # echo "Running mypy ..." 20 | # Pytorch does not have enough type annotations 21 | # mypy detectron2/solver detectron2/structures detectron2/config 22 | 23 | echo "Running clang-format ..." 24 | find . -regex ".*\.\(cpp\|c\|cc\|cu\|cxx\|h\|hh\|hpp\|hxx\|tcc\|mm\|m\)" -print0 | xargs -0 clang-format -i 25 | 26 | command -v arc > /dev/null && arc lint 27 | -------------------------------------------------------------------------------- /dev/parse_results.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 3 | 4 | # A shell script that parses metrics from the log file.
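# Example invocation (with a hypothetical log path): ./dev/parse_results.sh ./output/log.txt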
5 | # Make it easier for developers to track performance of models. 6 | 7 | LOG="$1" 8 | 9 | if [[ -z "$LOG" ]]; then 10 | echo "Usage: $0 /path/to/log/file" 11 | exit 1 12 | fi 13 | 14 | # [12/15 11:47:32] trainer INFO: Total training time: 12:15:04.446477 (0.4900 s / it) 15 | # [12/15 11:49:03] inference INFO: Total inference time: 0:01:25.326167 (0.13652186737060548 s / img per device, on 8 devices) 16 | 17 | # training time 18 | trainspeed=$(grep -o 'Overall training.*' "$LOG" | grep -Eo '\(.*\)' | grep -o '[0-9\.]*') 19 | echo "Training speed: $trainspeed s/it" 20 | 21 | # inference time: there could be multiple inference during training 22 | inferencespeed=$(grep -o 'Total inference.*' "$LOG" | tail -n1 | grep -Eo '\(.*\)' | grep -o '[0-9\.]*' | head -n1) 23 | echo "Inference speed: $inferencespeed s/it" 24 | 25 | # [12/15 11:47:18] trainer INFO: eta: 0:00:00 iter: 90000 loss: 0.5407 (0.7256) loss_classifier: 0.1744 (0.2446) loss_box_reg: 0.0838 (0.1160) loss_mask: 0.2159 (0.2722) loss_objectness: 0.0244 (0.0429) loss_rpn_box_reg: 0.0279 (0.0500) time: 0.4487 (0.4899) data: 0.0076 (0.0975) lr: 0.000200 max mem: 4161 26 | memory=$(grep -o 'max[_ ]mem: [0-9]*' "$LOG" | tail -n1 | grep -o '[0-9]*') 27 | echo "Training memory: $memory MB" 28 | 29 | echo "Easy to copypaste:" 30 | echo "$trainspeed","$inferencespeed","$memory" 31 | 32 | echo "------------------------------" 33 | 34 | # [12/26 17:26:32] engine.coco_evaluation: copypaste: Task: bbox 35 | # [12/26 17:26:32] engine.coco_evaluation: copypaste: AP,AP50,AP75,APs,APm,APl 36 | # [12/26 17:26:32] engine.coco_evaluation: copypaste: 0.0017,0.0024,0.0017,0.0005,0.0019,0.0011 37 | # [12/26 17:26:32] engine.coco_evaluation: copypaste: Task: segm 38 | # [12/26 17:26:32] engine.coco_evaluation: copypaste: AP,AP50,AP75,APs,APm,APl 39 | # [12/26 17:26:32] engine.coco_evaluation: copypaste: 0.0014,0.0021,0.0016,0.0005,0.0016,0.0011 40 | 41 | echo "COCO Results:" 42 | num_tasks=$(grep -o 'copypaste:.*Task.*' "$LOG" | sort -u | wc -l) 43 | # each task has 3 lines 44 | grep -o 'copypaste:.*' "$LOG" | cut -d ' ' -f 2- | tail -n $((num_tasks * 3)) 45 | -------------------------------------------------------------------------------- /dev/run_inference_tests.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 3 | 4 | BIN="python tools/train_net.py" 5 | OUTPUT="inference_test_output" 6 | NUM_GPUS=2 7 | 8 | CFG_LIST=( "${@:1}" ) 9 | 10 | if [ ${#CFG_LIST[@]} -eq 0 ]; then 11 | CFG_LIST=( ./configs/quick_schedules/*inference_acc_test.yaml ) 12 | fi 13 | 14 | echo "========================================================================" 15 | echo "Configs to run:" 16 | echo "${CFG_LIST[@]}" 17 | echo "========================================================================" 18 | 19 | 20 | for cfg in "${CFG_LIST[@]}"; do 21 | echo "========================================================================" 22 | echo "Running $cfg ..." 23 | echo "========================================================================" 24 | $BIN \ 25 | --eval-only \ 26 | --num-gpus $NUM_GPUS \ 27 | --config-file "$cfg" \ 28 | OUTPUT_DIR $OUTPUT 29 | rm -rf $OUTPUT 30 | done 31 | 32 | 33 | echo "========================================================================" 34 | echo "Running demo.py ..." 
35 | echo "========================================================================" 36 | DEMO_BIN="python demo/demo.py" 37 | COCO_DIR=datasets/coco/val2014 38 | mkdir -pv $OUTPUT 39 | 40 | set -v 41 | 42 | $DEMO_BIN --config-file ./configs/quick_schedules/panoptic_fpn_R_50_inference_acc_test.yaml \ 43 | --input $COCO_DIR/COCO_val2014_0000001933* --output $OUTPUT 44 | rm -rf $OUTPUT 45 | -------------------------------------------------------------------------------- /dev/run_instant_tests.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 3 | 4 | BIN="python tools/train_net.py" 5 | OUTPUT="instant_test_output" 6 | NUM_GPUS=2 7 | 8 | CFG_LIST=( "${@:1}" ) 9 | if [ ${#CFG_LIST[@]} -eq 0 ]; then 10 | CFG_LIST=( ./configs/quick_schedules/*instant_test.yaml ) 11 | fi 12 | 13 | echo "========================================================================" 14 | echo "Configs to run:" 15 | echo "${CFG_LIST[@]}" 16 | echo "========================================================================" 17 | 18 | for cfg in "${CFG_LIST[@]}"; do 19 | echo "========================================================================" 20 | echo "Running $cfg ..." 21 | echo "========================================================================" 22 | $BIN --num-gpus $NUM_GPUS --config-file "$cfg" \ 23 | SOLVER.IMS_PER_BATCH $(($NUM_GPUS * 2)) \ 24 | OUTPUT_DIR "$OUTPUT" 25 | rm -rf "$OUTPUT" 26 | done 27 | 28 | -------------------------------------------------------------------------------- /docs/.gitignore: -------------------------------------------------------------------------------- 1 | _build 2 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | SOURCEDIR = . 8 | BUILDDIR = _build 9 | 10 | # Put it first so that "make" without argument is like "make help". 11 | help: 12 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 13 | 14 | .PHONY: help Makefile 15 | 16 | # Catch-all target: route all unknown targets to Sphinx using the new 17 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 18 | %: Makefile 19 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 20 | -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- 1 | 2 | # Build the docs: 3 | 4 | 1. Install dependencies in `requirements.txt` 5 | 2. `make html` 6 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | .. detectron2 documentation master file, created by 2 | sphinx-quickstart on Sat Sep 21 13:46:45 2019. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | Welcome to detectron2's documentation! 7 | ====================================== 8 | 9 | .. 
toctree:: 10 | :maxdepth: 2 11 | 12 | tutorials/index 13 | notes/index 14 | modules/index 15 | -------------------------------------------------------------------------------- /docs/modules/checkpoint.rst: -------------------------------------------------------------------------------- 1 | detectron2.checkpoint package 2 | ============================= 3 | 4 | .. automodule:: detectron2.checkpoint 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/modules/config.rst: -------------------------------------------------------------------------------- 1 | detectron2.config package 2 | ========================= 3 | 4 | .. automodule:: detectron2.config 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/modules/data.datasets.rst: -------------------------------------------------------------------------------- 1 | detectron2.data.datasets package 2 | ================================ 3 | 4 | .. automodule:: detectron2.data.datasets 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/modules/data.rst: -------------------------------------------------------------------------------- 1 | detectron2.data package 2 | ======================= 3 | 4 | .. automodule:: detectron2.data 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | 9 | detectron2.data.detection\_utils module 10 | --------------------------------------- 11 | 12 | .. automodule:: detectron2.data.detection_utils 13 | :members: 14 | :undoc-members: 15 | :show-inheritance: 16 | 17 | Subpackages 18 | ----------- 19 | 20 | .. toctree:: 21 | 22 | data.datasets 23 | data.samplers 24 | data.transforms 25 | 26 | 27 | -------------------------------------------------------------------------------- /docs/modules/data.samplers.rst: -------------------------------------------------------------------------------- 1 | detectron2.data.samplers package 2 | ================================ 3 | 4 | .. automodule:: detectron2.data.samplers 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/modules/data.transforms.rst: -------------------------------------------------------------------------------- 1 | detectron2.data.transforms package 2 | ================================== 3 | 4 | .. automodule:: detectron2.data.transforms 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/modules/engine.rst: -------------------------------------------------------------------------------- 1 | detectron2.engine package 2 | ========================= 3 | 4 | 5 | .. automodule:: detectron2.engine 6 | :members: 7 | :undoc-members: 8 | :show-inheritance: 9 | 10 | 11 | detectron2.engine.defaults module 12 | --------------------------------- 13 | 14 | .. automodule:: detectron2.engine.defaults 15 | :members: 16 | :undoc-members: 17 | :show-inheritance: 18 | 19 | detectron2.engine.hooks module 20 | --------------------------------- 21 | 22 | .. 
automodule:: detectron2.engine.hooks 23 | :members: 24 | :undoc-members: 25 | :show-inheritance: 26 | -------------------------------------------------------------------------------- /docs/modules/evaluation.rst: -------------------------------------------------------------------------------- 1 | detectron2.evaluation package 2 | ============================= 3 | 4 | .. automodule:: detectron2.evaluation 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/modules/index.rst: -------------------------------------------------------------------------------- 1 | API Documentation 2 | ================== 3 | 4 | .. toctree:: 5 | 6 | checkpoint 7 | config 8 | data 9 | engine 10 | evaluation 11 | layers 12 | modeling 13 | solver 14 | structures 15 | utils 16 | -------------------------------------------------------------------------------- /docs/modules/layers.rst: -------------------------------------------------------------------------------- 1 | detectron2.layers package 2 | ========================= 3 | 4 | .. automodule:: detectron2.layers 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/modules/modeling.rst: -------------------------------------------------------------------------------- 1 | detectron2.modeling package 2 | =========================== 3 | 4 | .. automodule:: detectron2.modeling 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/modules/solver.rst: -------------------------------------------------------------------------------- 1 | detectron2.solver package 2 | ========================= 3 | 4 | .. automodule:: detectron2.solver 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/modules/structures.rst: -------------------------------------------------------------------------------- 1 | detectron2.structures package 2 | ============================= 3 | 4 | .. automodule:: detectron2.structures 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/modules/utils.rst: -------------------------------------------------------------------------------- 1 | detectron2.utils package 2 | ======================== 3 | 4 | detectron2.utils.colormap module 5 | -------------------------------- 6 | 7 | .. automodule:: detectron2.utils.colormap 8 | :members: 9 | :undoc-members: 10 | :show-inheritance: 11 | 12 | detectron2.utils.comm module 13 | ---------------------------- 14 | 15 | .. automodule:: detectron2.utils.comm 16 | :members: 17 | :undoc-members: 18 | :show-inheritance: 19 | 20 | 21 | detectron2.utils.events module 22 | ------------------------------ 23 | 24 | .. automodule:: detectron2.utils.events 25 | :members: 26 | :undoc-members: 27 | :show-inheritance: 28 | 29 | 30 | detectron2.utils.logger module 31 | ------------------------------ 32 | 33 | .. automodule:: detectron2.utils.logger 34 | :members: 35 | :undoc-members: 36 | :show-inheritance: 37 | 38 | 39 | detectron2.utils.registry module 40 | -------------------------------- 41 | 42 | .. 
automodule:: detectron2.utils.registry 43 | :members: 44 | :undoc-members: 45 | :show-inheritance: 46 | 47 | 48 | detectron2.utils.video\_visualizer module 49 | ----------------------------------------- 50 | 51 | .. automodule:: detectron2.utils.video_visualizer 52 | :members: 53 | :undoc-members: 54 | :show-inheritance: 55 | 56 | detectron2.utils.visualizer module 57 | ---------------------------------- 58 | 59 | .. automodule:: detectron2.utils.visualizer 60 | :members: 61 | :undoc-members: 62 | :show-inheritance: 63 | 64 | -------------------------------------------------------------------------------- /docs/notes/changelog.md: -------------------------------------------------------------------------------- 1 | # Change Log 2 | 3 | 4 | ### Major Changes in Code 5 | 6 | * 2019-10-10: initial release. 7 | 8 | 9 | ### Config Version Change Log 10 | 11 | * v1: Rename `RPN_HEAD.NAME` to `RPN.HEAD_NAME`. 12 | * v2: A batch of renames of many configurations before release. 13 | -------------------------------------------------------------------------------- /docs/notes/index.rst: -------------------------------------------------------------------------------- 1 | Notes 2 | ====================================== 3 | 4 | .. toctree:: 5 | :maxdepth: 2 6 | 7 | benchmarks 8 | compatibility 9 | changelog 10 | -------------------------------------------------------------------------------- /docs/tutorials/configs.md: -------------------------------------------------------------------------------- 1 | # Using Configs 2 | 3 | Detectron2's config system uses yaml and [yacs](https://github.com/rbgirshick/yacs). 4 | In addition to the basic operations that access and update a config, we provide 5 | the following extra functionalities: 6 | 7 | 1. The config can have a `_BASE_: base.yaml` field, which will load a base config first. 8 | Values in the base config will be overwritten in sub-configs, if there are any conflicts. 9 | We provide several base configs for standard model architectures. 10 | 2. We provide config versioning, for backward compatibility. 11 | If your config file is versioned with a config line like `VERSION: 2`, 12 | detectron2 will still recognize it even if we rename some keys in the future. 13 | 14 | ### Best Practice with Configs 15 | 16 | 1. Treat the configs you write as "code": avoid copying them or duplicating them; use "_BASE_" 17 | instead to share common parts between configs. 18 | 19 | 2. Keep the configs you write simple: don't include keys that do not affect the experimental setting. 20 | 21 | 3. Keep a version number in your configs (or the base config), e.g., `VERSION: 2`, 22 | for backward compatibility. 23 | 24 | 4. Save a full config together with a trained model, and use it to run inference. 25 | This is more robust to changes that may happen to the config definition 26 | (e.g., if a default value changed). 27 | -------------------------------------------------------------------------------- /docs/tutorials/extend.md: -------------------------------------------------------------------------------- 1 | # Extend Detectron2's Defaults 2 | 3 | __Research is about doing things in new ways__. 4 | This brings a tension in how to create abstractions in code, 5 | which is a challenge for any research engineering project of a significant size: 6 | 7 | 1. On one hand, it needs to have very thin abstractions to allow for the possibility of doing 8 | everything in new ways. It should be reasonably easy to break existing 9 | abstractions and replace them with new ones.
10 | 11 | 2. On the other hand, such a project also needs reasonably high-level 12 | abstractions, so that users can easily do things in standard ways, 13 | without worrying too much about the details that only certain researchers care about. 14 | 15 | In detectron2, there are two types of interfaces that address this tension together: 16 | 17 | 1. Functions and classes that take only a "config" argument (optionally with a minimal 18 | set of extra arguments in cases of mature interfaces). 19 | 20 | Such functions and classes implement 21 | the "standard default" behavior: it will read what it needs from the 22 | config and do the "standard" thing. 23 | Users only need to load a standard config and pass it around, without having to worry about 24 | which arguments are used and what they all mean. 25 | 26 | 2. Functions and classes that have well-defined explicit arguments. 27 | 28 | Each of these is a small building block of the entire system. 29 | They require users' effort to stitch together, but can be stitched together in more flexible ways. 30 | When you need to implement something different from the "standard defaults" 31 | included in detectron2, these well-defined components can be reused. 32 | 33 | 34 | If you only need the standard behavior, the [Beginner's Tutorial](getting_started) 35 | should suffice. If you need to extend detectron2 to your own needs, 36 | see the following tutorials for more details: 37 | 38 | * Detectron2 includes a few standard datasets, but you can use custom ones. See 39 | [Use Custom Datasets](datasets). 40 | * Detectron2 contains the standard logic that creates a data loader from a 41 | dataset, but you can write your own as well. See [Use Custom Data Loaders](data_loading). 42 | * Detectron2 implements many standard detection models, and provide ways for you 43 | to overwrite its behaviors. See [Writing Models](models). 44 | * Detectron2 provides a default training loop that is good for common training tasks. 45 | You can customize it with hooks, or write your own loop instead. See [training](training). 46 | -------------------------------------------------------------------------------- /docs/tutorials/getting_started.md: -------------------------------------------------------------------------------- 1 | 2 | ## Getting Started with Detectron2 3 | 4 | This document provides a brief intro of the usage of builtin command-line tools in detectron2. 5 | 6 | For a tutorial that involves actual coding with the API, 7 | see our [Colab Notebook](https://colab.research.google.com/drive/16jcaJoc6bCFAQ96jDe2HwtXj7BMD_-m5) 8 | which covers how to run inference with an 9 | existing model, and how to train a builtin model on a custom dataset. 10 | 11 | For more advanced tutorials, refer to our [documentation](https://detectron2.readthedocs.io/tutorials/extend.html). 12 | 13 | 14 | ### Inference with Pre-trained Models 15 | 16 | 1. Pick a model and its config file from 17 | [model zoo](https://github.com/facebookresearch/detectron2/blob/master/MODEL_ZOO.md), 18 | for example, `mask_rcnn_R_50_FPN_3x.yaml`. 19 | 2. Run the demo with 20 | ``` 21 | python demo/demo.py --config-file configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml \ 22 | --input input1.jpg input2.jpg \ 23 | --opts MODEL.WEIGHTS detectron2://COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x/137849600/model_final_f10217.pkl 24 | ``` 25 | It will run the inference and show visualizations in an OpenCV window. 26 | 27 | * To run __on your webcam__, replace `--input files` with `--webcam`. 
28 | * To run __on a video__, replace `--input files` with `--video-input video.mp4`. 29 | * To save outputs to a directory (for images) or a file (for webcam or video), use `--output`. 30 | 31 | 32 | ### Train a Standard Model 33 | 34 | We provide a script, "tools/train_net.py", that is made to train 35 | all the configs provided in detectron2. 36 | You may want to use it as a reference to write your own training script for new research. 37 | 38 | To train a model with "train_net.py", first 39 | set up the corresponding datasets following 40 | [datasets/README.md](https://github.com/facebookresearch/detectron2/blob/master/datasets/README.md), 41 | then run: 42 | ``` 43 | python tools/train_net.py --num-gpus 8 \ 44 | --config-file configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml 45 | ``` 46 | 47 | The configs are made for 8-GPU training. To train on 1 GPU, use: 48 | ``` 49 | python tools/train_net.py \ 50 | --config-file configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml \ 51 | SOLVER.IMS_PER_BATCH 2 SOLVER.BASE_LR 0.0025 52 | ``` 53 | 54 | For most models, CPU training is not supported. 55 | 56 | (Note that we applied the [linear learning rate scaling rule](https://arxiv.org/abs/1706.02677) 57 | when changing the batch size.) 58 | 59 | To evaluate a model's performance, use `train_net.py --eval-only`. 60 | For more options, see `python tools/train_net.py -h`. 61 | 62 | ### Use Detectron2 in Your Code 63 | 64 | See our [Colab Notebook](https://colab.research.google.com/drive/16jcaJoc6bCFAQ96jDe2HwtXj7BMD_-m5) 65 | to learn how to use detectron2 APIs to: 66 | 1. run inference with an existing model 67 | 2. train a builtin model on a custom dataset 68 | 69 | See [detectron2/projects](https://github.com/facebookresearch/detectron2/tree/master/projects) 70 | for more ways to build your project on detectron2. 71 | -------------------------------------------------------------------------------- /docs/tutorials/index.rst: -------------------------------------------------------------------------------- 1 | Tutorials 2 | ====================================== 3 | 4 | .. toctree:: 5 | :maxdepth: 2 6 | 7 | install 8 | getting_started 9 | extend 10 | datasets 11 | data_loading 12 | models 13 | training 14 | configs 15 | -------------------------------------------------------------------------------- /docs/tutorials/install.md: -------------------------------------------------------------------------------- 1 | ## Installation 2 | 3 | Our [Colab Notebook](https://colab.research.google.com/drive/16jcaJoc6bCFAQ96jDe2HwtXj7BMD_-m5) also has step-by-step instructions that install detectron2. 4 | 5 | ### Requirements 6 | - Python >= 3.6 7 | - PyTorch 1.3 8 | - [torchvision](https://github.com/pytorch/vision/) that matches the PyTorch installation. 9 | You can install them together at [pytorch.org](http://pytorch.org) to make sure of this.
10 | - OpenCV, needed by demo and visualization 11 | - [fvcore](https://github.com/facebookresearch/fvcore/): `pip install 'git+https://github.com/facebookresearch/fvcore'` 12 | - pycocotools: `pip install cython; pip install 'git+https://github.com/cocodataset/cocoapi.git#subdirectory=PythonAPI'` 13 | - GCC >= 4.9 14 | - apex 15 | git clone https://www.github.com/nvidia/apex 16 | cd apex 17 | python setup.py install 18 | 19 | 20 | ### Build detectron2 21 | 22 | After having the above dependencies, run: 23 | ``` 24 | git clone git@github.com:facebookresearch/detectron2.git 25 | cd detectron2 26 | export TORCH_CUDA_ARCH_LIST="Kepler;Kepler+Tesla;Maxwell;Maxwell+Tegra;Pascal;Volta;Turing" ## compile for every platform 27 | python setup.py build develop 28 | 29 | # or if you are on macOS 30 | # MACOSX_DEPLOYMENT_TARGET=10.9 CC=clang CXX=clang++ python setup.py build develop 31 | 32 | # or, as an alternative to `setup.py`, do 33 | # pip install . 34 | ``` 35 | Note: you may need to rebuild detectron2 after reinstalling a different build of PyTorch. 36 | 37 | ### Common Installation Issues 38 | 39 | + Undefined torch/aten symbols, or segmentation fault immediately when running the library. 40 | This may mean one of the two: 41 | 42 | * detectron2 or torchvision is not compiled with the version of PyTorch you're running. 43 | 44 | If you use a pre-built torchvision, uninstall torchvision & pytorch, and reinstall them 45 | following [pytorch.org](http://pytorch.org). 46 | If you manually build detectron2 or torchvision, remove the files you built (`build/`, `**/*.so`) 47 | and rebuild them. 48 | 49 | * detectron2 or torchvision is not compiled using gcc >= 4.9. 50 | 51 | You'll see a warning message during compilation in this case. Please remove the files you build, 52 | and rebuild them. 53 | Technically, you need the identical compiler that's used to build pytorch to guarantee 54 | compatibility. But in practice, gcc >= 4.9 should work OK. 55 | 56 | + Undefined cuda symbols. The version of NVCC you use to build detectron2 or torchvision does 57 | not match the version of cuda you are running with. 58 | This happens sometimes when using anaconda. 59 | 60 | -------------------------------------------------------------------------------- /docs/tutorials/models.md: -------------------------------------------------------------------------------- 1 | # Using and Writing Models 2 | 3 | Models (and their sub-models) in detectron2 are built by 4 | functions such as `build_model`, `build_backbone`, `build_roi_heads`: 5 | ```python 6 | from detectron2.modeling import build_model 7 | model = build_model(cfg) # returns a torch.nn.Module 8 | ``` 9 | 10 | In some cases, e.g. if you are trying to do something completely new, you may wish to implement 11 | a model entirely from scratch within detectron2. However, in many situations you may 12 | be interested in modifying or extending some components of an existing model. 13 | Therefore, we also provide a registration mechanism that lets you override the 14 | behavior of certain internal components of standard models. 15 | 16 | For example, to add a new backbone, import this code: 17 | ```python 18 | from detectron2.modeling import BACKBONE_REGISTRY, Backbone 19 | @BACKBONE_REGISTRY.register() 20 | class NewBackBone(Backbone): 21 | def __init__(self, cfg, input_shape): 22 | # create your own backbone 23 | ``` 24 | which will allow you to use `cfg.MODEL.BACKBONE.NAME = 'NewBackBone'` in your config file. 
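To make the lookup side of the registration concrete, here is a minimal sketch (not taken from this repository) of how the registered name is resolved when the backbone is built. It assumes the `NewBackBone` class above has already been imported, so that its `@BACKBONE_REGISTRY.register()` decorator has run:

```python
from detectron2.config import get_cfg
from detectron2.modeling import build_backbone

cfg = get_cfg()
cfg.MODEL.BACKBONE.NAME = "NewBackBone"  # same effect as setting it in a yaml config

# build_backbone looks "NewBackBone" up in BACKBONE_REGISTRY and calls
# NewBackBone(cfg, input_shape) with a default ShapeSpec input shape.
backbone = build_backbone(cfg)
```

Note that for the returned `backbone` to be usable by downstream components, `NewBackBone` must implement the `Backbone` interface (in particular `forward` and `output_shape`).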
25 | 
26 | As another example, to add new abilities to the ROI heads in the Generalized R-CNN meta-architecture,
27 | you can implement a new
28 | [ROIHeads](../modules/modeling.html#detectron2.modeling.ROIHeads) subclass and register it in the `ROI_HEADS_REGISTRY`.
29 | See [densepose in detectron2](https://github.com/facebookresearch/detectron2/tree/master/projects/DensePose)
30 | for an example.
31 | 
32 | Other registries can be found in the [API documentation](../modules/modeling.html).
-------------------------------------------------------------------------------- /docs/tutorials/training.md: --------------------------------------------------------------------------------
1 | # Training
2 | 
3 | From the previous tutorials, you may now have a custom model and data loader.
4 | 
5 | You are free to create your own optimizer and write the training logic: it's
6 | usually easy with PyTorch, and it allows researchers to see the entire training
7 | logic more clearly.
8 | 
9 | We also provide a standardized "trainer" abstraction with a
10 | [minimal hook system](../modules/engine.html#detectron2.engine.HookBase)
11 | that helps simplify the standard types of training.
12 | 
13 | You can use
14 | [SimpleTrainer().train()](../modules/engine.html#detectron2.engine.SimpleTrainer),
15 | which does single-cost, single-optimizer, single-data-source training.
16 | Or use [DefaultTrainer().train()](../modules/engine.html#detectron2.engine.defaults.DefaultTrainer),
17 | which includes more standard behavior that one might want to opt into.
-------------------------------------------------------------------------------- /killpy.sh: --------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | ps -ef | grep python | grep -v grep | cut -c 9-15 | xargs kill -9
-------------------------------------------------------------------------------- /outputs/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/youngfly11/ReIR-WeaklyGrounding.pytorch/2a962c335541c981149a042794ee508e0e7226f4/outputs/.DS_Store
-------------------------------------------------------------------------------- /projects/DensePose/README.md: --------------------------------------------------------------------------------
1 | 
2 | # DensePose in Detectron2
3 | **Dense Human Pose Estimation In The Wild**
4 | 
5 | _Rıza Alp Güler, Natalia Neverova, Iasonas Kokkinos_
6 | 
7 | [[`densepose.org`](https://densepose.org)] [[`arXiv`](https://arxiv.org/abs/1802.00434)] [[`BibTeX`](#CitingDensePose)]
8 | 
9 | Dense human pose estimation aims at mapping all human pixels of an RGB image to the 3D surface of the human body.
10 | 
11 | 
12 | 13 |
14 | 
15 | In this repository, we provide the code to train and evaluate DensePose-RCNN. We also provide tools to visualize
16 | DensePose annotations and results.
17 | 
18 | ## Training
19 | 
20 | To train a model, one can call
21 | ```bash
22 | python /path/to/detectron2/projects/DensePose/train_net.py --config-file <config.yaml>
23 | ```
24 | 
25 | For example, to launch end-to-end DensePose-RCNN training with a ResNet-50 FPN backbone on a single GPU,
26 | one should execute:
27 | ```bash
28 | python /path/to/detectron2/projects/DensePose/train_net.py --config-file /path/to/detectron2/projects/DensePose/configs/densepose_R_50_FPN_s1x.yaml
29 | ```
30 | 
31 | ## Testing
32 | 
33 | Model testing can be done in the same way as training, except for an additional flag `--eval-only` and
34 | model location specification through `MODEL.WEIGHTS model.pth` in the command line:
35 | ```bash
36 | python /path/to/detectron2/projects/DensePose/train_net.py --config-file /path/to/detectron2/projects/DensePose/configs/densepose_R_50_FPN_s1x.yaml --eval-only MODEL.WEIGHTS model.pth
37 | ```
38 | 
39 | ## Tools
40 | 
41 | We provide tools which allow one to:
42 | - easily view DensePose annotated data in a dataset;
43 | - perform DensePose inference on a set of images;
44 | - visualize DensePose model results.
45 | 
46 | `query_db` is a tool to print or visualize DensePose data in a dataset.
47 | Details on this tool can be found in [`TOOL_QUERY_DB.md`](doc/TOOL_QUERY_DB.md).
48 | 
49 | `apply_net` is a tool to print or visualize DensePose results.
50 | Details on this tool can be found in [`TOOL_APPLY_NET.md`](doc/TOOL_APPLY_NET.md).
51 | 
52 | ## Citing DensePose
53 | 
54 | If you use DensePose, please use the following BibTeX entry.
55 | 
56 | ```
57 | @InProceedings{Guler2018DensePose,
58 |   title={DensePose: Dense Human Pose Estimation In The Wild},
59 |   author={R{\i}za Alp G\"uler and Natalia Neverova and Iasonas Kokkinos},
60 |   booktitle={The IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
61 |   year={2018}
62 | }
63 | ```
64 | 
65 | 
-------------------------------------------------------------------------------- /projects/DensePose/configs/Base-DensePose-RCNN.yaml: --------------------------------------------------------------------------------
1 | MODEL:
2 |   META_ARCHITECTURE: "GeneralizedRCNN"
3 |   BACKBONE:
4 |     NAME: "build_resnet_fpn_backbone"
5 |   RESNETS:
6 |     OUT_FEATURES: ["res2", "res3", "res4", "res5"]
7 |   FPN:
8 |     IN_FEATURES: ["res2", "res3", "res4", "res5"]
9 |   ANCHOR_GENERATOR:
10 |     SIZES: [[32], [64], [128], [256], [512]]  # One size for each in feature map
11 |     ASPECT_RATIOS: [[0.5, 1.0, 2.0]]  # Three aspect ratios (same for all in feature maps)
12 |   RPN:
13 |     IN_FEATURES: ["p2", "p3", "p4", "p5", "p6"]
14 |     PRE_NMS_TOPK_TRAIN: 2000  # Per FPN level
15 |     PRE_NMS_TOPK_TEST: 1000  # Per FPN level
16 |     # Detectron1 uses 2000 proposals per-batch,
17 |     # (See "modeling/rpn/rpn_outputs.py" for details of this legacy issue)
18 |     # which is approximately 1000 proposals per-image since the default batch size for FPN is 2.
19 | POST_NMS_TOPK_TRAIN: 1000 20 | POST_NMS_TOPK_TEST: 1000 21 | 22 | DENSEPOSE_ON: True 23 | ROI_HEADS: 24 | NAME: "DensePoseROIHeads" 25 | IN_FEATURES: ["p2", "p3", "p4", "p5"] 26 | NUM_CLASSES: 1 27 | ROI_BOX_HEAD: 28 | NAME: "FastRCNNConvFCHead" 29 | NUM_FC: 2 30 | POOLER_RESOLUTION: 7 31 | POOLER_SAMPLING_RATIO: 2 32 | POOLER_TYPE: "ROIAlign" 33 | ROI_DENSEPOSE_HEAD: 34 | NAME: "DensePoseV1ConvXHead" 35 | POOLER_TYPE: "ROIAlign" 36 | DATASETS: 37 | TRAIN: ("densepose_coco_2014_train", "densepose_coco_2014_valminusminival") 38 | TEST: ("densepose_coco_2014_minival",) 39 | SOLVER: 40 | IMS_PER_BATCH: 16 41 | BASE_LR: 0.02 42 | STEPS: (60000, 80000) 43 | MAX_ITER: 90000 44 | -------------------------------------------------------------------------------- /projects/DensePose/configs/densepose_R_50_FPN_s1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-DensePose-RCNN.yaml" 2 | INPUT: 3 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 4 | MODEL: 5 | WEIGHTS: "catalog://ImageNetPretrained/MSRA/R-50" 6 | SOLVER: 7 | MAX_ITER: 130000 8 | STEPS: (100000, 120000) 9 | BASE_LR: 0.002 10 | IMS_PER_BATCH: 24 11 | WARMUP_FACTOR: 0.1 12 | 13 | -------------------------------------------------------------------------------- /projects/DensePose/configs/quick_schedules/densepose_R_50_FPN_inference_acc_test.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../densepose_R_50_FPN_s1x.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://e2e_densepose_R_50_FPN_s1x/124238535/model_final_5f3d7f9875229310fdfe6649459c0157.pkl" 4 | DATASETS: 5 | TRAIN: () 6 | TEST: ("densepose_coco_2014_minival_100",) 7 | TEST: 8 | EXPECTED_RESULTS: [["bbox", "AP", 56.05, 0.025], ["densepose", "AP", 46.54, 0.02]] 9 | -------------------------------------------------------------------------------- /projects/DensePose/configs/quick_schedules/densepose_R_50_FPN_instant_test.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-DensePose-RCNN.yaml" 2 | MODEL: 3 | WEIGHTS: "catalog://ImageNetPretrained/MSRA/R-50" 4 | DENSEPOSE_ON: True 5 | ROI_DENSEPOSE_HEAD: 6 | NAME: "DensePoseV1ConvXHead" 7 | DATASETS: 8 | TRAIN: ("densepose_coco_2014_minival_100",) 9 | TEST: ("densepose_coco_2014_minival_100",) 10 | SOLVER: 11 | MAX_ITER: 40 12 | STEPS: (30,) 13 | BASE_LR: 0.002 14 | IMS_PER_BATCH: 24 15 | -------------------------------------------------------------------------------- /projects/DensePose/configs/quick_schedules/densepose_R_50_FPN_training_acc_test.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-DensePose-RCNN.yaml" 2 | INPUT: 3 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 4 | MODEL: 5 | WEIGHTS: "catalog://ImageNetPretrained/MSRA/R-50" 6 | DENSEPOSE_ON: True 7 | ROI_HEADS: 8 | NUM_CLASSES: 1 9 | ROI_BOX_HEAD: 10 | POOLER_RESOLUTION: 7 11 | POOLER_SAMPLING_RATIO: 2 12 | ROI_DENSEPOSE_HEAD: 13 | NAME: "DensePoseV1ConvXHead" 14 | DATASETS: 15 | TRAIN: ("densepose_coco_2014_minival",) 16 | TEST: ("densepose_coco_2014_minival",) 17 | SOLVER: 18 | MAX_ITER: 6000 19 | STEPS: (5500, 5800) 20 | BASE_LR: 0.002 21 | IMS_PER_BATCH: 24 22 | WARMUP_FACTOR: 0.1 23 | TEST: 24 | EXPECTED_RESULTS: [["bbox", "AP", 58.27, 1.0], ["densepose", "AP", 42.47, 1.5]] 25 | 26 | -------------------------------------------------------------------------------- /projects/DensePose/densepose/__init__.py: 
-------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | from . import dataset # just to register data 3 | from .config import add_densepose_config 4 | from .dataset_mapper import DatasetMapper 5 | from .densepose_head import ROI_DENSEPOSE_HEAD_REGISTRY 6 | from .evaluator import DensePoseCOCOEvaluator 7 | from .roi_head import DensePoseROIHeads 8 | from .structures import DensePoseDataRelative, DensePoseList, DensePoseTransformData 9 | -------------------------------------------------------------------------------- /projects/DensePose/densepose/config.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 3 | 4 | from detectron2.config import CfgNode as CN 5 | 6 | 7 | def add_densepose_config(cfg): 8 | """ 9 | Add config for densepose head. 10 | """ 11 | _C = cfg 12 | 13 | _C.MODEL.DENSEPOSE_ON = True 14 | 15 | _C.MODEL.ROI_DENSEPOSE_HEAD = CN() 16 | _C.MODEL.ROI_DENSEPOSE_HEAD.NAME = "" 17 | _C.MODEL.ROI_DENSEPOSE_HEAD.NUM_STACKED_CONVS = 8 18 | # Number of parts used for point labels 19 | _C.MODEL.ROI_DENSEPOSE_HEAD.NUM_PATCHES = 24 20 | _C.MODEL.ROI_DENSEPOSE_HEAD.DECONV_KERNEL = 4 21 | _C.MODEL.ROI_DENSEPOSE_HEAD.CONV_HEAD_DIM = 512 22 | _C.MODEL.ROI_DENSEPOSE_HEAD.CONV_HEAD_KERNEL = 3 23 | _C.MODEL.ROI_DENSEPOSE_HEAD.UP_SCALE = 2 24 | _C.MODEL.ROI_DENSEPOSE_HEAD.HEATMAP_SIZE = 56 25 | _C.MODEL.ROI_DENSEPOSE_HEAD.POOLER_TYPE = "ROIAlignV2" 26 | _C.MODEL.ROI_DENSEPOSE_HEAD.POOLER_RESOLUTION = 14 27 | _C.MODEL.ROI_DENSEPOSE_HEAD.POOLER_SAMPLING_RATIO = 2 28 | # Overlap threshold for an RoI to be considered foreground (if >= FG_IOU_THRESHOLD) 29 | _C.MODEL.ROI_DENSEPOSE_HEAD.FG_IOU_THRESHOLD = 0.7 30 | # Loss weights for annotation masks.(14 Parts) 31 | _C.MODEL.ROI_DENSEPOSE_HEAD.INDEX_WEIGHTS = 2.0 32 | # Loss weights for surface parts. (24 Parts) 33 | _C.MODEL.ROI_DENSEPOSE_HEAD.PART_WEIGHTS = 0.3 34 | # Loss weights for UV regression. 35 | _C.MODEL.ROI_DENSEPOSE_HEAD.POINT_REGRESSION_WEIGHTS = 0.1 36 | -------------------------------------------------------------------------------- /projects/DensePose/densepose/dataset.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | import os 3 | 4 | from detectron2.data.datasets import register_coco_instances 5 | 6 | 7 | def get_densepose_metadata(): 8 | meta = { 9 | "thing_classes": ["person"], 10 | "densepose_transform_src": "detectron2://densepose/UV_symmetry_transforms.mat", 11 | "densepose_smpl_subdiv": "detectron2://densepose/SMPL_subdiv.mat", 12 | "densepose_smpl_subdiv_transform": "detectron2://densepose/SMPL_SUBDIV_TRANSFORM.mat", 13 | } 14 | return meta 15 | 16 | 17 | SPLITS = { 18 | "densepose_coco_2014_train": ("coco/train2014", "coco/annotations/densepose_train2014.json"), 19 | "densepose_coco_2014_minival": ("coco/val2014", "coco/annotations/densepose_minival2014.json"), 20 | "densepose_coco_2014_minival_100": ( 21 | "coco/val2014", 22 | "coco/annotations/densepose_minival2014_100.json", 23 | ), 24 | "densepose_coco_2014_valminusminival": ( 25 | "coco/val2014", 26 | "coco/annotations/densepose_valminusminival2014.json", 27 | ), 28 | } 29 | 30 | for key, (image_root, json_file) in SPLITS.items(): 31 | # Assume pre-defined datasets live in `./datasets`. 
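    # register_coco_instances() registers each split in detectron2's DatasetCatalog
    # and MetadataCatalog under the name `key`, so configs can refer to it in
    # DATASETS.TRAIN / DATASETS.TEST.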
32 |     register_coco_instances(
33 |         key,
34 |         get_densepose_metadata(),
35 |         os.path.join("datasets", json_file),
36 |         os.path.join("datasets", image_root),
37 |     )
38 | 
-------------------------------------------------------------------------------- /projects/DensePose/densepose/utils/logger.py: --------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
2 | import logging
3 | 
4 | 
5 | def verbosity_to_level(verbosity):
6 |     if verbosity is not None:
7 |         if verbosity == 0:
8 |             return logging.WARNING
9 |         elif verbosity == 1:
10 |             return logging.INFO
11 |         elif verbosity >= 2:
12 |             return logging.DEBUG
13 |     return logging.WARNING
14 | 
-------------------------------------------------------------------------------- /projects/DensePose/densepose/vis/bounding_box.py: --------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
2 | from .base import RectangleVisualizer, TextVisualizer
3 | 
4 | 
5 | class BoundingBoxVisualizer(object):
6 |     def __init__(self):
7 |         self.rectangle_visualizer = RectangleVisualizer()
8 | 
9 |     def visualize(self, image_bgr, boxes_xywh):
10 |         for bbox_xywh in boxes_xywh:
11 |             image_bgr = self.rectangle_visualizer.visualize(image_bgr, bbox_xywh)
12 |         return image_bgr
13 | 
14 | 
15 | class ScoredBoundingBoxVisualizer(object):
16 |     def __init__(self, bbox_visualizer_params=None, score_visualizer_params=None):
17 |         if bbox_visualizer_params is None:
18 |             bbox_visualizer_params = {}
19 |         if score_visualizer_params is None:
20 |             score_visualizer_params = {}
21 |         self.visualizer_bbox = RectangleVisualizer(**bbox_visualizer_params)
22 |         self.visualizer_score = TextVisualizer(**score_visualizer_params)
23 | 
24 |     def visualize(self, image_bgr, scored_bboxes):
25 |         boxes_xywh, box_scores = scored_bboxes
26 |         assert len(boxes_xywh) == len(box_scores), (
27 |             "Number of bounding boxes {} should be equal to the number of "
28 |             "scores {}".format(len(boxes_xywh), len(box_scores))
29 |         )
30 |         for i, box_xywh in enumerate(boxes_xywh):
31 |             score_i = box_scores[i]
32 |             image_bgr = self.visualizer_bbox.visualize(image_bgr, box_xywh)
33 |             score_txt = "{0:6.4f}".format(score_i)
34 |             topleft_xy = box_xywh[0], box_xywh[1]
35 |             image_bgr = self.visualizer_score.visualize(image_bgr, score_txt, topleft_xy)
36 |         return image_bgr
37 | 
-------------------------------------------------------------------------------- /projects/DensePose/train_net.py: --------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
2 | """
3 | DensePose Training Script.
4 | 
5 | This script is similar to the training script in detectron2/tools.
6 | 
7 | It is an example of how a user might use detectron2 for a new project.
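
Example (a hypothetical invocation; adjust the config path to your checkout):

    python train_net.py --num-gpus 8 \
        --config-file configs/densepose_R_50_FPN_s1x.yaml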
8 | """ 9 | 10 | import os 11 | 12 | import detectron2.utils.comm as comm 13 | from detectron2.checkpoint import DetectionCheckpointer 14 | from detectron2.config import get_cfg 15 | from detectron2.data import build_detection_test_loader, build_detection_train_loader 16 | from detectron2.engine import DefaultTrainer, default_argument_parser, default_setup, launch 17 | from detectron2.evaluation import COCOEvaluator, DatasetEvaluators, verify_results 18 | from detectron2.utils.logger import setup_logger 19 | 20 | from densepose import DatasetMapper, DensePoseCOCOEvaluator, add_densepose_config 21 | 22 | 23 | class Trainer(DefaultTrainer): 24 | @classmethod 25 | def build_evaluator(cls, cfg, dataset_name): 26 | output_folder = os.path.join(cfg.OUTPUT_DIR, "inference") 27 | evaluators = [COCOEvaluator(dataset_name, cfg, True, output_folder)] 28 | if cfg.MODEL.DENSEPOSE_ON: 29 | evaluators.append(DensePoseCOCOEvaluator(dataset_name, True, output_folder)) 30 | return DatasetEvaluators(evaluators) 31 | 32 | @classmethod 33 | def build_test_loader(cls, cfg, dataset_name): 34 | return build_detection_test_loader(cfg, dataset_name, mapper=DatasetMapper(cfg, False)) 35 | 36 | @classmethod 37 | def build_train_loader(cls, cfg): 38 | return build_detection_train_loader(cfg, mapper=DatasetMapper(cfg, True)) 39 | 40 | 41 | def setup(args): 42 | cfg = get_cfg() 43 | add_densepose_config(cfg) 44 | cfg.merge_from_file(args.config_file) 45 | cfg.merge_from_list(args.opts) 46 | cfg.freeze() 47 | default_setup(cfg, args) 48 | # Setup logger for "densepose" module 49 | setup_logger(output=cfg.OUTPUT_DIR, distributed_rank=comm.get_rank(), name="densepose") 50 | return cfg 51 | 52 | 53 | def main(args): 54 | cfg = setup(args) 55 | 56 | if args.eval_only: 57 | model = Trainer.build_model(cfg) 58 | DetectionCheckpointer(model, save_dir=cfg.OUTPUT_DIR).resume_or_load( 59 | cfg.MODEL.WEIGHTS, resume=args.resume 60 | ) 61 | res = Trainer.test(cfg, model) 62 | if comm.is_main_process(): 63 | verify_results(cfg, res) 64 | return res 65 | 66 | trainer = Trainer(cfg) 67 | trainer.resume_or_load(resume=args.resume) 68 | return trainer.train() 69 | 70 | 71 | if __name__ == "__main__": 72 | args = default_argument_parser().parse_args() 73 | print("Command Line Args:", args) 74 | launch( 75 | main, 76 | args.num_gpus, 77 | num_machines=args.num_machines, 78 | machine_rank=args.machine_rank, 79 | dist_url=args.dist_url, 80 | args=(args,), 81 | ) 82 | -------------------------------------------------------------------------------- /projects/README.md: -------------------------------------------------------------------------------- 1 | 2 | Here are a few research projects that are built on detectron2. 3 | They are examples of how to use detectron2 as a library, to make your projects more 4 | maintainable. 5 | 6 | + [DensePose: Dense Human Pose Estimation In The Wild](DensePose) 7 | + [Scale-Aware Trident Networks for Object Detection](TridentNet) 8 | + TensorMask: A Foundation for Dense Object Segmentation. (Coming Soon) 9 | + Mesh R-CNN. 
(Coming Soon)
10 | 
-------------------------------------------------------------------------------- /projects/TridentNet/README.md: --------------------------------------------------------------------------------
1 | 
2 | # TridentNet in Detectron2
3 | **Scale-Aware Trident Networks for Object Detection**
4 | 
5 | Yanghao Li\*, Yuntao Chen\*, Naiyan Wang, Zhaoxiang Zhang
6 | 
7 | [[`TridentNet`](https://github.com/TuSimple/simpledet/tree/master/models/tridentnet)] [[`arXiv`](https://arxiv.org/abs/1901.01892)] [[`BibTeX`](#CitingTridentNet)]
8 | 
9 | 
10 | 11 |
12 | 
13 | In this repository, we implement TridentNet-Fast in the Detectron2 framework. Trident Network (TridentNet) aims to generate scale-specific feature maps with a uniform representational power. We construct a parallel multi-branch architecture in which each branch shares the same transformation parameters but with different receptive fields. TridentNet-Fast is a fast approximation version of TridentNet that can achieve significant improvements without any additional parameters or computational cost.
14 | 
15 | ## Training
16 | 
17 | To train a model, one can call
18 | ```bash
19 | python /path/to/detectron2/projects/TridentNet/train_net.py --config-file <config.yaml>
20 | ```
21 | 
22 | For example, to launch end-to-end TridentNet training with a ResNet-50 backbone on 8 GPUs,
23 | one should execute:
24 | ```bash
25 | python /path/to/detectron2/projects/TridentNet/train_net.py --config-file /path/to/detectron2/projects/TridentNet/configs/tridentnet_fast_R_50_C4_1x.yaml --num-gpus 8
26 | ```
27 | 
28 | ## Testing
29 | 
30 | Model testing can be done in the same way as training, except for an additional flag `--eval-only` and
31 | model location specification through `MODEL.WEIGHTS model.pth` in the command line:
32 | ```bash
33 | python /path/to/detectron2/projects/TridentNet/train_net.py --config-file /path/to/detectron2/projects/TridentNet/configs/tridentnet_fast_R_50_C4_1x.yaml --eval-only MODEL.WEIGHTS model.pth
34 | ```
35 | 
36 | ## Results on MS-COCO in Detectron2
37 | 
38 | |Model|Backbone|Head|lr sched|AP|AP50|AP75|APs|APm|APl|
39 | |-----|--------|----|--------------|--|----|----|---|---|---|
40 | |Faster|R50-C4|C5-512ROI|1X|35.7|56.1|38.0|19.2|40.9|48.7|
41 | |TridentFast|R50-C4|C5-128ROI|1X|37.9|57.8|40.7|19.7|42.1|54.2|
42 | |Faster|R50-C4|C5-512ROI|3X|38.4|58.7|41.3|20.7|42.7|53.1|
43 | |TridentFast|R50-C4|C5-128ROI|3X|41.0|60.9|44.2|22.7|45.2|57.0|
44 | |Faster|R101-C4|C5-512ROI|3X|41.1|61.4|44.0|22.2|45.5|55.9|
45 | |TridentFast|R101-C4|C5-128ROI|3X|43.4|62.9|46.6|24.2|47.9|59.9|
46 | 
47 | 
48 | ## Citing TridentNet
49 | 
50 | If you use TridentNet, please use the following BibTeX entry.
51 | 
52 | ```
53 | @InProceedings{li2019scale,
54 |   title={Scale-Aware Trident Networks for Object Detection},
55 |   author={Li, Yanghao and Chen, Yuntao and Wang, Naiyan and Zhang, Zhaoxiang},
56 |   booktitle={The International Conference on Computer Vision (ICCV)},
57 |   year={2019}
58 | }
59 | ```
60 | 
61 | 
-------------------------------------------------------------------------------- /projects/TridentNet/configs/Base-TridentNet-Fast-C4.yaml: --------------------------------------------------------------------------------
1 | MODEL:
2 |   META_ARCHITECTURE: "GeneralizedRCNN"
3 |   BACKBONE:
4 |     NAME: "build_trident_resnet_backbone"
5 |   ROI_HEADS:
6 |     NAME: "TridentRes5ROIHeads"
7 |     POSITIVE_FRACTION: 0.5
8 |     BATCH_SIZE_PER_IMAGE: 128
9 |     PROPOSAL_APPEND_GT: False
10 |   PROPOSAL_GENERATOR:
11 |     NAME: "TridentRPN"
12 |   RPN:
13 |     POST_NMS_TOPK_TRAIN: 500
14 |   TRIDENT:
15 |     NUM_BRANCH: 3
16 |     BRANCH_DILATIONS: [1, 2, 3]
17 |     TEST_BRANCH_IDX: 1
18 |     TRIDENT_STAGE: "res4"
19 | DATASETS:
20 |   TRAIN: ("coco_2017_train",)
21 |   TEST: ("coco_2017_val",)
22 | SOLVER:
23 |   IMS_PER_BATCH: 16
24 |   BASE_LR: 0.02
25 |   STEPS: (60000, 80000)
26 |   MAX_ITER: 90000
27 | INPUT:
28 |   MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
29 | 
-------------------------------------------------------------------------------- /projects/TridentNet/configs/tridentnet_fast_R_101_C4_3x.yaml: --------------------------------------------------------------------------------
1 | _BASE_: "Base-TridentNet-Fast-C4.yaml"
2 | MODEL:
3 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl"
4 |   MASK_ON: False
5 |   RESNETS:
6 |     DEPTH: 101
7 | SOLVER:
8 |   STEPS: (210000, 250000)
9 |   MAX_ITER: 270000
10 | 
-------------------------------------------------------------------------------- /projects/TridentNet/configs/tridentnet_fast_R_50_C4_1x.yaml: --------------------------------------------------------------------------------
1 | _BASE_: "Base-TridentNet-Fast-C4.yaml"
2 | MODEL:
3 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
4 |   MASK_ON: False
5 |   RESNETS:
6 |     DEPTH: 50
7 | 
-------------------------------------------------------------------------------- /projects/TridentNet/train_net.py: --------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
2 | """
3 | TridentNet Training Script.
4 | 
5 | This script is a simplified version of the training script in detectron2/tools.
6 | """
7 | 
8 | import os
9 | 
10 | import detectron2.utils.comm as comm
11 | from detectron2.checkpoint import DetectionCheckpointer
12 | from detectron2.config import get_cfg
13 | from detectron2.engine import DefaultTrainer, default_argument_parser, default_setup, launch
14 | from detectron2.evaluation import COCOEvaluator, verify_results
15 | 
16 | from tridentnet import add_tridentnet_config
17 | 
18 | 
19 | class Trainer(DefaultTrainer):
20 |     @classmethod
21 |     def build_evaluator(cls, cfg, dataset_name, output_folder=None):
22 |         if output_folder is None:
23 |             output_folder = os.path.join(cfg.OUTPUT_DIR, "inference")
24 |         return COCOEvaluator(dataset_name, cfg, True, output_folder)
25 | 
26 | 
27 | def setup(args):
28 |     """
29 |     Create configs and perform basic setups.
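
    The TridentNet-specific config keys (MODEL.TRIDENT.*) are registered by
    add_tridentnet_config() before the YAML file and the command-line overrides
    are merged in; merging a config that references unknown keys would fail
    otherwise.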
30 |     """
31 |     cfg = get_cfg()
32 |     add_tridentnet_config(cfg)
33 |     cfg.merge_from_file(args.config_file)
34 |     cfg.merge_from_list(args.opts)
35 |     cfg.freeze()
36 |     default_setup(cfg, args)
37 |     return cfg
38 | 
39 | 
40 | def main(args):
41 |     cfg = setup(args)
42 | 
43 |     if args.eval_only:
44 |         model = Trainer.build_model(cfg)
45 |         DetectionCheckpointer(model, save_dir=cfg.OUTPUT_DIR).resume_or_load(
46 |             cfg.MODEL.WEIGHTS, resume=args.resume
47 |         )
48 |         res = Trainer.test(cfg, model)
49 |         if comm.is_main_process():
50 |             verify_results(cfg, res)
51 |         return res
52 | 
53 |     trainer = Trainer(cfg)
54 |     trainer.resume_or_load(resume=args.resume)
55 |     return trainer.train()
56 | 
57 | 
58 | if __name__ == "__main__":
59 |     args = default_argument_parser().parse_args()
60 |     print("Command Line Args:", args)
61 |     launch(
62 |         main,
63 |         args.num_gpus,
64 |         num_machines=args.num_machines,
65 |         machine_rank=args.machine_rank,
66 |         dist_url=args.dist_url,
67 |         args=(args,),
68 |     )
69 | 
-------------------------------------------------------------------------------- /projects/TridentNet/tridentnet/__init__.py: --------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
2 | from .config import add_tridentnet_config
3 | from .trident_backbone import (
4 |     TridentBottleneckBlock,
5 |     build_trident_resnet_backbone,
6 |     make_trident_stage,
7 | )
8 | from .trident_rpn import TridentRPN
9 | from .trident_rcnn import TridentRes5ROIHeads, TridentStandardROIHeads
10 | 
-------------------------------------------------------------------------------- /projects/TridentNet/tridentnet/config.py: --------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
3 | 
4 | from detectron2.config import CfgNode as CN
5 | 
6 | 
7 | def add_tridentnet_config(cfg):
8 |     """
9 |     Add config for tridentnet.
10 |     """
11 |     _C = cfg
12 | 
13 |     _C.MODEL.TRIDENT = CN()
14 | 
15 |     # Number of branches for TridentNet.
16 |     _C.MODEL.TRIDENT.NUM_BRANCH = 3
17 |     # Specify the dilations for each branch.
18 |     _C.MODEL.TRIDENT.BRANCH_DILATIONS = [1, 2, 3]
19 |     # Specify the stage for applying trident blocks. The default stage is res4, following the
20 |     # TridentNet paper.
21 |     _C.MODEL.TRIDENT.TRIDENT_STAGE = "res4"
22 |     # Specify the test branch index for TridentNet-Fast inference:
23 |     #   - use -1 to aggregate the results of all branches during inference.
24 |     #   - otherwise, only the specified branch is used, for fast inference. The recommended
25 |     #     setting is to use the middle branch.
26 |     _C.MODEL.TRIDENT.TEST_BRANCH_IDX = 1
27 | 
-------------------------------------------------------------------------------- /projects/TridentNet/tridentnet/trident_rpn.py: --------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
2 | import torch
3 | 
4 | from detectron2.modeling import PROPOSAL_GENERATOR_REGISTRY
5 | from detectron2.modeling.proposal_generator.rpn import RPN
6 | from detectron2.structures import ImageList
7 | 
8 | 
9 | @PROPOSAL_GENERATOR_REGISTRY.register()
10 | class TridentRPN(RPN):
11 |     """
12 |     Trident RPN subnetwork.
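
    The RPN logic itself is unchanged: this subclass only duplicates the images
    and ground-truth instances `num_branch` times in `forward`, so the shared RPN
    runs on the features produced by every trident branch (or on a single branch
    at test time, when TEST_BRANCH_IDX is not -1).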
13 | """ 14 | 15 | def __init__(self, cfg, input_shape): 16 | super(TridentRPN, self).__init__(cfg, input_shape) 17 | 18 | self.num_branch = cfg.MODEL.TRIDENT.NUM_BRANCH 19 | self.trident_fast = cfg.MODEL.TRIDENT.TEST_BRANCH_IDX != -1 20 | 21 | def forward(self, images, features, gt_instances=None): 22 | """ 23 | See :class:`RPN.forward`. 24 | """ 25 | num_branch = self.num_branch if self.training or not self.trident_fast else 1 26 | # Duplicate images and gt_instances for all branches in TridentNet. 27 | all_images = ImageList( 28 | torch.cat([images.tensor] * num_branch), images.image_sizes * num_branch 29 | ) 30 | all_gt_instances = gt_instances * num_branch if gt_instances is not None else None 31 | 32 | return super(TridentRPN, self).forward(all_images, features, all_gt_instances) 33 | -------------------------------------------------------------------------------- /scripts/demo.sh: -------------------------------------------------------------------------------- 1 | python demo/demo.py --config-file configs/COCO-Detection/faster_rcnn_R_50_C4_3x.yaml \ 2 | --input input.jpg --output ./viscocodect/input_vis.png\ 3 | --opts MODEL.WEIGHTS detectron2://COCO-Detection/faster_rcnn_R_50_C4_3x/137849393/model_final_f97cb7.pkl 4 | -------------------------------------------------------------------------------- /scripts/test.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | output_dir="./outputs/flickr30kRegRel" 3 | DATE=`date "+%m-%d-%h"` 4 | 5 | exp_name="07-26-Jul-GroundR-Visual(Rel_T50-NoST-P7-s5, VEmbRelu, SShare, DetSkipPrior, BN, ATTFuseDet2,decShare)_Phr(Sent,UniMean,1Emb)_Reg(Warmup75,2layer,0p6,smax,0p1,GAP0p1)_DISC(smean,sent,M0.2)_rel(1p0Cls,2Stage,MP_trans)_SGD_0.001_v1_work8" 6 | 7 | ## LR_SCHEDULER_NAME "WarmupMultiStepLR", "WarmupPolyLR" 8 | ## Optimizer "SGD", "Adam" 9 | ## Phrase: "Mean", "Sum", "Last" 10 | 11 | 12 | export CUDA_VISIBLE_DEVICES=0,1,2,3 13 | #export CUDA_VISIBLE_DEVICES=3,5,6,7 14 | python tools/train_weakly_grounding.py --num-gpus 4 --eval-only --config-file configs/WeaklyGrounding-RN101-C4.yaml \ 15 | OUTPUT_DIR "$output_dir/$exp_name"\ 16 | SOLVER.OPTIMIZER 'SGD' \ 17 | SOLVER.IMS_PER_BATCH 40 \ 18 | SOLVER.BASE_LR 0.001 \ 19 | SOLVER.DISC_IMG_SENT_LOSS True \ 20 | SOLVER.LR_SCHEDULER_NAME "WarmupMultiStepLR" \ 21 | SOLVER.STEPS "(32000, 40000,)" \ 22 | SOLVER.MAX_ITER 80000 \ 23 | SOLVER.REG_START_ITER 7500 \ 24 | SOLVER.CHECKPOINT_PERIOD 2500 \ 25 | MODEL.VG.NETWORK 'RegRel'\ 26 | MODEL.VG.SEMANTIC_NOUNS_TOPK 300 \ 27 | MODEL.VG.SEM_NOUNS_LOSS_FACTOR 0.5 \ 28 | MODEL.VG.SEMANTIC_ATTR_TOPK 79 \ 29 | MODEL.VG.REL_CLS_LOSS_FACTOR 1.0 \ 30 | MODEL.VG.REG_LOSS_FACTOR 0.1 \ 31 | MODEL.VG.REG_IOU 0.6 \ 32 | MODEL.VG.REG_GAP_SCORE 0.1 \ 33 | MODEL.VG.SPATIAL_FEAT False \ 34 | MODEL.VG.PHRASE_SELECT_TYPE 'Mean' \ 35 | MODEL.VG.PRECOMP_TOPK 50 \ 36 | MODEL.VG.S2_TOPK 5 \ 37 | MODEL.VG.EMBEDDING_SOURCE 'Sent' \ 38 | MODEL.VG.LSTM_BIDIRECTION False \ 39 | MODEL.VG.USING_DET_KNOWLEDGE True \ 40 | MODEL.ROI_BOX_HEAD.POOLER_RESOLUTION 7 \ 41 | MODEL.RELATION.IS_ON True \ 42 | DATALOADER.NUM_WORKERS 8 \ 43 | DATASETS.NAME 'flickr30k'\ 44 | DATASETS.TEST "(\"flickr30k_val\", \"flickr30k_test\")" \ 45 | MODEL.WEIGHTS "$output_dir/$exp_name/checkpoints/model_0074999.pth" 46 | 47 | 48 | 49 | #export CUDA_VISIBLE_DEVICES=1,2 50 | #python tools/train_weakly_grounding.py --num-gpus 2 --eval-only --config-file configs/WeaklyGrounding-RN101-C4.yaml \ 51 | # OUTPUT_DIR "$output_dir/$exp_name" \ 52 | # SOLVER.OPTIMIZER 
'SGD' \ 53 | # SOLVER.IMS_PER_BATCH 2 \ 54 | # SOLVER.BASE_LR 0.0005 \ 55 | # SOLVER.INIT_PARA False \ 56 | # SOLVER.FIX_BACKBONE True \ 57 | # SOLVER.LR_SCHEDULER_NAME "WarmupMultiStepLR" \ 58 | # SOLVER.STEPS "(20000, 40000,)" \ 59 | # SOLVER.MAX_ITER 80000 \ 60 | # MODEL.VG.NETWORK 'Reg' \ 61 | # SOLVER.CHECKPOINT_PERIOD 2500 \ 62 | # MODEL.VG.SPATIAL_FEAT False \ 63 | # MODEL.VG.PHRASE_SELECT_TYPE 'Mean' \ 64 | # MODEL.VG.PRECOMP_TOPK 50 \ 65 | # MODEL.VG.S2_TOPK 5 \ 66 | # MODEL.VG.USING_ELMO False \ 67 | # MODEL.VG.EMBEDDING_SOURCE 'Sent' \ 68 | # MODEL.VG.LSTM_BIDIRECTION False \ 69 | # MODEL.VG.USING_DET_KNOWLEDGE True \ 70 | # MODEL.RELATION.IS_ON False \ 71 | # MODEL.ROI_BOX_HEAD.POOLER_RESOLUTION 7 \ 72 | # MODEL.RELATION.INTRA_LAN False \ 73 | # MODEL.WEIGHTS "$output_dir/$exp_name/checkpoints/model_0047499.pth" \ 74 | # DATALOADER.NUM_WORKERS 4 \ 75 | # DATASETS.NAME 'flickr30k' \ 76 | # DATALOADER.ASPECT_RATIO_GROUPING True \ 77 | # TEST.EVAL_PERIOD 2500 78 | -------------------------------------------------------------------------------- /scripts/test_kac.sh: -------------------------------------------------------------------------------- 1 | output_dir="./outputs/flickr30k_kac" 2 | DATE=`date "+%m-%d-%h"` 3 | 4 | ## Disc(smean, BN, oneEmb, inner, weighted, margin0.2) 5 | ## LR_SCHEDULER_NAME "WarmupMultiStepLR", "WarmupPolyLR" 6 | ## Optimizer "SGD", "Adam" 7 | ## Reg(5000,1layer,0p6,s10,0p1,gap0p3) 8 | ## Phrase: "Mean", "Sum", "Last" 9 | 10 | ## 11 | 12 | exp_name="06-17-Jun-GroundR-Visual(50-P7,DetSkipPrior,attvis,BN,spnorm)_Phr(Sent,UniMean)_visconst(w10,sum)_loss(m0p5)_SGD_0.0005_v1" 13 | 14 | 15 | export CUDA_VISIBLE_DEVICES=0 16 | #export CUDA_VISIBLE_DEVICES=3,5,6,7 17 | python tools/train_kac.py --num-gpus 1 --eval-only --config-file configs/WeaklyGrounding-RN101-C4.yaml \ 18 | OUTPUT_DIR "$output_dir/$exp_name"\ 19 | SOLVER.OPTIMIZER 'SGD' \ 20 | SOLVER.IMS_PER_BATCH 1 \ 21 | SOLVER.DISC_IMG_SENT_LOSS False \ 22 | SOLVER.BASE_LR 0.0005 \ 23 | SOLVER.LR_SCHEDULER_NAME "WarmupMultiStepLR" \ 24 | SOLVER.STEPS "(32000, 40000,)" \ 25 | SOLVER.MAX_ITER 80000 \ 26 | SOLVER.CHECKPOINT_PERIOD 2500 \ 27 | MODEL.VG.SPATIAL_FEAT False \ 28 | MODEL.VG.NETWORK 'Kac' \ 29 | MODEL.VG.PHRASE_SELECT_TYPE 'Mean' \ 30 | MODEL.VG.PRECOMP_TOPK 50 \ 31 | MODEL.VG.EMBEDDING_SOURCE 'Sent' \ 32 | MODEL.VG.LSTM_BIDIRECTION False \ 33 | MODEL.VG.USING_DET_KNOWLEDGE True \ 34 | MODEL.ROI_BOX_HEAD.POOLER_RESOLUTION 7 \ 35 | DATALOADER.NUM_WORKERS 8 \ 36 | TEST.EVAL_PERIOD 2500 \ 37 | DATASETS.TEST "(\"flickr30k_val\", \"flickr30k_test\")" \ 38 | MODEL.WEIGHTS "$output_dir/$exp_name/checkpoints/model_0054999.pth" -------------------------------------------------------------------------------- /scripts/train.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | output_dir="./outputs/flickr30k" 3 | DATE=`date "+%m-%d-%h"` 4 | 5 | ## Disc(smean, BN, oneEmb, inner, weighted, margin0.2) 6 | ## LR_SCHEDULER_NAME "WarmupMultiStepLR", "WarmupPolyLR" 7 | ## Optimizer "SGD", "Adam" 8 | ## Reg(5000,1layer,0p6,s10,0p1,gap0p3) 9 | ## Phrase: "Mean", "Sum", "Last" 10 | #$DATE-GroundR-Visual(T50-K5-NoST-P7, VEmbRelu, SShare, DetSkipPrior, BN, ATTFuseDet2, decShare)_Phr(Sent,UniMean,1Emb)_Reg(True75,2layerLeakly,0p6,smax,0p1,GAP0p3)_DISC(smean,sent,M0.2)_NoREL(bo)_SGD_0.001_v1 11 | ## NETWORK 'ML_Reg', 'Reg', 'PixelBox', 'Kac', 'Baseline', 'Baseline_s2' 12 | ## --gpu-check 13 | ## rel(0p1Cls,1Stage) 14 | ## rel(0p1Cls,2Stage,MP_trans) 15 | ## 
Reg(Warmup8w,2layer,0p6,smax,0p0,GAP0p1,lossOffNoApply) 16 | ## VEmbRelu, SShare, DetSkipPrior, BN, ATTFuseDet2,decShare 17 | 18 | export CUDA_VISIBLE_DEVICES=0,1,2,3 19 | #export CUDA_VISIBLE_DEVICES=3,5,6,7 20 | python tools/train_weakly_grounding.py --num-gpus 4 --dist-url auto --config-file configs/WeaklyGrounding-RN101-C4.yaml \ 21 | OUTPUT_DIR "$output_dir/$DATE-Visual(T50-NoST-P7-s5)_Phr(Sent,UniMean,1Emb)_DISC(smean,sent,M0.2)_Reg(Warmup75,2layer,0p6,smax,0p1,GAP0p1)_rel(1p0Cls,2Stage,MP_trans)_SGD_0.001"\ 22 | SOLVER.OPTIMIZER 'SGD' \ 23 | SOLVER.IMS_PER_BATCH 40 \ 24 | SOLVER.BASE_LR 0.001 \ 25 | SOLVER.DISC_IMG_SENT_LOSS True \ 26 | SOLVER.LR_SCHEDULER_NAME "WarmupMultiStepLR" \ 27 | SOLVER.STEPS "(32000, 40000,)" \ 28 | SOLVER.MAX_ITER 80000 \ 29 | SOLVER.REG_START_ITER 7500 \ 30 | SOLVER.CHECKPOINT_PERIOD 2500 \ 31 | MODEL.VG.NETWORK 'RegRel'\ 32 | MODEL.VG.REL_CLS_LOSS_FACTOR 1.0 \ 33 | MODEL.VG.REG_LOSS_FACTOR 0.1 \ 34 | MODEL.VG.REG_IOU 0.6 \ 35 | MODEL.VG.REG_GAP_SCORE 0.1 \ 36 | MODEL.VG.SPATIAL_FEAT False \ 37 | MODEL.VG.PHRASE_SELECT_TYPE 'Mean' \ 38 | MODEL.VG.PRECOMP_TOPK 50 \ 39 | MODEL.VG.S2_TOPK 5 \ 40 | MODEL.VG.EMBEDDING_SOURCE 'Sent' \ 41 | MODEL.VG.LSTM_BIDIRECTION False \ 42 | MODEL.VG.USING_DET_KNOWLEDGE True \ 43 | MODEL.ROI_BOX_HEAD.POOLER_RESOLUTION 7 \ 44 | MODEL.RELATION.IS_ON True \ 45 | DATALOADER.NUM_WORKERS 8 \ 46 | DATASETS.NAME 'flickr30k'\ 47 | TEST.EVAL_PERIOD 2500 48 | -------------------------------------------------------------------------------- /scripts/train_baseline.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | output_dir="./outputs/flickr30k_kac" 3 | DATE=`date "+%m-%d-%h"` 4 | 5 | ## Disc(smean, BN, oneEmb, inner, weighted, margin0.2) 6 | ## LR_SCHEDULER_NAME "WarmupMultiStepLR", "WarmupPolyLR" 7 | ## Optimizer "SGD", "Adam" 8 | ## Reg(5000,1layer,0p6,s10,0p1,gap0p3) 9 | ## Phrase: "Mean", "Sum", "Last" 10 | export CUDA_VISIBLE_DEVICES=0,1,2,3 11 | #export CUDA_VISIBLE_DEVICES=3,5,6,7 12 | python tools/train_kac.py --num-gpus 4 --dist-url auto --config-file configs/WeaklyGrounding-RN101-C4.yaml \ 13 | OUTPUT_DIR "$output_dir/$DATE-bs_GroundR-Visual(50-P7,DetSkipPrior,attvis,BN)_Phr(Sent,UniMean)_DISC(None)_SGD_0.0005_v2"\ 14 | SOLVER.OPTIMIZER 'SGD' \ 15 | SOLVER.IMS_PER_BATCH 40 \ 16 | SOLVER.BASE_LR 0.0005 \ 17 | SOLVER.DISC_IMG_SENT_LOSS False \ 18 | SOLVER.LR_SCHEDULER_NAME "WarmupMultiStepLR" \ 19 | SOLVER.STEPS "(32000, 40000,)" \ 20 | SOLVER.MAX_ITER 80000 \ 21 | SOLVER.CHECKPOINT_PERIOD 2500 \ 22 | MODEL.VG.REG_LOSS_FACTOR 1.0 \ 23 | MODEL.VG.SPATIAL_FEAT False \ 24 | MODEL.VG.NETWORK 'Baseline' \ 25 | MODEL.VG.PHRASE_SELECT_TYPE 'Mean' \ 26 | MODEL.VG.PRECOMP_TOPK 50 \ 27 | MODEL.VG.EMBEDDING_SOURCE 'Sent' \ 28 | MODEL.VG.LSTM_BIDIRECTION False \ 29 | MODEL.VG.USING_DET_KNOWLEDGE True \ 30 | MODEL.ROI_BOX_HEAD.POOLER_RESOLUTION 7 \ 31 | DATALOADER.NUM_WORKERS 8 \ 32 | TEST.EVAL_PERIOD 2500 -------------------------------------------------------------------------------- /scripts/train_kac.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | output_dir="./outputs/flickr30k_kac" 3 | DATE=`date "+%m-%d-%h"` 4 | 5 | ## Disc(smean, BN, oneEmb, inner, weighted, margin0.2) 6 | ## LR_SCHEDULER_NAME "WarmupMultiStepLR", "WarmupPolyLR" 7 | ## Optimizer "SGD", "Adam" 8 | ## Reg(5000,1layer,0p6,s10,0p1,gap0p3) 9 | ## Phrase: "Mean", "Sum", "Last" 10 | export CUDA_VISIBLE_DEVICES=0,1,2,3 11 | #export 
CUDA_VISIBLE_DEVICES=3,5,6,7 12 | python tools/train_kac.py --num-gpus 4 --dist-url auto --config-file configs/WeaklyGrounding-RN101-C4.yaml \ 13 | OUTPUT_DIR "$output_dir/$DATE-GroundR-Visual(50-P7,DetSkipPrior,attvis,BN,NST)_Phr(Phr,UniMean)_Disc(None)_SGD_0.0005_v1"\ 14 | SOLVER.OPTIMIZER 'SGD' \ 15 | SOLVER.IMS_PER_BATCH 40 \ 16 | SOLVER.DISC_IMG_SENT_LOSS False \ 17 | SOLVER.BASE_LR 0.0005 \ 18 | SOLVER.LR_SCHEDULER_NAME "WarmupMultiStepLR" \ 19 | SOLVER.STEPS "(32000, 40000,)" \ 20 | SOLVER.MAX_ITER 80000 \ 21 | SOLVER.CHECKPOINT_PERIOD 2500 \ 22 | MODEL.VG.SPATIAL_FEAT False \ 23 | MODEL.VG.NETWORK 'Baseline' \ 24 | MODEL.VG.PHRASE_SELECT_TYPE 'Mean' \ 25 | MODEL.VG.PRECOMP_TOPK 50 \ 26 | MODEL.VG.EMBEDDING_SOURCE 'Sent' \ 27 | MODEL.VG.LSTM_BIDIRECTION False \ 28 | MODEL.VG.USING_DET_KNOWLEDGE True \ 29 | MODEL.ROI_BOX_HEAD.POOLER_RESOLUTION 7 \ 30 | DATALOADER.NUM_WORKERS 8 \ 31 | TEST.EVAL_PERIOD 2500 -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [isort] 2 | line_length=100 3 | multi_line_output=4 4 | known_standard_library=numpy,setuptools 5 | known_myself=detectron2 6 | known_third_party=fvcore,matplotlib,cv2,torch,torchvision,PIL,pycocotools,yacs,termcolor,cityscapesscripts,tabulate,tqdm,scipy,lvis,torchvision 7 | no_lines_before=STDLIB,THIRDPARTY 8 | sections=FUTURE,STDLIB,THIRDPARTY,myself,FIRSTPARTY,LOCALFOLDER 9 | default_section=FIRSTPARTY 10 | 11 | [mypy] 12 | python_version=3.6 13 | ignore_missing_imports = True 14 | warn_unused_configs = True 15 | disallow_untyped_defs = True 16 | check_untyped_defs = True 17 | warn_unused_ignores = True 18 | warn_redundant_casts = True 19 | show_column_numbers = True 20 | follow_imports = silent 21 | allow_redefinition = True 22 | ; Require all functions to be annotated 23 | disallow_incomplete_defs = True 24 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 3 | 4 | import glob 5 | import os 6 | from setuptools import find_packages, setup 7 | import torch 8 | from torch.utils.cpp_extension import CUDA_HOME, CppExtension, CUDAExtension 9 | 10 | 11 | def get_extensions(): 12 | this_dir = os.path.dirname(os.path.abspath(__file__)) 13 | extensions_dir = os.path.join(this_dir, "detectron2", "layers", "csrc") 14 | 15 | main_source = os.path.join(extensions_dir, "vision.cpp") 16 | sources = glob.glob(os.path.join(extensions_dir, "**", "*.cpp")) 17 | source_cuda = glob.glob(os.path.join(extensions_dir, "**", "*.cu")) 18 | 19 | sources = [main_source] + sources 20 | 21 | extension = CppExtension 22 | 23 | extra_compile_args = {"cxx": []} 24 | define_macros = [] 25 | 26 | if (torch.cuda.is_available() and CUDA_HOME is not None) or os.getenv("FORCE_CUDA", "0") == "1": 27 | extension = CUDAExtension 28 | sources += source_cuda 29 | define_macros += [("WITH_CUDA", None)] 30 | extra_compile_args["nvcc"] = [ 31 | "-DCUDA_HAS_FP16=1", 32 | "-D__CUDA_NO_HALF_OPERATORS__", 33 | "-D__CUDA_NO_HALF_CONVERSIONS__", 34 | "-D__CUDA_NO_HALF2_OPERATORS__", 35 | ] 36 | 37 | # It's better if pytorch can do this by default .. 
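        # If a host compiler is pinned via the CC environment variable, pass it to
        # nvcc through its -ccbin flag, so the CUDA sources are compiled with the
        # same host compiler as the C++ sources.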
38 | CC = os.environ.get("CC", None) 39 | if CC is not None: 40 | extra_compile_args["nvcc"].append("-ccbin={}".format(CC)) 41 | 42 | sources = [os.path.join(extensions_dir, s) for s in sources] 43 | 44 | include_dirs = [extensions_dir] 45 | 46 | ext_modules = [ 47 | extension( 48 | "detectron2._C", 49 | sources, 50 | include_dirs=include_dirs, 51 | define_macros=define_macros, 52 | extra_compile_args=extra_compile_args, 53 | ) 54 | ] 55 | 56 | return ext_modules 57 | 58 | 59 | setup( 60 | name="detectron2", 61 | version="0.1", 62 | author="FAIR", 63 | url="https://github.com/facebookresearch/detectron2", 64 | description="Detectron2 is FAIR's next-generation research " 65 | "platform for object detection and segmentation.", 66 | packages=find_packages(exclude=("configs", "tests")), 67 | install_requires=[ 68 | "termcolor>=1.1", 69 | "Pillow", 70 | "yacs>=0.1.6", 71 | "tabulate", 72 | "cloudpickle", 73 | "matplotlib", 74 | "tqdm>4.29.0", 75 | "shapely", 76 | "tensorboard", 77 | ], 78 | ext_modules=get_extensions(), 79 | cmdclass={"build_ext": torch.utils.cpp_extension.BuildExtension}, 80 | ) 81 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | -------------------------------------------------------------------------------- /tests/test_box2box_transform.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | import logging 3 | import unittest 4 | import torch 5 | 6 | from detectron2.modeling.box_regression import Box2BoxTransform, Box2BoxTransformRotated 7 | 8 | logger = logging.getLogger(__name__) 9 | 10 | 11 | def random_boxes(mean_box, stdev, N): 12 | return torch.rand(N, 4) * stdev + torch.tensor(mean_box, dtype=torch.float) 13 | 14 | 15 | class TestBox2BoxTransform(unittest.TestCase): 16 | def test_reconstruction(self): 17 | weights = (5, 5, 10, 10) 18 | b2b_tfm = Box2BoxTransform(weights=weights) 19 | src_boxes = random_boxes([10, 10, 20, 20], 1, 10) 20 | dst_boxes = random_boxes([10, 10, 20, 20], 1, 10) 21 | 22 | devices = [torch.device("cpu")] 23 | if torch.cuda.is_available(): 24 | devices.append(torch.device("cuda")) 25 | for device in devices: 26 | src_boxes = src_boxes.to(device=device) 27 | dst_boxes = dst_boxes.to(device=device) 28 | deltas = b2b_tfm.get_deltas(src_boxes, dst_boxes) 29 | dst_boxes_reconstructed = b2b_tfm.apply_deltas(deltas, src_boxes) 30 | assert torch.allclose(dst_boxes, dst_boxes_reconstructed) 31 | 32 | 33 | def random_rotated_boxes(mean_box, std_length, std_angle, N): 34 | return torch.cat( 35 | [torch.rand(N, 4) * std_length, torch.rand(N, 1) * std_angle], dim=1 36 | ) + torch.tensor(mean_box, dtype=torch.float) 37 | 38 | 39 | class TestBox2BoxTransformRotated(unittest.TestCase): 40 | def test_reconstruction(self): 41 | weights = (5, 5, 10, 10, 1) 42 | b2b_transform = Box2BoxTransformRotated(weights=weights) 43 | src_boxes = random_rotated_boxes([10, 10, 20, 20, -30], 5, 60.0, 10) 44 | dst_boxes = random_rotated_boxes([10, 10, 20, 20, -30], 5, 60.0, 10) 45 | 46 | devices = [torch.device("cpu")] 47 | if torch.cuda.is_available(): 48 | devices.append(torch.device("cuda")) 49 | for device in devices: 50 | src_boxes = src_boxes.to(device=device) 51 | dst_boxes = dst_boxes.to(device=device) 52 | deltas = b2b_transform.get_deltas(src_boxes, 
dst_boxes) 53 | dst_boxes_reconstructed = b2b_transform.apply_deltas(deltas, src_boxes) 54 | assert torch.allclose(dst_boxes, dst_boxes_reconstructed, atol=1e-5) 55 | 56 | 57 | if __name__ == "__main__": 58 | unittest.main() 59 | -------------------------------------------------------------------------------- /tests/test_boxes.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | import numpy as np 3 | import unittest 4 | import torch 5 | 6 | from detectron2.structures import Boxes, BoxMode, pairwise_iou 7 | 8 | 9 | class TestBoxMode(unittest.TestCase): 10 | def _convert_xy_to_wh(self, x): 11 | return BoxMode.convert(x, BoxMode.XYXY_ABS, BoxMode.XYWH_ABS) 12 | 13 | def test_box_convert_list(self): 14 | for tp in [list, tuple]: 15 | box = tp([5, 5, 10, 10]) 16 | output = self._convert_xy_to_wh(box) 17 | self.assertTrue(output == tp([5, 5, 5, 5])) 18 | 19 | with self.assertRaises(Exception): 20 | self._convert_xy_to_wh([box]) 21 | 22 | def test_box_convert_array(self): 23 | box = np.asarray([[5, 5, 10, 10], [1, 1, 2, 3]]) 24 | output = self._convert_xy_to_wh(box) 25 | self.assertTrue((output[0] == [5, 5, 5, 5]).all()) 26 | self.assertTrue((output[1] == [1, 1, 1, 2]).all()) 27 | 28 | def test_box_convert_tensor(self): 29 | box = torch.tensor([[5, 5, 10, 10], [1, 1, 2, 3]]) 30 | output = self._convert_xy_to_wh(box).numpy() 31 | self.assertTrue((output[0] == [5, 5, 5, 5]).all()) 32 | self.assertTrue((output[1] == [1, 1, 1, 2]).all()) 33 | 34 | 35 | class TestBoxIOU(unittest.TestCase): 36 | def test_pairwise_iou(self): 37 | boxes1 = torch.tensor([[0.0, 0.0, 1.0, 1.0], [0.0, 0.0, 1.0, 1.0]]) 38 | 39 | boxes2 = torch.tensor( 40 | [ 41 | [0.0, 0.0, 1.0, 1.0], 42 | [0.0, 0.0, 0.5, 1.0], 43 | [0.0, 0.0, 1.0, 0.5], 44 | [0.0, 0.0, 0.5, 0.5], 45 | [0.5, 0.5, 1.0, 1.0], 46 | [0.5, 0.5, 1.5, 1.5], 47 | ] 48 | ) 49 | 50 | expected_ious = torch.tensor( 51 | [ 52 | [1.0, 0.5, 0.5, 0.25, 0.25, 0.25 / (2 - 0.25)], 53 | [1.0, 0.5, 0.5, 0.25, 0.25, 0.25 / (2 - 0.25)], 54 | ] 55 | ) 56 | 57 | ious = pairwise_iou(Boxes(boxes1), Boxes(boxes2)) 58 | 59 | assert torch.allclose(ious, expected_ious) 60 | 61 | 62 | if __name__ == "__main__": 63 | unittest.main() 64 | -------------------------------------------------------------------------------- /tests/test_checkpoint.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved 2 | import unittest 3 | from collections import OrderedDict 4 | import torch 5 | from torch import nn 6 | 7 | from detectron2.checkpoint.c2_model_loading import align_and_update_state_dicts 8 | from detectron2.utils.logger import setup_logger 9 | 10 | 11 | class TestCheckpointer(unittest.TestCase): 12 | def setUp(self): 13 | setup_logger() 14 | 15 | def create_complex_model(self): 16 | m = nn.Module() 17 | m.block1 = nn.Module() 18 | m.block1.layer1 = nn.Linear(2, 3) 19 | m.layer2 = nn.Linear(3, 2) 20 | m.res = nn.Module() 21 | m.res.layer2 = nn.Linear(3, 2) 22 | 23 | state_dict = OrderedDict() 24 | state_dict["layer1.weight"] = torch.rand(3, 2) 25 | state_dict["layer1.bias"] = torch.rand(3) 26 | state_dict["layer2.weight"] = torch.rand(2, 3) 27 | state_dict["layer2.bias"] = torch.rand(2) 28 | state_dict["res.layer2.weight"] = torch.rand(2, 3) 29 | state_dict["res.layer2.bias"] = torch.rand(2) 30 | return m, state_dict 31 | 32 | def test_complex_model_loaded(self): 33 | for add_data_parallel in [False, True]: 34 | model, state_dict = self.create_complex_model() 35 | if add_data_parallel: 36 | model = nn.DataParallel(model) 37 | model_sd = model.state_dict() 38 | 39 | align_and_update_state_dicts(model_sd, state_dict) 40 | for loaded, stored in zip(model_sd.values(), state_dict.values()): 41 | # different tensor references 42 | self.assertFalse(id(loaded) == id(stored)) 43 | # same content 44 | self.assertTrue(loaded.equal(stored)) 45 | 46 | 47 | if __name__ == "__main__": 48 | unittest.main() 49 | -------------------------------------------------------------------------------- /tests/test_config.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 3 | 4 | 5 | import os 6 | import tempfile 7 | import unittest 8 | 9 | from detectron2.config import downgrade_config, get_cfg, upgrade_config 10 | 11 | _V0_CFG = """ 12 | MODEL: 13 | RPN_HEAD: 14 | NAME: "TEST" 15 | VERSION: 0 16 | """ 17 | 18 | _V1_CFG = """ 19 | MODEL: 20 | WEIGHT: "/path/to/weight" 21 | """ 22 | 23 | 24 | class TestConfigVersioning(unittest.TestCase): 25 | def test_upgrade_downgrade_consistency(self): 26 | cfg = get_cfg() 27 | # check that custom is preserved 28 | cfg.USER_CUSTOM = 1 29 | 30 | down = downgrade_config(cfg, to_version=0) 31 | up = upgrade_config(down) 32 | self.assertTrue(up == cfg) 33 | 34 | def _merge_cfg_str(self, cfg, merge_str): 35 | f = tempfile.NamedTemporaryFile(mode="w", suffix=".yaml", delete=False) 36 | try: 37 | f.write(merge_str) 38 | f.close() 39 | cfg.merge_from_file(f.name) 40 | finally: 41 | os.remove(f.name) 42 | return cfg 43 | 44 | def test_auto_upgrade(self): 45 | cfg = get_cfg() 46 | latest_ver = cfg.VERSION 47 | cfg.USER_CUSTOM = 1 48 | 49 | self._merge_cfg_str(cfg, _V0_CFG) 50 | 51 | self.assertEqual(cfg.MODEL.RPN.HEAD_NAME, "TEST") 52 | self.assertEqual(cfg.VERSION, latest_ver) 53 | 54 | def test_guess_v1(self): 55 | cfg = get_cfg() 56 | latest_ver = cfg.VERSION 57 | self._merge_cfg_str(cfg, _V1_CFG) 58 | self.assertEqual(cfg.VERSION, latest_ver) 59 | -------------------------------------------------------------------------------- /tests/test_data_transform.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved 3 | 4 | import logging 5 | import numpy as np 6 | import unittest 7 | 8 | from detectron2.config import get_cfg 9 | from detectron2.data import detection_utils 10 | from detectron2.data import transforms as T 11 | from detectron2.utils.logger import setup_logger 12 | 13 | logger = logging.getLogger(__name__) 14 | 15 | 16 | class TestTransforms(unittest.TestCase): 17 | def setUp(self): 18 | setup_logger() 19 | 20 | def test_crop_polygons(self): 21 | # Ensure that shapely produce an extra vertex at the end 22 | import shapely.geometry as geometry 23 | 24 | polygon = np.asarray([3, 3.5, 11, 10.0, 38, 98, 15.0, 100.0]).reshape(-1, 2) 25 | g = geometry.Polygon(polygon) 26 | coords = np.asarray(g.exterior.coords) 27 | self.assertEqual(coords[0].tolist(), coords[-1].tolist()) 28 | 29 | def test_apply_rotated_boxes(self): 30 | np.random.seed(125) 31 | cfg = get_cfg() 32 | is_train = True 33 | transform_gen = detection_utils.build_transform_gen(cfg, is_train) 34 | image = np.random.rand(200, 300) 35 | image, transforms = T.apply_transform_gens(transform_gen, image) 36 | image_shape = image.shape[:2] # h, w 37 | assert image_shape == (800, 1200) 38 | annotation = {"bbox": [179, 97, 62, 40, -56]} 39 | 40 | boxes = np.array([annotation["bbox"]], dtype=np.float64) # boxes.shape = (1, 5) 41 | transformed_bbox = transforms.apply_rotated_box(boxes)[0] 42 | 43 | expected_bbox = np.array([484, 388, 248, 160, 56], dtype=np.float64) 44 | err_msg = "transformed_bbox = {}, expected {}".format(transformed_bbox, expected_bbox) 45 | assert np.allclose(transformed_bbox, expected_bbox), err_msg 46 | 47 | def test_apply_rotated_boxes_unequal_scaling_factor(self): 48 | np.random.seed(125) 49 | h, w = 400, 200 50 | newh, neww = 800, 800 51 | image = np.random.rand(h, w) 52 | transform_gen = [] 53 | transform_gen.append(T.Resize(shape=(newh, neww))) 54 | image, transforms = T.apply_transform_gens(transform_gen, image) 55 | image_shape = image.shape[:2] # h, w 56 | assert image_shape == (newh, neww) 57 | 58 | boxes = np.array( 59 | [ 60 | [150, 100, 40, 20, 0], 61 | [150, 100, 40, 20, 30], 62 | [150, 100, 40, 20, 90], 63 | [150, 100, 40, 20, -90], 64 | ], 65 | dtype=np.float64, 66 | ) 67 | transformed_boxes = transforms.apply_rotated_box(boxes) 68 | 69 | expected_bboxes = np.array( 70 | [ 71 | [600, 200, 160, 40, 0], 72 | [600, 200, 144.22205102, 52.91502622, 49.10660535], 73 | [600, 200, 80, 80, 90], 74 | [600, 200, 80, 80, -90], 75 | ], 76 | dtype=np.float64, 77 | ) 78 | err_msg = "transformed_boxes = {}, expected {}".format(transformed_boxes, expected_bboxes) 79 | assert np.allclose(transformed_boxes, expected_bboxes), err_msg 80 | -------------------------------------------------------------------------------- /tests/test_roi_align.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved 2 | import numpy as np 3 | import unittest 4 | import cv2 5 | import torch 6 | 7 | from detectron2.layers.roi_align import ROIAlign 8 | 9 | 10 | class ROIAlignTest(unittest.TestCase): 11 | def test_forward_output(self): 12 | input = np.arange(25).reshape(5, 5).astype("float32") 13 | """ 14 | 0 1 2 3 4 15 | 5 6 7 8 9 16 | 10 11 12 13 14 17 | 15 16 17 18 19 18 | 20 21 22 23 24 19 | """ 20 | 21 | output = self._simple_roialign(input, [1, 1, 3, 3], (4, 4), aligned=False) 22 | output_correct = self._simple_roialign(input, [1, 1, 3, 3], (4, 4), aligned=True) 23 | 24 | # without correction: 25 | old_results = [ 26 | [7.5, 8, 8.5, 9], 27 | [10, 10.5, 11, 11.5], 28 | [12.5, 13, 13.5, 14], 29 | [15, 15.5, 16, 16.5], 30 | ] 31 | 32 | # with 0.5 correction: 33 | correct_results = [ 34 | [4.5, 5.0, 5.5, 6.0], 35 | [7.0, 7.5, 8.0, 8.5], 36 | [9.5, 10.0, 10.5, 11.0], 37 | [12.0, 12.5, 13.0, 13.5], 38 | ] 39 | # This is an upsampled version of [[6, 7], [11, 12]] 40 | 41 | self.assertTrue(np.allclose(output.flatten(), np.asarray(old_results).flatten())) 42 | self.assertTrue( 43 | np.allclose(output_correct.flatten(), np.asarray(correct_results).flatten()) 44 | ) 45 | 46 | # Also see similar issues in tensorflow at 47 | # https://github.com/tensorflow/tensorflow/issues/26278 48 | 49 | def test_resize(self): 50 | H, W = 30, 30 51 | input = np.random.rand(H, W).astype("float32") * 100 52 | box = [10, 10, 20, 20] 53 | output = self._simple_roialign(input, box, (5, 5), aligned=True) 54 | 55 | input2x = cv2.resize(input, (W // 2, H // 2), interpolation=cv2.INTER_LINEAR) 56 | box2x = [x / 2 for x in box] 57 | output2x = self._simple_roialign(input2x, box2x, (5, 5), aligned=True) 58 | diff = np.abs(output2x - output) 59 | self.assertTrue(diff.max() < 1e-4) 60 | 61 | def _simple_roialign(self, img, box, resolution, aligned=True): 62 | """ 63 | RoiAlign with scale 1.0 and 0 sample ratio. 64 | """ 65 | if isinstance(resolution, int): 66 | resolution = (resolution, resolution) 67 | op = ROIAlign(resolution, 1.0, 0, aligned=aligned) 68 | input = torch.from_numpy(img[None, None, :, :].astype("float32")) 69 | 70 | rois = [0] + list(box) 71 | rois = torch.from_numpy(np.asarray(rois)[None, :].astype("float32")) 72 | output = op.forward(input, rois).numpy() 73 | if torch.cuda.is_available(): 74 | output_cuda = op.forward(input.cuda(), rois.cuda()).cpu().numpy() 75 | self.assertTrue(np.allclose(output, output_cuda)) 76 | return output[0, 0] 77 | 78 | def test_empty_box(self): 79 | img = np.random.rand(5, 5) 80 | box = [3, 4, 5, 4] 81 | o = self._simple_roialign(img, box, 7) 82 | self.assertTrue((o == 0).all()) 83 | 84 | 85 | if __name__ == "__main__": 86 | unittest.main() 87 | -------------------------------------------------------------------------------- /tests/test_roi_pooler.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
--------------------------------------------------------------------------------
/tests/test_roi_pooler.py:
--------------------------------------------------------------------------------
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
import logging
import unittest
import torch

from detectron2.modeling.poolers import ROIPooler
from detectron2.structures import Boxes, RotatedBoxes

logger = logging.getLogger(__name__)


class TestROIPooler(unittest.TestCase):
    def _rand_boxes(self, num_boxes, x_max, y_max):
        coords = torch.rand(num_boxes, 4)
        coords[:, 0] *= x_max
        coords[:, 1] *= y_max
        coords[:, 2] *= x_max
        coords[:, 3] *= y_max
        boxes = torch.zeros(num_boxes, 4)
        # sort the corners so that (x0, y0) <= (x1, y1)
        boxes[:, 0] = torch.min(coords[:, 0], coords[:, 2])
        boxes[:, 1] = torch.min(coords[:, 1], coords[:, 3])
        boxes[:, 2] = torch.max(coords[:, 0], coords[:, 2])
        boxes[:, 3] = torch.max(coords[:, 1], coords[:, 3])
        return boxes

    def _test_roialignv2_roialignrotated_match(self, device):
        pooler_resolution = 14
        canonical_level = 4
        canonical_scale_factor = 2 ** canonical_level
        pooler_scales = (1.0 / canonical_scale_factor,)
        sampling_ratio = 0

        N, C, H, W = 2, 4, 10, 8
        N_rois = 10
        std = 11
        mean = 0
        feature = (torch.rand(N, C, H, W) - 0.5) * 2 * std + mean

        features = [feature.to(device)]

        rois = []
        rois_rotated = []
        for _ in range(N):
            boxes = self._rand_boxes(
                num_boxes=N_rois, x_max=W * canonical_scale_factor, y_max=H * canonical_scale_factor
            )

            # the equivalent (cx, cy, w, h, angle=0) representation of each box
            rotated_boxes = torch.zeros(N_rois, 5)
            rotated_boxes[:, 0] = (boxes[:, 0] + boxes[:, 2]) / 2.0
            rotated_boxes[:, 1] = (boxes[:, 1] + boxes[:, 3]) / 2.0
            rotated_boxes[:, 2] = boxes[:, 2] - boxes[:, 0]
            rotated_boxes[:, 3] = boxes[:, 3] - boxes[:, 1]
            rois.append(Boxes(boxes).to(device))
            rois_rotated.append(RotatedBoxes(rotated_boxes).to(device))

        roialignv2_pooler = ROIPooler(
            output_size=pooler_resolution,
            scales=pooler_scales,
            sampling_ratio=sampling_ratio,
            pooler_type="ROIAlignV2",
        )

        roialignv2_out = roialignv2_pooler(features, rois)

        roialignrotated_pooler = ROIPooler(
            output_size=pooler_resolution,
            scales=pooler_scales,
            sampling_ratio=sampling_ratio,
            pooler_type="ROIAlignRotated",
        )

        roialignrotated_out = roialignrotated_pooler(features, rois_rotated)

        assert torch.allclose(roialignv2_out, roialignrotated_out, atol=1e-4)

    def test_roialignv2_roialignrotated_match_cpu(self):
        self._test_roialignv2_roialignrotated_match(device="cpu")

    @unittest.skipIf(not torch.cuda.is_available(), "CUDA unavailable")
    def test_roialignv2_roialignrotated_match_cuda(self):
        self._test_roialignv2_roialignrotated_match(device="cuda")


if __name__ == "__main__":
    unittest.main()
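
def _assign_fpn_level(boxes, canonical_box_size=224, canonical_level=4):
    """Sketch, not part of the original tests: the usual heuristic a
    multi-scale ROIPooler applies to assign each box to an FPN level. A box
    whose sqrt(area) equals `canonical_box_size` maps to `canonical_level`;
    halving or doubling the box moves it one level down or up. Clamping to the
    available levels is omitted here, and the epsilon is an assumption.
    """
    area = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
    return torch.floor(
        canonical_level + torch.log2(torch.sqrt(area) / canonical_box_size + 1e-8)
    )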
--------------------------------------------------------------------------------
/tests/test_visualizer.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
# File: test_visualizer.py

import numpy as np
import unittest
import torch

from detectron2.data import MetadataCatalog
from detectron2.structures import Instances
from detectron2.utils.visualizer import Visualizer


class TestVisualizer(unittest.TestCase):
    def _random_data(self):
        H, W = 100, 100
        N = 10
        img = np.random.rand(H, W, 3) * 255
        boxxy = np.random.rand(N, 2) * (H // 2)
        boxes = np.concatenate((boxxy, boxxy + H // 2), axis=1)

        def _rand_poly():
            return np.random.rand(3, 2).flatten() * H

        polygons = [[_rand_poly() for _ in range(np.random.randint(1, 5))] for _ in range(N)]

        # np.bool is a deprecated alias for the builtin bool; use bool directly
        mask = np.zeros_like(img[:, :, 0], dtype=bool)
        mask[:10, 10:20] = 1

        labels = [str(i) for i in range(N)]
        return img, boxes, labels, polygons, [mask] * N

    @property
    def metadata(self):
        return MetadataCatalog.get("coco_2017_train")

    def test_overlay_instances(self):
        img, boxes, labels, polygons, masks = self._random_data()

        v = Visualizer(img, self.metadata)
        output = v.overlay_instances(masks=polygons, boxes=boxes, labels=labels).get_image()
        self.assertEqual(output.shape, img.shape)

        # Test 2x scaling
        v = Visualizer(img, self.metadata, scale=2.0)
        output = v.overlay_instances(masks=polygons, boxes=boxes, labels=labels).get_image()
        self.assertEqual(output.shape[0], img.shape[0] * 2)

        # Test overlay masks
        v = Visualizer(img, self.metadata)
        output = v.overlay_instances(masks=masks, boxes=boxes, labels=labels).get_image()
        self.assertEqual(output.shape, img.shape)

    def test_overlay_instances_no_boxes(self):
        img, boxes, labels, polygons, _ = self._random_data()
        v = Visualizer(img, self.metadata)
        v.overlay_instances(masks=polygons, boxes=None, labels=labels).get_image()

    def test_draw_instance_predictions(self):
        img, boxes, _, _, masks = self._random_data()
        num_inst = len(boxes)
        inst = Instances((img.shape[0], img.shape[1]))
        inst.pred_classes = torch.randint(0, 80, size=(num_inst,))
        inst.scores = torch.rand(num_inst)
        inst.pred_boxes = torch.from_numpy(boxes)
        inst.pred_masks = torch.from_numpy(np.asarray(masks))

        v = Visualizer(img, self.metadata)
        v.draw_instance_predictions(inst)

    def test_correct_output_shape(self):
        img = np.random.rand(928, 928, 3) * 255
        v = Visualizer(img, self.metadata)
        out = v.output.get_image()
        self.assertEqual(out.shape, img.shape)
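
def _visualizer_usage_sketch(image_path="input.jpg"):
    """Minimal usage sketch, not part of the original tests: overlay a couple
    of hand-made boxes on an image read from disk. `image_path` and the output
    filename are placeholders assumed for illustration.
    """
    import cv2

    img = cv2.imread(image_path)[:, :, ::-1]  # OpenCV loads BGR; Visualizer expects RGB
    v = Visualizer(img, MetadataCatalog.get("coco_2017_train"))
    out = v.overlay_instances(
        boxes=np.array([[10, 10, 80, 80], [30, 40, 120, 150]]),
        labels=["a", "b"],
    ).get_image()
    cv2.imwrite("vis.jpg", out[:, :, ::-1])  # convert back to BGR for imwrite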
--------------------------------------------------------------------------------
/unit_test.py:
--------------------------------------------------------------------------------
#!/usr/bin/python3.6
# -*- coding: utf-8 -*-
# @Time : 2019/11/22 19:13

import os
import os.path as osp
import pickle


def extract_boxes():
    """Collect the precomputed boxes and image scales from the per-image
    feature pickles into a single annotation file."""
    feat_dir = './flickr30k_datasets/flickr30k_feat_nms/flickr30k_torch_nms1e4_feat'
    path_list = os.listdir(feat_dir)

    precomp_annos = {}
    for img_id in path_list:
        feat_path = osp.join(feat_dir, img_id)
        with open(feat_path, 'rb') as load_f:
            res = pickle.load(load_f)

        imgs = img_id.split('.')[0]  # image id = filename stem
        bbox_data = res['boxes']
        img_scale = res['img_scale']
        precomp_annos[imgs] = {'boxes': bbox_data, 'img_scale': img_scale}
        print(img_id, 'done')

    with open('./flickr30k_datasets/flickr30k_anno/precomp_annos.pkl', 'wb') as dump_f:
        pickle.dump(precomp_annos, dump_f)


if __name__ == '__main__':
    extract_boxes()
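
# A companion sketch, not in the original script: how the dumped annotation
# file could be read back. The keys mirror what extract_boxes() writes; the
# default path repeats the hard-coded location above and is an assumption.
def load_precomp_annos(path='./flickr30k_datasets/flickr30k_anno/precomp_annos.pkl'):
    with open(path, 'rb') as load_f:
        annos = pickle.load(load_f)
    # annos maps image id (filename stem) -> {'boxes': ..., 'img_scale': ...}
    return annos
--------------------------------------------------------------------------------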